Skip to contents

Subset a GInteractions object with a tidyverse-like filter

Usage

# S3 method for class 'GInteractions'
filter(.data, ...)

Arguments

.data

a GInteractions object

...

Expressions that return a logical value and are defined in terms of the variables in .data; both core columns (e.g. start1, seqnames2) and metadata columns can be used. If multiple expressions are included, they are combined with the & operator. Only interactions (rows) for which all conditions evaluate to TRUE are kept.

Value

A GInteractions object containing only the interactions for which all conditions evaluate to TRUE.

Examples

gi <- read.table(text = "
chr1 1 10 chr1 1 10
chr1 2 10 chr2 1 10
chr3 3 10 chr3 1 10
chr4 4 10 chr4 1 10
chr5 5 10 chr5 1 10",
col.names = c(
    "seqnames1", "start1", "end1", 
    "seqnames2", "start2", "end2")
) |> 
  as_ginteractions() |> 
  mutate(cis = seqnames1 == seqnames2, score = runif(5)*100, gc = runif(5))
gi
#> GInteractions object with 5 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr1      1-10       * ---      chr1      1-10       * |  TRUE
#>   [2]      chr1      2-10       * ---      chr2      1-10       * | FALSE
#>   [3]      chr3      3-10       * ---      chr3      1-10       * |  TRUE
#>   [4]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>   [5]      chr5      5-10       * ---      chr5      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]  40.23282  0.975548
#>   [2]  19.56698  0.289892
#>   [3]  40.35381  0.678380
#>   [4]   6.36615  0.735320
#>   [5]  38.87013  0.195957
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths

####################################################################
# 1. Filter interactions from GInteractions by condition on metadata columns
####################################################################

gi |> filter(gc > 0.1)
#> GInteractions object with 5 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr1      1-10       * ---      chr1      1-10       * |  TRUE
#>   [2]      chr1      2-10       * ---      chr2      1-10       * | FALSE
#>   [3]      chr3      3-10       * ---      chr3      1-10       * |  TRUE
#>   [4]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>   [5]      chr5      5-10       * ---      chr5      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]  40.23282  0.975548
#>   [2]  19.56698  0.289892
#>   [3]  40.35381  0.678380
#>   [4]   6.36615  0.735320
#>   [5]  38.87013  0.195957
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths
gi |> filter(gc > 0.1, score > 50)
#> GInteractions object with 0 interactions and 3 metadata columns:
#>    seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>        <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>        score        gc
#>    <numeric> <numeric>
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths
gi |> filter(cis)
#> GInteractions object with 4 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr1      1-10       * ---      chr1      1-10       * |  TRUE
#>   [2]      chr3      3-10       * ---      chr3      1-10       * |  TRUE
#>   [3]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>   [4]      chr5      5-10       * ---      chr5      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]  40.23282  0.975548
#>   [2]  40.35381  0.678380
#>   [3]   6.36615  0.735320
#>   [4]  38.87013  0.195957
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths

####################################################################
# 2. On-the-fly calculations
####################################################################

gi
#> GInteractions object with 5 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr1      1-10       * ---      chr1      1-10       * |  TRUE
#>   [2]      chr1      2-10       * ---      chr2      1-10       * | FALSE
#>   [3]      chr3      3-10       * ---      chr3      1-10       * |  TRUE
#>   [4]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>   [5]      chr5      5-10       * ---      chr5      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]  40.23282  0.975548
#>   [2]  19.56698  0.289892
#>   [3]  40.35381  0.678380
#>   [4]   6.36615  0.735320
#>   [5]  38.87013  0.195957
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths
gi |> filter(start1 >= start2 + 3)
#> GInteractions object with 2 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>   [2]      chr5      5-10       * ---      chr5      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]   6.36615  0.735320
#>   [2]  38.87013  0.195957
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths
gi |> filter(score * gc > score * 0.5)
#> GInteractions object with 3 interactions and 3 metadata columns:
#>       seqnames1   ranges1 strand1     seqnames2   ranges2 strand2 |   cis
#>           <Rle> <IRanges>   <Rle>         <Rle> <IRanges>   <Rle> | <Rle>
#>   [1]      chr1      1-10       * ---      chr1      1-10       * |  TRUE
#>   [2]      chr3      3-10       * ---      chr3      1-10       * |  TRUE
#>   [3]      chr4      4-10       * ---      chr4      1-10       * |  TRUE
#>           score        gc
#>       <numeric> <numeric>
#>   [1]  40.23282  0.975548
#>   [2]  40.35381  0.678380
#>   [3]   6.36615  0.735320
#>   -------
#>   regions: 9 ranges and 0 metadata columns
#>   seqinfo: 5 sequences from an unspecified genome; no seqlengths