Summarize GInteractions per group
Value
An S4Vectors::DataFrame() object:
The rows come from the underlying group_keys().
The columns are a combination of the grouping keys and the summary expressions that you provide.
The GInteractions class is not preserved, as a call to summarize() fundamentally creates a new data frame.
Examples
gi <- read.table(text = "
chr1 11 20 chr1 21 30 + +
chr1 11 20 chr1 51 55 + +
chr1 11 30 chr1 51 55 - -
chr1 11 30 chr2 51 60 - -",
col.names = c(
"seqnames1", "start1", "end1",
"seqnames2", "start2", "end2", "strand1", "strand2")
) |>
as_ginteractions() |>
mutate(score = runif(4), type = c('cis', 'cis', 'cis', 'trans'))
####################################################################
# 1. Summarize by a single column
####################################################################
gi
#> GInteractions object with 4 interactions and 2 metadata columns:
#> seqnames1 ranges1 strand1 seqnames2 ranges2 strand2 | score
#> <Rle> <IRanges> <Rle> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 11-20 + --- chr1 21-30 + | 0.769675
#> [2] chr1 11-20 + --- chr1 51-55 + | 0.990712
#> [3] chr1 11-30 - --- chr1 51-55 - | 0.970521
#> [4] chr1 11-30 - --- chr2 51-60 - | 0.389183
#> type
#> <character>
#> [1] cis
#> [2] cis
#> [3] cis
#> [4] trans
#> -------
#> regions: 6 ranges and 0 metadata columns
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
gi |> group_by(type) |> summarize(m = mean(score))
#> DataFrame with 2 rows and 2 columns
#> type m
#> <character> <numeric>
#> 1 cis 0.910303
#> 2 trans 0.389183
gi |> group_by(strand1) |> summarize(m = mean(score))
#> DataFrame with 2 rows and 2 columns
#> strand1 m
#> <Rle> <numeric>
#> 1 + 0.880194
#> 2 - 0.679852
df <- gi |>
group_by(strand1) |>
summarize(m = mean(score), n = table(seqnames2))
df
#> DataFrame with 2 rows and 3 columns
#> strand1 m n
#> <Rle> <numeric> <IntegerList>
#> 1 + 0.880194 2,0
#> 2 - 0.679852 1,1
df$n
#> IntegerList of length 2
#> [["1"]] chr1=2 chr2=0
#> [["2"]] chr1=1 chr2=1
####################################################################
# 2. Summarize by multiple columns
####################################################################
gi |>
group_by(strand1, seqnames2) |>
summarise(m = mean(score), n = table(type))
#> DataFrame with 3 rows and 4 columns
#> strand1 seqnames2 m n
#> <Rle> <Rle> <numeric> <IntegerList>
#> 1 + chr1 0.880194 2,0
#> 2 - chr1 0.970521 1,0
#> 3 - chr2 0.389183 0,1