using GenomicFeatures
using DataFrames
col = [
Interval("chr1", 10628, 10683, '?', "abc")
Interval("chr1", 10643, 10779, '?', "abc")
Interval("chr1", 10645, 10748, '?', "abc")
Interval("chr1", 10648, 10786, '?', "abc")
Interval("chr1", 10676, 10767, '?', "abc")
Interval("chr1", 10690, 10731, '?', "abc")
Interval("chr1", 10707, 10793, '?', "abc")
Interval("chr1", 10742, 10814, '?', "abc")
] |> IntervalCollection
hhh = [
Interval("chr1", 10631, 10638)
Interval("chr1", 10633, 10635)
Interval("chr1", 10636, 10650)
Interval("chr1", 10638, 10649)
Interval("chr1", 10641, 10651)
Interval("chr1", 10644, 10689)
Interval("chr1", 10645, 10660)
Interval("chr1", 10650, 10650)
] |> IntervalCollection
But I want to findoverlap with minoverlap=5.I tried like the following
function number_interval(tp::Tuple)
# Unpack Tuple.
(i, interval) = tp
# Setup numbered metadata.
new_metadata = (
i = i,
original = metadata(interval)
)
# Create new interval with numbered metadata.
return Interval(
seqname(interval),
leftposition(interval),
rightposition(interval),
strand(interval),
new_metadata
)
end
# Convert intervals.
col_numbered = col |> enumerate .|> number_interval |> IntervalCollection
hhh_numbered = hhh |> enumerate .|> number_interval |> IntervalCollection
# Reproduce example.
df = DataFrame()
for (c, h) in eachoverlap(col_numbered, hhh_numbered)
result = (
queryHits = GenomicFeatures.metadata(c).i,
subjectHits = GenomicFeatures.metadata(h).i
)
push!(df, result)
end
julia> df
27Γ2 DataFrame
Row β queryHits subjectHits
β Int64 Int64
ββββββΌββββββββββββββββββββββββ
1 β 1 1
2 β 1 2
3 β 1 3
4 β 1 4
5 β 1 5
6 β 1 6
7 β 1 7
8 β 1 8
9 β 2 3
10 β 2 4
11 β 2 5
12 β 2 6
13 β 2 7
14 β 2 8
15 β 3 3
But I can not solve the problem with minoverlap=5.For example,hhhβs first line has more than 5 overlaps with colβs first ,I will output the index of colβs index and hhhβs index.hhhβs second line does not have 5 overlaps,it will not occur in the final dataframe.The function above seems to solve minoverlap=1.What should I do to solve this problem? Thank all guys for helping me!