@@ -4,7 +4,7 @@
Loading
4 4
    paired_reads_datastores::Vector{Union{PairedReads{DNAAlphabet{2}},PairedReads{DNAAlphabet{4}}}}
5 5
    long_reads_datastores::Vector{LongReads}
6 6
    linked_reads_datastores::Vector{LinkedReads}
7 -
    mer_count_stores::Vector{MerCounts}
7 +
    mer_count_stores::Vector{IndexedCounts}
8 8
end
9 9
10 10
"Create an empty workspace"
@@ -13,7 +13,7 @@
Loading
13 13
                     Vector{PairedReads}(),
14 14
                     Vector{LongReads}(),
15 15
                     Vector{LinkedReads}(),
16 -
                     Vector{MerCounts}())
16 +
                     Vector{IndexedCounts}())
17 17
end
18 18
19 19
"Return the value stored in the `sdg` field of the workspace `ws`."
function graph(ws::WorkSpace)
    return ws.sdg
end

@@ -248,6 +248,7 @@
Loading
248 248
end
249 249
250 250
"Write a one-line description of `sdg`, including its node count, to `io`."
function Base.summary(io::IO, sdg::SequenceDistanceGraph)
    return print(io, "Sequence distance graph (", n_nodes(sdg), " nodes)")
end
251 +
# Display a graph as its one-line summary. `show` methods should not emit a
# trailing newline (the caller, e.g. the REPL or `println`, adds it), and writing
# straight to `io` avoids building an intermediate string.
Base.show(io::IO, sdg::SequenceDistanceGraph) = summary(io, sdg)
251 252
252 253
## Internal / not-necessarily-safe
253 254
254 255
similarity index 86%
255 256
rename from src/processes/dbg.jl
256 257
rename to src/dbg.jl

@@ -7,7 +7,7 @@
Loading
7 7
    ### Re-exports of GenomeGraphs framework sub-components.
8 8
    ###
9 9
    
10 -
    # ReadDatastores
10 +
    # Re-exports of ReadDatastores
11 11
    ReadDatastore,
12 12
    PairedReads,
13 13
    LongReads,
@@ -15,11 +15,13 @@
Loading
15 15
    buffer,
16 16
    FwRv,
17 17
    
18 -
    # FASTX
18 +
    # Re-exports of FASTX
19 19
    FASTA,
20 20
    FASTQ,
21 21
    
22 -
    # BioSequences
22 +
    ###
23 +
    ### Re-exports of BioSequences.jl
24 +
    ###
23 25
    DNAAlphabet,
24 26
    BioSequence,
25 27
    LongSequence,
@@ -32,17 +34,29 @@
Loading
32 34
    BigDNAKmer,
33 35
    
34 36
    ###
35 -
    ### MerCounts
37 +
    ### Re-exports of KmerAnalysis.jl
36 38
    ###
37 -
    MerCounts,
39 +
    mer,
40 +
    freq,
41 +
    Canonical,
42 +
    NonCanonical,
43 +
    CANONICAL,
44 +
    NONCANONICAL,
45 +
    # Kmer counters
46 +
    serial_mem,
47 +
    dist_mem,
48 +
    spectra,
38 49
    
39 -
    ### WorkSpace
40 -
    WorkSpace,
41 -
    add_paired_reads!,
42 -
    paired_reads,
43 -
    add_mer_counts!,
44 -
    mer_counts,
50 +
    empty_graph,
51 +
    #GRAPH_TYPE,
45 52
    
53 +
    ### WorkSpace
54 +
    #WorkSpace,
55 +
    #add_paired_reads!,
56 +
    #paired_reads,
57 +
    #add_mer_counts!,
58 +
    #mer_counts,
59 +
    read_datastore,
46 60
    ###
47 61
    ### Processes
48 62
    ###
@@ -50,17 +64,27 @@
Loading
50 64
    dbg!,
51 65
    remove_tips!
52 66
53 -
include("MerTools.jl")     # Submodule with tools for working with and counting kmers.
67 +
using BioSequences, FASTX, ReadDatastores, KmerAnalysis
68 +
import BioSequences.EveryMerIterator
69 +
54 70
include("Graphs.jl")       # Submodule defining the key Graph type and basic methods.
55 71
include("GraphIndexes.jl") # Submodule defining types that allow indexing of a graph.
56 72
57 -
using BioSequences, FASTX, ReadDatastores
58 -
import BioSequences.EveryMerIterator
59 73
60 -
include("datastores/kmer-counts.jl")
61 -
include("workspace/WorkSpace.jl")
62 -
include("views/NodeView.jl")
63 74
64 -
include("processes/dbg.jl")
65 -
include("processes/remove_tips.jl")
75 +
#include("workspace/WorkSpace.jl")
76 +
#include("views/NodeView.jl")
77 +
78 +
# Utility function for more quickly making a read datastore.
79 +
"""
    read_datastore(R1file, R2file, name, minlen, maxlen, insertlen, mode)

Open the two FASTQ files `R1file` and `R2file` and build a
`PairedReads{DNAAlphabet{4}}` datastore from them.

`name` is passed twice to the `PairedReads` constructor (presumably as both the
output file name and the datastore name; confirm against ReadDatastores).
`minlen`, `maxlen`, `insertlen` and `mode` are forwarded to the constructor
unchanged.

Both FASTQ readers are closed before returning, including when construction
throws.
"""
function read_datastore(R1file::String, R2file::String, name::String, minlen::Int, maxlen::Int, insertlen::Int, mode::PairedReadOrientation)
    fwq = open(FASTQ.Reader, R1file)
    try
        rvq = open(FASTQ.Reader, R2file)
        try
            return PairedReads{DNAAlphabet{4}}(fwq, rvq, name, name, minlen, maxlen, insertlen, mode)
        finally
            # Release the reverse-read file handle regardless of success.
            close(rvq)
        end
    finally
        # Release the forward-read file handle regardless of success.
        close(fwq)
    end
end
84 +
85 +
"Construct a `Graphs.SequenceDistanceGraph{T}` with no arguments, for a sequence type `T <: LongSequence`."
function empty_graph(::Type{T}) where {T<:LongSequence}
    return Graphs.SequenceDistanceGraph{T}()
end
86 +
87 +
include("dbg.jl")
88 +
include("remove_tips.jl")
89 +
66 90
end # module GenomeGraphs

@@ -91,10 +91,13 @@
Loading
91 91
92 92
const GRAPH_TYPE = Graphs.SequenceDistanceGraph{LongSequence{DNAAlphabet{4}}}
93 93
94 -
function build_unitigs_from_sorted_kmers!(
94 +
function build_unitigs!(
95 95
    sg::Graphs.SequenceDistanceGraph{LongSequence{A}},
96 96
    kmerlist::Vector{M}) where {A<:DNAAlphabet,M<:AbstractMer{DNAAlphabet{2}}}
97 97
    
98 +
    if !issorted(kmerlist)
99 +
        sort!(kmerlist)
100 +
    end
98 101
    
99 102
    @info string("Constructing unitigs from ", length(kmerlist), " ", BioSequences.ksize(M), "-mers")
100 103
    used_kmers = falses(length(kmerlist))
@@ -236,43 +239,44 @@
Loading
236 239
    end
237 240
end
238 241
239 -
"""
240 -
"""
241 -
function dbg(::Type{M}, min_freq::Integer, file::String, name::Union{String,Nothing} = nothing) where {M<:AbstractMer}
242 -
    ws = WorkSpace()
243 -
    ds = open(PairedReads, file, name)
244 -
    add_paired_reads!(ws, ds)
245 -
    _dbg!(ws.sdg, ds, M, UInt8(min_freq))
246 -
    return ws
242 +
#=
243 +
function dbg!(ws::WorkSpace, counted_kmers::Vector{<:MerCount}, min_freq::Integer)
244 +
    return dbg!(graph(ws), counted_kmers, min_freq)
247 245
end
246 +
=#
248 247
249 -
"""
250 -
    dbg!(ws::WorkSpace, ds::String, ::Type{M}, min_freq::Integer, name::Symbol) where {M<:AbstractMer}
251 -
248 +
#=
249 +
function dbg!(graph::GRAPH_TYPE, kmerlist::Vector{M}) where {M<:AbstractMer}
250 +
    str = string("onstructing compressed de-bruijn graph from ", length(kmerlist), ' ', BioSequences.ksize(M), "-mers")
251 +
    @info string('C', str)
252 +
    build_unitigs_from_sorted_kmers!(graph, kmerlist)
253 +
    if Graphs.n_nodes(graph) > 1
254 +
        connect_unitigs_by_overlaps!(graph, M)
255 +
    end
256 +
    @info string("Done c", str)
257 +
    return sg
258 +
end
259 +
=#
252 260
253 -
"""
254 -
function dbg!(ws::WorkSpace, ::Type{M}, min_freq::Integer, name::Symbol) where {M<:AbstractMer}
255 -
    reads = paired_reads(ws, name)
256 -
    _dbg!(ws.sdg, reads, M, UInt8(min_freq))
257 -
    return ws
261 +
"""
    dbg!(sdg::GRAPH_TYPE, kmers)

Populate `sdg` from the collection `kmers` and return `sdg`.

Unitigs are built with `build_unitigs!`; if the graph then has more than one
node, `connect_unitigs_by_overlaps!` is called with the element type of `kmers`.

Throws an `ArgumentError` when `eltype(kmers)` is not a subtype of `AbstractMer`.
"""
function dbg!(sdg::GRAPH_TYPE, kmers)
    merkind = eltype(kmers)
    merkind <: AbstractMer ||
        throw(ArgumentError("Didn't provide a collection of kmers for the `kmers` parameter did ya?"))

    @info dbg_message(kmers)
    build_unitigs!(sdg, kmers)
    # A single node has nothing to be linked to, so linking is skipped.
    Graphs.n_nodes(sdg) > 1 && connect_unitigs_by_overlaps!(sdg, merkind)
    @info "Done"
    return sdg
end
259 273
260 -
function _dbg!(sg::GRAPH_TYPE, ds::PairedReads{<:DNAAlphabet}, ::Type{M}, min_freq::UInt8) where {M<:AbstractMer}
261 -
    @info "Counting kmers in datastore"
262 -
    # In the future do a better kmer counting - but this will do for e.coli to prove a point.
263 -
    spectra = MerTools.build_freq_list(M, buffer(ds), 1:Int(length(ds)))
264 -
    filter!(x -> MerTools.freq(x) ≥ min_freq, spectra)
265 -
    merlist = [MerTools.mer(x) for x in spectra]
266 -
    return _dbg!(sg, merlist)
274 +
"""
    dbg!(sg::GRAPH_TYPE, counted_kmers::Vector{<:MerCount}, min_freq::Integer)

Keep only the entries of `counted_kmers` whose `freq` is at least `min_freq`,
extract their `mer`s, and pass them to the two-argument `dbg!(sg, kmers)`,
whose result is returned.

Note: `counted_kmers` is filtered in place, so the caller's vector loses every
entry below `min_freq`.
"""
function dbg!(sg::GRAPH_TYPE, counted_kmers::Vector{<:MerCount}, min_freq::Integer)
    # In-place: low-frequency entries are removed from the caller's vector.
    filter!(kc -> freq(kc) ≥ min_freq, counted_kmers)
    return dbg!(sg, [mer(kc) for kc in counted_kmers])
end
268 279
269 -
function _dbg!(sg::GRAPH_TYPE, kmerlist::Vector{M}) where {M<:AbstractMer}
270 -
    str = string("Constructing compressed de-bruijn graph from ", length(kmerlist), ' ', BioSequences.ksize(M), "-mers")
271 -
    @info string('C', str)
272 -
    build_unitigs_from_sorted_kmers!(sg, kmerlist)
273 -
    if Graphs.n_nodes(sg) > 1
274 -
        connect_unitigs_by_overlaps!(sg, M)
275 -
    end
276 -
    @info string("Done c", str)
277 -
    return sg
280 +
# Build the log message announcing graph construction: it reports how many
# k-mers are in `x` and the k-mer size reported by `BioSequences.ksize(M)`.
@inline function dbg_message(x::Vector{M}) where {M<:AbstractMer}
    nkmers = length(x)
    k = BioSequences.ksize(M)
    return string("Constructing a compressed de-bruijn graph from ", nkmers, ' ', k, "-mers")
end
279 283
similarity index 88%
280 284
rename from src/processes/remove_tips.jl
281 285
rename to src/remove_tips.jl

@@ -1,7 +1,7 @@
Loading
1 1
2 -
function remove_tips!(ws::WorkSpace, min_size::Integer)
2 +
function remove_tips!(sdg::Graphs.SDG, min_size::Integer)
3 3
    @info "Beginning tip removal process"
4 -
    sdg = graph(ws)
4 +
    #sdg = graph(ws)
5 5
    pass = 1
6 6
    tips = Graphs.find_tip_nodes(sdg, min_size)
7 7
    ntips = length(tips)
@@ -25,5 +25,6 @@
Loading
25 25
        ntips = length(tips)
26 26
    end
27 27
    @info string("Finished tip removal process in ", pass, " passes")
28 -
    return ws
28 +
    #return ws
29 +
    return sdg
29 30
end
Files Coverage
src 0.52%
Project Totals (7 files) 0.52%
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests
codecov-umbrella
Build #158792955 -
unittests

No yaml found.

Create your codecov.yml to customize your Codecov experience

Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading