Skip to content

Commit c4188ab

Browse files
Add TemporalBrains dataset (#222)
* init * Add create_dataset * Export TemporalBrains * add struct and constructor * Optimized version * Update docs * Add tests * Add spaces Co-authored-by: Carlo Lucibello <[email protected]> * Add link * Improve docstring * Add `TemporalBrains` to docs * Improve * Fix & for `julia 1.6` --------- Co-authored-by: Carlo Lucibello <[email protected]>
1 parent 60a2f05 commit c4188ab

File tree

4 files changed

+98
-0
lines changed

4 files changed

+98
-0
lines changed

docs/src/datasets/graphs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,5 @@ Reddit
3232
TUDataset
3333
METRLA
3434
PEMSBAY
35+
TemporalBrains
3536
```

src/MLDatasets.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ include("datasets/graphs/metrla.jl")
135135
export METRLA
136136
include("datasets/graphs/pemsbay.jl")
137137
export PEMSBAY
138+
include("datasets/graphs/temporalbrains.jl")
139+
export TemporalBrains
138140

139141
# Meshes
140142

@@ -156,6 +158,7 @@ function __init__()
156158
__init__tudataset()
157159
__init__metrla()
158160
__init__pemsbay()
161+
__init__temporalbrains()
159162

160163
# misc
161164
__init__iris()

src/datasets/graphs/temporalbrains.jl

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Register the "TemporalBrains" data dependency as a `ManualDataDep`.
# The registration message only names the dataset and points at its website.
function __init__temporalbrains()
    depname = "TemporalBrains"
    website = "http://www-sop.inria.fr/members/Aurora.Rossi/index.html"
    message = """
              Dataset: $depname
              Website : $website
              """
    dep = ManualDataDep(depname, message)
    register(dep)
end
10+
11+
12+
"""
    tb_datadir(dir = nothing)

Return the directory holding the TemporalBrains files.

If `dir` is `nothing`, the directory of the `"TemporalBrains"` data dependency is used.
When the directory is empty, `LabelledTBN.zip` is downloaded into it and unpacked there
before the directory is returned.
"""
function tb_datadir(dir = nothing)
    dir = isnothing(dir) ? datadep"TemporalBrains" : dir
    LINK = "http://www-sop.inria.fr/members/Aurora.Rossi/data/LabelledTBN.zip"
    if isempty(readdir(dir))
        DataDeps.fetch_default(LINK, dir)
        # `unpack` extracts into the working directory. The `do`-block form of `cd`
        # restores the previous working directory even when extraction throws, so a
        # failed download/unpack no longer leaves the process inside `dir`.
        cd(dir) do
            DataDeps.unpack(joinpath(dir, "LabelledTBN.zip"))
        end
    end
    @assert isdir(dir)
    return dir
end
26+
27+
28+
# Build one `TemporalSnapshotsGraph` per line of `LabelledTBN/labels.txt` under `dir`.
#
# Each label line is split into a subject id, a gender and an age. The subject's network
# file is read as a comma-delimited `Float32` table (first row skipped). Rows whose 4th
# column is greater than `thre` are kept; their first three columns are converted to `Int`
# and passed as the edge index, and the 4th column is averaged per (node, snapshot) to
# form the node data.
#
# Returns a `Vector{MLDatasets.TemporalSnapshotsGraph}` with one entry per label line.
function create_tbdataset(dir, thre)
    num_nodes = 102
    num_snapshots = 27
    datadir = joinpath(dir, "LabelledTBN")

    # `readlines` opens and closes the labels file itself, so no handle is leaked, and the
    # output can be sized from the actual number of subjects instead of a fixed 1000.
    labels = readlines(joinpath(datadir, "labels.txt"))
    temporalgraphs = Vector{MLDatasets.TemporalSnapshotsGraph}(undef, length(labels))

    for (i, line) in enumerate(labels)
        id, gender, age = split(line)
        name_network_file = joinpath(datadir, "networks",
                                     id * "_ws60_wo30_tuk0_pearson_schaefer_100.txt")

        data = readdlm(name_network_file, ',', Float32; skipstart = 1)

        # Keep only the rows whose 4th column exceeds the threshold.
        data_thre = view(data, view(data, :, 4) .> thre, :)
        data_thre_int = Int.(view(data_thre, :, 1:3))

        # Loop-invariant columns, extracted once per subject.
        col1 = data_thre_int[:, 1]
        col2 = data_thre_int[:, 2]
        col3 = data_thre_int[:, 3]
        weights = view(data_thre, :, 4)

        activation = [zeros(Float32, num_nodes) for _ in 1:num_snapshots]
        for t in 1:num_snapshots
            for n in 1:num_nodes
                rows = (col1 .== n) .& (col3 .== t)
                # NOTE: when no retained row matches (n, t) this is the mean of an empty
                # selection, which `mean` reports as NaN.
                activation[t][n] = mean(view(weights, rows))
            end
        end

        temporalgraphs[i] = TemporalSnapshotsGraph(num_nodes = fill(num_nodes, num_snapshots),
                                                   edge_index = (col1, col2, col3),
                                                   node_data = activation,
                                                   graph_data = (g = gender, a = age))
    end
    return temporalgraphs
end
54+
55+
"""
56+
TemporalBrains(; dir = nothing, threshold_value = 0.6)
57+
58+
The TemporalBrains dataset contains a collection of temporal brain networks (as `TemporalSnapshotsGraph`s) of 1000 subjects obtained from resting-state fMRI data from the [Human Connectome Project (HCP)](https://www.humanconnectome.org/study/hcp-young-adult/document/extensively-processed-fmri-data-documentation).
59+
60+
The number of nodes is fixed for each of the 27 snapshots at 102, while the edges change over time.
61+
62+
For each `Graph` snapshot, the feature of a node represents the average activation of the node during that snapshot and it is contained in `Graphs.node_data`.
63+
64+
Each `TemporalSnapshotsGraph` has a label representing their gender ("M" for male and "F" for female) and age range (22-25, 26-30, 31-35 and 36+) contained as a named tuple in `graph_data`.
65+
66+
The `threshold_value` is used to binarize the edge weights and is set to 0.6 by default.
67+
"""
68+
struct TemporalBrains <: AbstractDataset
69+
graphs::Vector{MLDatasets.TemporalSnapshotsGraph}
70+
end
71+
72+
# Keyword constructor: make sure the default data directory exists, resolve (and if
# needed populate) the data directory, then build the graphs with the given threshold.
function TemporalBrains(; threshold_value = 0.6, dir = nothing)
    create_default_dir("TemporalBrains")
    datadir = tb_datadir(dir)
    return TemporalBrains(create_tbdataset(datadir, threshold_value))
end
78+
79+
# Number of temporal graphs (one per subject) in the dataset.
Base.length(d::TemporalBrains) = length(d.graphs)

# `d[:]` returns every temporal graph. It previously returned only `d.graphs[1]`, which
# disagreed with `length(d)` and with `d[1:end]` for a dataset holding many graphs.
Base.getindex(d::TemporalBrains, ::Colon) = getindex(d.graphs, :)

# Any other index (integer, range, vector, ...) is forwarded to the underlying vector.
Base.getindex(d::TemporalBrains, i) = getindex(d.graphs, i)

test/datasets/graphs_no_ci.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,4 +363,17 @@ end
363363
@test g.num_nodes == 325
364364
@test g.num_edges == 2694
365365
@test all(g.node_data.features[1][:,:,1][2:end,1] == g.node_data.targets[1][:,:,1][1:end-1])
366+
end
367+
368+
@testset "TemporalBrains" begin
    dataset = TemporalBrains()
    @test dataset isa AbstractDataset
    @test length(dataset) == 1000

    # Inspect the temporal graph of the first subject.
    tg = dataset[1]
    @test tg isa MLDatasets.TemporalSnapshotsGraph
    @test tg.num_nodes == fill(102, 27)
    @test tg.num_snapshots == 27

    # Each snapshot is a plain `Graph` carrying one feature per node.
    first_snapshot = tg.snapshots[1]
    @test first_snapshot isa MLDatasets.Graph
    @test length(first_snapshot.node_data) == 102
end

0 commit comments

Comments
 (0)