# -*- coding: utf-8 -*-
"""Code for re-generating results from Mirchi et al., 2018 (SCAN)."""
import os
from urllib.request import HTTPError, urlopen
import numpy as np
from .utils import _get_data_dir
# URL template for a single session's parcel time series file; both ``{0}``
# placeholders are filled with the same session label (e.g., '016')
TIMESERIES = (
    "https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2/"
    "uncompressed/derivatives/sub-01/ses-{0}/"
    "sub-01_ses-{0}_task-rest_run-001_parcel-timeseries.txt"
)
# URL of the sessions table (.tsv) holding the behavioral data for sub-01
BEHAVIOR = (
    "https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.4/"
    "uncompressed/sub-01/sub-01_sessions.tsv"
)
# Session labels (zero-padded strings) that have a parcelled time series and
# all PANAS items; grouped below by decade for easier scanning
SESSIONS = [
    '016', '019',
    '025', '026', '028', '029',
    '030', '032', '035', '037', '038', '039',
    '040', '041', '042', '043', '044', '045', '046', '047', '048', '049',
    '050', '051', '053', '054', '056', '057', '058', '059',
    '060', '061', '062', '063', '064', '065', '066', '067', '068', '069',
    '070', '071', '072', '073', '074', '075', '076', '077', '078', '079',
    '080', '081', '082', '083', '084', '085', '086', '087', '088', '089',
    '091', '092', '094', '095', '096', '097', '098', '099',
    '100', '101', '102', '103', '104',
]
# Mapping of PANAS subscale name -> item names that make up that subscale.
# Key order is meaningful: it determines the order in which subscales are
# produced when iterating over this dictionary.
PANAS = {
    'negative': ['afraid', 'scared', 'nervous', 'jittery', 'irritable',
                 'hostile', 'guilty', 'ashamed', 'upset', 'distressed'],
    'positive': ['active', 'alert', 'attentive', 'determined',
                 'enthusiastic', 'excited', 'inspired', 'interested',
                 'proud', 'strong'],
    'fear': ['afraid', 'scared', 'frightened', 'nervous', 'jittery',
             'shaky'],
    'hostility': ['angry', 'hostile', 'irritable', 'scornful', 'disgusted',
                  'loathing'],
    'guilt': ['guilty', 'ashamed', 'blameworthy', 'angry_at_self',
              'disgusted_with_self', 'dissatisfied_with_self'],
    'sadness': ['sad', 'blue', 'downhearted', 'alone', 'lonely'],
    'joviality': ['happy', 'joyful', 'delighted', 'cheerful', 'excited',
                  'enthusiastic', 'lively', 'energetic'],
    'self-assurance': ['proud', 'strong', 'confident', 'bold', 'daring',
                       'fearless'],
    'attentiveness': ['alert', 'attentive', 'concentrating', 'determined'],
    'shyness': ['shy', 'bashful', 'sheepish', 'timid'],
    'fatigue': ['sleepy', 'tired', 'sluggish', 'drowsy'],
    'serenity': ['calm', 'relaxed', 'at_ease'],
    'surprise': ['amazed', 'surprised', 'astonished'],
}
def _get_fc(data_dir=None, resume=True, verbose=1):
    """
    Get functional connections from MyConnectome parcelled time series data.

    Parameters
    ----------
    data_dir : str, optional
        Accepted for signature parity with `fetch_mirchi2018`; not used by
        this function. Default: None
    resume : bool, optional
        Accepted for signature parity with `fetch_mirchi2018`; not used by
        this function. Default: True
    verbose : int, optional
        If greater than zero, print one progress message per session that is
        fetched. Default: 1

    Returns
    -------
    fc : (73, 198135) numpy.ndarray
        Functional connections (lower triangle)

    Raises
    ------
    urllib.error.HTTPError
        If a session's time series cannot be fetched. Note that `urlopen`
        raises this on its own for error responses; the explicit check below
        additionally rejects any non-200 status.
    """
    # download time series data for all sessions
    ts = []
    for ses in SESSIONS:
        if verbose > 0:
            print('Fetching time series for session {}'.format(ses))
        url = TIMESERIES.format(ses)
        # use the response as a context manager so the connection is always
        # released, including when loading the data fails
        with urlopen(url) as out:
            if out.status != 200:
                # HTTPError requires (url, code, msg, hdrs, fp); passing only
                # a message would raise TypeError instead of HTTPError
                raise HTTPError(
                    url, out.status,
                    'Failed to fetch time series data: session {}'
                    .format(ses),
                    out.headers, None)
            ts.append(np.loadtxt(out.readlines()))

    # get lower triangle (excluding the diagonal) of the correlation matrix
    # for each session; time series are transposed so that each row passed
    # to np.corrcoef is one variable
    fc = [
        np.corrcoef(series.T)[np.tril_indices(len(series.T), k=-1)]
        for series in ts
    ]

    # return stacked sessions
    return np.vstack(fc)
def _get_panas(data_dir=None, resume=True, verbose=1):
    """
    Get PANAS subscales from MyConnectome behavioral data.

    Parameters
    ----------
    data_dir : str, optional
        Accepted for signature parity with `fetch_mirchi2018`; not used by
        this function. Default: None
    resume : bool, optional
        Accepted for signature parity with `fetch_mirchi2018`; not used by
        this function. Default: True
    verbose : int, optional
        Accepted for signature parity with `fetch_mirchi2018`; not used by
        this function. Default: 1

    Returns
    -------
    panas : dict
        Where keys are PANAS subscales names and values are session-level
        composite measures

    Raises
    ------
    urllib.error.HTTPError
        If the behavioral data cannot be fetched. Note that `urlopen` raises
        this on its own for error responses; the explicit check below
        additionally rejects any non-200 status.
    """
    from numpy.lib.recfunctions import structured_to_unstructured as stu

    # download behavioral data; the context manager guarantees the response
    # is closed once the lines have been read
    with urlopen(BEHAVIOR) as out:
        if out.status != 200:
            # HTTPError requires (url, code, msg, hdrs, fp); passing only a
            # message would raise TypeError instead of HTTPError
            raise HTTPError(BEHAVIOR, out.status,
                            'Cannot fetch behavioral data',
                            out.headers, None)
        data = out.readlines()

    # drop sessions with missing PANAS items: read the first column (session
    # labels) and keep only rows whose label appears in SESSIONS
    sessions = np.genfromtxt(data, delimiter='\t', usecols=0, dtype=object,
                             names=True, converters={0: lambda s: s.decode()})
    keeprows = np.isin(sessions, ['ses-{}'.format(f) for f in SESSIONS])
    # NOTE(review): columns 28-90 are presumably the individual PANAS item
    # columns of the sessions table -- confirm against the file's header
    panas = np.genfromtxt(data, delimiter='\t', names=True, dtype=float,
                          usecols=range(28, 91))[keeprows]

    # create subscales from individual item scores: each subscale is the sum
    # over its items, which are looked up as fields named 'panas<item>'
    measures = {}
    for subscale, items in PANAS.items():
        measure = stu(panas[['panas{}'.format(f) for f in items]])
        measures[subscale] = measure.sum(axis=-1)

    return measures
def fetch_mirchi2018(data_dir=None, resume=True, verbose=1):
    """
    Download (and create) dataset for replicating Mirchi et al., 2018, SCAN.

    Previously generated files are re-used when present; otherwise the data
    are fetched, derived, and saved for subsequent calls.

    Parameters
    ----------
    data_dir : str, optional
        Passed to `_get_data_dir` to resolve the base data directory. Data
        files are read from (if they exist) or saved to a 'ds-mirchi2018'
        sub-directory of it, named myconnectome_fc.npy and
        myconnectome_panas.csv for the functional connectivity and
        behavioral data, respectively. Default: None
    resume : bool, optional
        Forwarded to the helper functions that fetch the data. Default: True
    verbose : int, optional
        Forwarded to the helper functions that fetch the data. Default: 1

    Returns
    -------
    X : (73, 198135) numpy.ndarray
        Functional connections from MyConnectome rsfMRI time series data
    Y : (73,) numpy.ndarray
        Structured array of PANAS subscales from MyConnectome behavioral
        data, with one integer field per subscale
    """
    data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018')
    os.makedirs(data_dir, exist_ok=True)

    X_fname = os.path.join(data_dir, 'myconnectome_fc.npy')
    Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv')

    # functional connectivity: generate and cache, or load from cache
    if not os.path.exists(X_fname):
        X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose)
        np.save(X_fname, X, allow_pickle=False)
    else:
        X = np.load(X_fname, allow_pickle=False)

    # behavioral data: generate and cache, or load from cache
    if not os.path.exists(Y_fname):
        Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose)
        np.savetxt(Y_fname, np.column_stack(list(Y.values())),
                   header=','.join(Y.keys()), delimiter=',', fmt='%i')
        # convert dictionary to structured array before returning
        Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))],
                     dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y)))
    else:
        # NOTE(review): genfromtxt sanitises header names by default, which
        # may strip the hyphen from 'self-assurance'; if so, this cached path
        # yields a different field name than the freshly generated path
        # above -- confirm before relying on that field name
        Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int)

    return X, Y