Miscellaneous Scripts

Here are some scripts for common tasks: categorizing data into groups, retrieving PSG file details, removing files by index, and measuring the runtime of a program.

Assigning Subjects to Groups: Topoplots

Categorize subjects into Meditators and Controls. Make Topoplots for the two groups.

#%% Import libraries | Block 0

import pandas as pd
from scipy.stats import trim_mean
import yasa
import matplotlib.pyplot as plt

#%% Load files | Block 1

# CSV file containing the subject info
mastersheet_csv = '/serverdata/ccshome/nandanik/Downloads/mastersheet.csv'

# CSV file containing the compiled aperiodic parameters
aperiodic_csv = '/serverdata/ccshome/nandanik/Documents/CSV/nk_aperiodic_fooof_sleepdata_.csv'

# Read both tables into DataFrames
masterfile = pd.read_csv(mastersheet_csv)
report_sleepstages_II = pd.read_csv(aperiodic_csv)

#%% Assign category to subjects | Block 1

# Unique subject file names found in the aperiodic report
filenames = report_sleepstages_II['Subject'].unique()

# Look-up table: mapping code -> group label. When a code is repeated in the
# mastersheet, only its first row is used.
code_to_group = (
    masterfile
    .drop_duplicates(subset='MappingCode')
    .set_index('MappingCode')['Group']
    .to_dict()
)

# The mapping code is the part of the file name before the first '.'.
# Stop early, naming the offenders, if any subject has no mastersheet entry.
unmatched = [name for name in filenames if name.split('.')[0] not in code_to_group]
if unmatched:
    raise ValueError(
        "No 'MappingCode' entry in the mastersheet for subject(s): "
        + ', '.join(unmatched)
    )

# group labels, in the same order as `filenames`
group = [code_to_group[name.split('.')[0]] for name in filenames]

# Add the group label to the 'report_sleepstages_II' dataframe: every row
# receives the label of the subject it belongs to
report_sleepstages_II['Group'] = report_sleepstages_II['Subject'].map(
    dict(zip(filenames, group))
)

# saving to a csv file
report_sleepstages_II.to_csv('/serverdata/ccshome/nandanik/Documents/CSV/nk_aperiodic_fooof_sleepdata_2.csv', index=False)

#%% Averaging across epochs | Block 2

# Trimmed mean: fraction passed to scipy's trim_mean as `proportiontocut`
# (here, 10%)
trim_percentage = 0.1


def _trimmed_column_means(frame):
    """Apply the trimmed mean to every column from the third one onwards."""
    # NOTE(review): the positional slice assumes the first two columns are
    # not measurements; confirm against the layout of the input CSV.
    return frame.iloc[:, 2:].apply(trim_mean, proportiontocut=trim_percentage)


# One row per (Stage, Channel, Group) combination; reset_index turns the
# grouping keys back into regular columns
Channel_avg_vals = (
    report_sleepstages_II
    .groupby(['Stage', 'Channel', 'Group'])
    .apply(_trimmed_column_means)
    .reset_index()
)

# Keep only the integer that follows the first '_' in the channel label
channel_labels = Channel_avg_vals['Channel']
Channel_avg_vals['Channel'] = channel_labels.str.split('_').str[1].astype(int)

# Order the rows by stage, then by channel number, and renumber them
Channel_avg_vals = (
    Channel_avg_vals
    .sort_values(by=['Stage', 'Channel'], ascending=[True, True])
    .reset_index(drop=True)
)

# Split the dataframe into meditators and controls
is_meditator = Channel_avg_vals['Group'] == 'MED'
is_control = Channel_avg_vals['Group'] == 'CNT'
Med_group = Channel_avg_vals[is_meditator]
Cnt_group = Channel_avg_vals[is_control]

#%% Parameters for Topoplot | Block 3

channels_to_pick_topo = [
    'Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2',
    'F7', 'F8', 'T3', 'T4', 'Fz', 'Cz', 'Pz', 'A1', 'A2',
]


def _channel_by_stage(group_frame, parameter):
    """Pivot one group's averages into a channel x stage table of `parameter`."""
    table = group_frame.pivot(index='Channel', columns='Stage', values=parameter)
    # The index MUST be the channel names for yasa.
    # NOTE(review): labels are assigned by position, so this assumes the
    # numeric channel order matches the order of `channels_to_pick_topo`.
    table.index = channels_to_pick_topo
    return table


# Exponent vals: one row per channel, one column per sleep stage
Exponent_med = _channel_by_stage(Med_group, 'Exponent')
Exponent_cnt = _channel_by_stage(Cnt_group, 'Exponent')

# Offset vals: one row per channel, one column per sleep stage
Offset_med = _channel_by_stage(Med_group, 'Offset')
Offset_cnt = _channel_by_stage(Cnt_group, 'Offset')

#%% TOPOPLOT | Block 4

# define sleep stages
sleep_stages = ['W', 'N1', 'N2', 'N3', 'REM']

# Folder the figures are written to
topoplot_dir = '/serverdata/ccshome/nandanik/Documents/Topoplots/'

# (parameter, group label, channel x stage table), in plotting order:
# meditators first (exponent, offset), then controls (exponent, offset)
topoplot_jobs = [
    ('Exponent', 'MED', Exponent_med),
    ('Offset', 'MED', Offset_med),
    ('Exponent', 'CNT', Exponent_cnt),
    ('Offset', 'CNT', Offset_cnt),
]

for parameter, group_label, stage_table in topoplot_jobs:
    # Colour limits span both groups and all stages, so every map of the
    # same parameter shares one scale. They do not depend on the stage and
    # are therefore computed once per parameter.
    vmin = Channel_avg_vals[parameter].min()
    vmax = Channel_avg_vals[parameter].max()

    for stage in sleep_stages:
        yasa.topoplot(stage_table[stage], title=stage,
                      vmin=vmin,
                      vmax=vmax,
                      cmap='coolwarm',
                      n_colors=10)
        plt.tight_layout()  # adjusts layout of plot

        # Save BEFORE showing: with a blocking backend the figure is
        # discarded when the window closes, and a later savefig would
        # write an empty image.
        plt.savefig(topoplot_dir + parameter + '_' + group_label + '_' + stage,
                    facecolor='white')
        plt.show()
        plt.close()

PSG file details

Access PSG file properties. Build a DataFrame holding the PSG file name, channel names, channel count and sampling rate (srate) for each subject. Sort the files and remove those with srate != 500.

#%% Load files

# Modules used by this cell
import glob as gb
import os

# specify folderpath
folder_path_psg = '/serverdata/ccshome/nandanik/Documents/FOOOF_data/data'
file_pattern_psg = '*.edf'

# List containing file names. The working directory is switched to the data
# folder so that the names are relative to it.
os.chdir(folder_path_psg)
psg_files = sorted(gb.glob(file_pattern_psg))

#%% extract file properties
import mne  # used below; this script did not import it

# Accepted spellings of the 19 channel names (with and without the 'EEG '
# prefix, and with upper- or lower-case second letter where that varies).
# The list is constant, so it is built once rather than on every iteration.
channels_to_pick = [
    'Fp1', 'FP1', 'EEG Fp1', 'EEG FP1', 'Fp2', 'FP2', 'EEG Fp2', 'EEG FP2',
    'F3', 'EEG F3', 'F4', 'EEG F4', 'C3', 'EEG C3', 'C4', 'EEG C4',
    'P3', 'EEG P3', 'P4', 'EEG P4', 'O1', 'EEG O1', 'O2', 'EEG O2',
    'F7', 'EEG F7', 'F8', 'EEG F8', 'T3', 'EEG T3', 'T4', 'EEG T4',
    'Fz', 'FZ', 'EEG Fz', 'EEG FZ', 'Cz', 'CZ', 'EEG Cz', 'EEG CZ',
    'Pz', 'PZ', 'EEG Pz', 'EEG PZ', 'A1', 'A2', 'EEG A1', 'EEG A2',
]

n_channel = []      # number of picked channels per file
channel_names = []  # names of the picked channels per file
sfreq_n = []        # sampling frequency per file

for files in psg_files:
    # Only header fields (channel names, sampling rate) are read here, so
    # the signal itself is not loaded into memory.
    edfdata = mne.io.read_raw_edf(files, preload=False)

    # Keep only the channels of interest
    edfdata.pick_channels(channels_to_pick)

    n_channel.append(len(edfdata.ch_names))
    channel_names.append(edfdata.ch_names)
    sfreq_n.append(edfdata.info['sfreq'])


# Sanity check: both counts should match
print("Total files processed:", len(psg_files))
print("Total entries in n_channel:", len(n_channel))

#%% dataframe

# One row per PSG file: name, channel count, channel names, sampling frequency
psg_channel = pd.DataFrame({'file': psg_files,
                            'n_channel': n_channel,
                            'channels': channel_names,
                            'sfreq': sfreq_n})

# number of files per sampling frequency / with the full set of 19 channels
count1 = (psg_channel['sfreq'] == 200).sum()
count2 = (psg_channel['sfreq'] == 500).sum()
count3 = (psg_channel['n_channel'] == 19).sum()

# Rows are selected with a boolean mask instead of DataFrame.append, which
# was removed in pandas 2.0. `.copy()` makes each result an independent frame.
is_500_hz = psg_channel['sfreq'] == 500

# files to be deleted
delete_files = psg_channel[~is_500_hz].copy()

# files to be kept
valid_files = psg_channel[is_500_hz].copy()

#%% plotting data

# Recording to inspect. This is the last file of the list, i.e. the same file
# the loop variable of the extraction cell was left pointing at; change the
# index to look at another recording.
file_to_plot = psg_files[-1]
edfdata = mne.io.read_raw_edf(file_to_plot, preload=True)

# High-pass at 1 Hz, then low-pass at 40 Hz. The data are already in memory
# (preload=True), so no further load_data() call is needed.
edfdata.filter(1, None, fir_design='firwin')
edfdata.filter(None, 40, fir_design='firwin')

edfdata.plot()

Remove unwanted PSG files by index

# Indices of files to remove.
# index1, index2, index3 are placeholders: substitute the integer positions
# of the files to drop, listing as many as needed.
psg_del = [index1, index2, index3]

# remove unwanted psg files (a set makes each membership test constant-time)
psg_drop = set(psg_del)
psg_files_all = [path for position, path in enumerate(psg_files_all)
                 if position not in psg_drop]

# Indices of files to remove (placeholders, as above)
scored_del = [index1, index2, index3]

# remove unwanted scored files
scored_drop = set(scored_del)
scored_files_all = [path for position, path in enumerate(scored_files_all)
                    if position not in scored_drop]

Program Runtime

import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the system clock and can jump if that
# clock is adjusted while the code runs.
start_time = time.perf_counter()

# this is where your loop goes

end_time = time.perf_counter()
duration = end_time - start_time  # elapsed time in seconds

print("Loop duration:", duration, "seconds")