import cell2cell as c2c
import numpy as np
import pandas as pd

from tqdm.auto import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


import os

output = '/results/Celegans/'
if not os.path.isdir(output):
    os.mkdir(output)


import warnings
warnings.filterwarnings('ignore')


rnaseq_data = c2c.io.load_rnaseq('/data/Celegans/Celegans_Embryonic_RNASeqData_Warner.txt',
                                 gene_column='CommonName',
                                 drop_nangenes=True,
                                 log_transformation=False,
                                 format='csv',
                                 sep=','
                                )

Opening RNAseq datasets from /data/Celegans/Celegans_Embryonic_RNASeqData_Warner.txt
/data/Celegans/Celegans_Embryonic_RNASeqData_Warner.txt was correctly loaded


rnaseq_data.head()


lr_pairs = pd.read_excel('/data/LR-Pairs/Celegans-2020-Armingol-LR-pairs.xlsx')


lr_pairs.shape

(245, 9)


int_cols = ('Ligand_symbol', 'Receptor_symbol')


lr_pairs = lr_pairs.loc[(lr_pairs[int_cols[0]].isin(rnaseq_data.index)) \
                        & (lr_pairs[int_cols[1]].isin(rnaseq_data.index))]


function_filter = ['Wnt signaling', 'TGF-B signaling', 'Hedgehog signaling', 'Notch signaling']
lr_pairs = lr_pairs.loc[lr_pairs['LR Function'].isin(function_filter)]


lr_pairs.sort_values(by=['LR Function', int_cols[0]], inplace=True)


lr_pairs.head(2)


lr_pairs.shape

(94, 9)


lr_functions = {}
for idx, row in lr_pairs.iterrows():
    lr_name = row[int_cols[0]] + '^' + row[int_cols[1]]
    lr_functions[lr_name] = row['LR Function']


markers = {'hlh1' : 'Muscles & Coelomocytes',
           'end1' : 'Intestine',
           'cnd1' : 'Neurons',
           'pha4' : 'Pharynx',
           'nhr25end1' : 'Hypodermis',
           'ceh32' : 'ABala-derived (Neurons & Glial Cells)',
           'tbx37' : 'ABa-derived (Neurons, Phrarynx & Hypodermis)'
          }


times = ['T0', 'T1', 'T2', 'T3', 'T4']
time_labels = ['120 min', '210  min', '300  min', '390  min', '480  min']

time_dict = dict(zip(times, time_labels))

total_cells = ['cnd1', 
               'ceh32',
               'tbx37', 
               'pha4', 
               'end1', 
               'nhr25end1',
               'hlh1']
cell_labels = [markers[c] + ' - {}'.format(c) for c in total_cells]


continuous_rnaseq = np.log2(rnaseq_data+1.0)


rnaseq_matrices = []

for T in times:
    cell_time = [c + '_' + T for c in total_cells]
    
    df = continuous_rnaseq[cell_time]
    df.columns = cell_labels
    rnaseq_matrices.append(df)


tensor = c2c.tensor.InteractionTensor(rnaseq_matrices=rnaseq_matrices,
                                      ppi_data=lr_pairs,
                                      context_names=time_labels,
                                      how='inner',
                                      upper_letter_comparison=False,
                                      interaction_columns=int_cols,
                                      communication_score='expression_product'
                                     )

Building tensor for the provided context


tensor.tensor.shape

(5, 94, 7, 7)


meta = c2c.tensor.generate_tensor_metadata(interaction_tensor=tensor,
                                           metadata_dicts=[None, lr_functions, None, None],
                                           fill_with_order_elements=True
                                          )


fig, error = tensor.elbow_rank_selection(upper_rank=25,
                                         runs=1,
                                         init='svd',
                                         automatic_elbow=False,
                                         random_state=888)
_ = plt.plot(*error[9], 'ro')
plt.savefig(output + '/Celegans-Elbow.svg', dpi=300, bbox_inches='tight')


error[9]

(10, array(0.17345209))


r = 10


tensor.compute_tensor_factorization(rank=r, random_state=888)


cmaps = ['viridis', 'tab20', 'Pastel1', 'Pastel1']


fig, axes = c2c.plotting.tensor_factors_plot(interaction_tensor=tensor,
                                             order_labels=['Time', 'Ligand-Receptor Pairs', 'Sender Tissues', 'Receiver Tissues'],
                                             metadata = meta,
                                             sample_col='Element',
                                             group_col='Category',
                                             meta_cmaps=cmaps,
                                             fontsize=14,
                                             filename=output + '/Celegans-TensorFactorization.svg'
                                            )


for i in range(tensor.rank):
        print(tensor.get_top_factor_elements('Ligand-Receptor Pairs', 'Factor {}'.format(i+1), 5))
        print('')

cwn-1^cam-1     0.346320
cwn-1^cfz-2     0.312025
cwn-1^vang-1    0.299701
cwn-1^lin-17    0.283951
cwn-1^mom-5     0.252656
Name: Factor 1, dtype: float64

cwn-2^cam-1     0.411229
cwn-2^lin-17    0.358747
cwn-2^cfz-2     0.355586
cwn-2^vang-1    0.345109
cwn-2^mig-1     0.324834
Name: Factor 2, dtype: float64

wrt-1^ptc-3      0.388149
wrt-2^ptc-3      0.348375
wrt-5^ptc-1      0.260977
apl-1^F14B4.1    0.244969
cwn-2^cam-1      0.222672
Name: Factor 3, dtype: float64

mom-2^cfz-2      0.323600
lin-44^cam-1     0.306039
mom-2^cam-1      0.302202
lin-44^cfz-2     0.294243
lin-44^vang-1    0.265855
Name: Factor 4, dtype: float64

apl-1^F14B4.1    0.689900
srp-7^F14B4.1    0.520561
srp-2^F14B4.1    0.233917
sfrp-1^cfz-2     0.159949
sup-17^glp-1     0.156285
Name: Factor 5, dtype: float64

dsl-2^lin-12    0.340190
cwn-2^vang-1    0.256561
cwn-2^cam-1     0.248999
dsl-3^lin-12    0.244976
cwn-2^mom-5     0.242987
Name: Factor 6, dtype: float64

sfrp-1^mig-1     0.412471
sfrp-1^cfz-2     0.408550
sfrp-1^mom-5     0.402333
dsl-3^lin-12     0.280181
sup-17^lin-12    0.257853
Name: Factor 7, dtype: float64

egl-20^cam-1     0.398236
egl-20^cfz-2     0.329903
egl-20^lin-17    0.329275
egl-20^fmi-1     0.319805
egl-20^vang-1    0.313279
Name: Factor 8, dtype: float64

mom-2^mig-1     0.393997
cwn-1^mig-1     0.310323
mom-2^lin-18    0.287104
cwn-2^mig-1     0.249182
sfrp-1^mig-1    0.239089
Name: Factor 9, dtype: float64

sfrp-1^cfz-2    0.385740
sfrp-1^mom-5    0.327037
dsl-2^glp-1     0.305708
mom-2^cfz-2     0.259814
sup-17^glp-1    0.251987
Name: Factor 10, dtype: float64


tensor.export_factor_loadings(output + 'Celegans-Loadings.xlsx')

Loadings of the tensor factorization were successfully saved into /results/Celegans/Celegans-Loadings.xlsx

	ceh32_T0	ceh32_T1	ceh32_T2	ceh32_T3	ceh32_T4	cnd1_T0	cnd1_T1	cnd1_T2	cnd1_T3	cnd1_T4	...	pha4_T0	pha4_T1	pha4_T2	pha4_T3	pha4_T4	tbx37_T0	tbx37_T1	tbx37_T2	tbx37_T3	tbx37_T4
CommonName
aap-1	43.133459	50.658138	50.414970	35.766857	33.895195	58.599474	66.939721	61.222382	65.702870	47.982343	...	52.616365	60.825204	51.979991	43.455773	30.967278	51.326500	50.654093	49.112858	43.996665	33.075259
aat-1	1.621625	3.189402	9.831904	23.548271	22.726582	4.778361	11.748554	20.488394	23.204268	30.890648	...	6.237194	59.957473	130.003444	388.127157	433.615700	11.542751	15.744846	37.220783	102.917061	144.728664
aat-2	5.496731	12.937667	21.763256	29.827811	37.708772	12.952399	24.959862	25.108797	39.361573	44.727870	...	11.464360	41.773713	51.860603	57.235038	45.479755	7.449374	12.206810	18.608906	31.577895	37.431819
aat-3	61.400820	136.940807	198.831633	197.640775	219.688245	181.914836	260.822516	225.358789	303.181266	218.492232	...	111.998325	289.777602	288.678800	285.776295	186.243517	151.952176	203.938290	266.344493	303.632732	272.697353
aat-4	0.204774	1.159116	0.832428	0.591529	0.827419	0.285891	0.305862	0.466678	0.293390	0.913105	...	0.106712	0.948339	0.866276	1.472586	1.381153	0.406641	0.378540	0.714584	0.477434	0.734048

	Ligand_WB	Receptor_WB	Ligand_symbol	Receptor_symbol	LR Function	L Function	R Function	Ligand_desc	Receptor_desc
50	WBGene00001700	WBGene00004208	grd-11	ptc-1	Hedgehog signaling	DHH	PTCH1	Is an ortholog of human DHH (desert hedgehog s...	Is an ortholog of human PTCH1 (patched 1) and ...
148	WBGene00004264	WBGene00004210	qua-1	ptc-3	Hedgehog signaling	IHH	PTCH2	Is an ortholog of human DHH (desert hedgehog s...	Is an ortholog of human PTCH1 (patched 1) and ...

Load Data¶

Data Preprocessing¶

Analysis¶