Skip to content

Commit

Permalink
atrtibutes for git lfs + cleanup of requirements code
Browse files Browse the repository at this point in the history
  • Loading branch information
Vasudha Jha authored and Vasudha Jha committed Jul 7, 2023
1 parent a881355 commit bb3abed
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 119 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/TM_data.csv filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
__pycache__
*.egg-info
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ include README.md
exclude genomap/genoNet.py
exclude genomap/genoNetus.py
recursive-exclude * __pycache__
recursive-include data/readme.txt
recursive-include data/readme.txt
include data/TM_data.csv
136 changes: 66 additions & 70 deletions genoMapDemo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"import scipy.io as sio\n",
"import numpy as np\n",
"import scipy.io as sio\n",
"import genomap.genoNet as gNet\n",
"import genomap.genoNet.genoNet as gNet\n",
"import os\n",
"import torch\n",
"from torch.utils.data import DataLoader, Dataset\n",
Expand All @@ -56,7 +56,7 @@
"from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer\n",
"from pyclustering.cluster import cluster_visualizer\n",
"\n",
"from genomap.genoNet import geneDataset, get_device, load_genoNet, predict, traingenoNet, rescale\n",
"from genomap.genoNet.genoNet import geneDataset, get_device, load_genoNet, predict, traingenoNet, rescale\n",
"from genomap import construct_genomap\n",
"\n",
"# For reproducibility\n",
Expand Down Expand Up @@ -99,34 +99,32 @@
"# Creation of genomaps\n",
"# Selection of row and column number of the genomaps \n",
"# To create square genomaps, the row and column numbers are set to be the same.\n",
"colNum=31\n",
"rowNum=31\n",
"n=rowNum*colNum # Total number of pixels in genomaps\n",
"\n",
"\n",
"colNum = 31\n",
"rowNum = 31\n",
"n = rowNum*colNum # Total number of pixels in genomaps\n",
"\n",
"# When the dataset has more genes than number of pixels in the desired genomap,\n",
"# select the first n most variable genes\n",
"if n<data.shape[1]:\n",
"if n < data.shape[1]:\n",
" # create an instance of the VarianceThreshold class\n",
" selector = VarianceThreshold()\n",
" # fit the selector to the data and get the indices of the top n most variable features\n",
" var_threshold = selector.fit(data)\n",
" top_n_indices = var_threshold.get_support(indices=True)\n",
" top_n_features=data.columns[top_n_indices[0:n]]\n",
" data=data[top_n_features]\n",
" top_n_features = data.columns[top_n_indices[0:n]]\n",
" data = data[top_n_features]\n",
"# Normalization of the data\n",
"dataNorm=scipy.stats.zscore(data,axis=0,ddof=1)\n",
"dataNorm = scipy.stats.zscore(data,axis=0,ddof=1)\n",
"# Construction of genomaps\n",
"genoMaps=construct_genomap(dataNorm,rowNum,colNum,epsilon=0.0,num_iter=200)\n",
"genoMaps = construct_genomap(dataNorm,rowNum,colNum,epsilon=0.0,num_iter=200)\n",
"\n",
"# Visualization of the constructed genomaps:\n",
"# The \"genoMaps\" array: All the constructed genomaps are saved in the array. This \n",
"# array has four indices. The first one indexes the series of genomaps. \n",
"# The second and third ones denote the row and column number in a genomap.\n",
"# The fourth index is introduced to facillitate the caclulation using Pytorch or Tensorflow \n",
"# to visualize the i-th genomap, set the first index variable to i (i=10 here) \n",
"findI=genoMaps[10,:,:,:]\n",
"findI = genoMaps[10,:,:,:]\n",
"plt.figure(1)\n",
"plt.imshow(findI, origin = 'lower', extent = [0, 10, 0, 10], aspect = 1)\n",
"plt.title('Genomap of a cell from TM dataset')"
Expand Down Expand Up @@ -225,9 +223,9 @@
"gt_data = sio.loadmat('data/index_GRADCAM.mat')\n",
"\n",
"# Compute class activation maps of B-cells using GRAD-CAM\n",
"indxcell=gt_data['idxBcell']\n",
"indxcell = gt_data['idxBcell']\n",
"# Create a numpy array to store the grad CAM results\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"\n",
"for i in range(indxcell.shape[0]):\n",
" # Looping over all B-cells to compute the class activation maps\n",
Expand All @@ -236,7 +234,7 @@
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"\n",
"# Plot an example activation map of a B-cell, which highlights the genes that are important \n",
Expand All @@ -247,114 +245,113 @@
"\n",
"# Compute the mean of gene activation values across all the B-cells\n",
"# to find the average activity of all the genes in B-cells\n",
"graySmeanB=np.mean(grayscale_cam,axis=0) \n",
"\n",
"graySmeanB = np.mean(grayscale_cam,axis=0) \n",
"\n",
"# Repeat the same procedure for T cell, natural killer cells,\n",
"# Monocytes, Pneumoctyes and Hematopoitic cells\n",
"\n",
"indxcell=gt_data['idxTcell']\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"indxcell = gt_data['idxTcell']\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"for i in range(indxcell.shape[0]):\n",
"\n",
" grad_cam_img = torch.Tensor(dataMat_CNNtrain[i,:,:,:].transpose(2,0,1)\n",
" .reshape((1,1,colNum,rowNum))).to(device)\n",
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"\n",
"graySmeanT=np.mean(grayscale_cam,axis=0) # Activity of genes in T-cells\n",
"graySmeanT = np.mean(grayscale_cam,axis=0) # Activity of genes in T-cells\n",
"\n",
"indxcell=gt_data['idxNK']\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"indxcell = gt_data['idxNK']\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"for i in range(indxcell.shape[0]):\n",
"\n",
" grad_cam_img = torch.Tensor(dataMat_CNNtrain[i,:,:,:].transpose(2,0,1)\n",
" .reshape((1,1,colNum,rowNum))).to(device)\n",
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"\n",
"graySmeanNK=np.mean(grayscale_cam,axis=0) # Activity of genes in natural killer cells\n",
"graySmeanNK = np.mean(grayscale_cam,axis=0) # Activity of genes in natural killer cells\n",
"\n",
"indxcell=gt_data['idxMono']\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"indxcell = gt_data['idxMono']\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"for i in range(indxcell.shape[0]):\n",
"\n",
" grad_cam_img = torch.Tensor(dataMat_CNNtrain[i,:,:,:].transpose(2,0,1)\n",
" .reshape((1,1,colNum,rowNum))).to(device)\n",
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"\n",
"graySmeanMono=np.mean(grayscale_cam,axis=0) # Activity of genes in Monocytes\n",
"graySmeanMono = np.mean(grayscale_cam,axis=0) # Activity of genes in Monocytes\n",
"\n",
"\n",
"indxcell=gt_data['idxPne']\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"indxcell = gt_data['idxPne']\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"for i in range(indxcell.shape[0]):\n",
"\n",
" grad_cam_img = torch.Tensor(dataMat_CNNtrain[i,:,:,:].transpose(2,0,1)\n",
" .reshape((1,1,colNum,rowNum))).to(device)\n",
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"grayscale_cam=np.nan_to_num(grayscale_cam)\n",
"graySmeanPne=np.mean(grayscale_cam,axis=0) # Activity of genes in Pneumoctyes\n",
"grayscale_cam = np.nan_to_num(grayscale_cam)\n",
"graySmeanPne = np.mean(grayscale_cam,axis=0) # Activity of genes in Pneumoctyes\n",
"\n",
"\n",
"indxcell=gt_data['idxHema']\n",
"grayscale_cam=np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"indxcell = gt_data['idxHema']\n",
"grayscale_cam = np.zeros([indxcell.shape[0],colNum*rowNum])\n",
"for i in range(indxcell.shape[0]):\n",
"\n",
" grad_cam_img = torch.Tensor(dataMat_CNNtrain[i,:,:,:].transpose(2,0,1)\n",
" .reshape((1,1,colNum,rowNum))).to(device)\n",
" pred_label = torch.argmax(net(grad_cam_img),axis=-1)\n",
" targets = [ClassifierOutputTarget(pred_label)]\n",
" A = cam(input_tensor=grad_cam_img, targets=targets)\n",
" B=np.reshape(A.T,(1,colNum*rowNum))\n",
" B = np.reshape(A.T,(1,colNum*rowNum))\n",
" grayscale_cam[i,:]=B\n",
"\n",
"graySmeanHema=np.mean(grayscale_cam,axis=0) # Activity of genes in Hematopoitic cells\n",
"graySmeanHema = np.mean(grayscale_cam,axis=0) # Activity of genes in Hematopoitic cells\n",
"\n",
"\n",
"# Stack all the average gene activity values in different types of cells for plotting\n",
"activityMatrix=np.vstack((graySmeanB,graySmeanHema,graySmeanMono,graySmeanNK,graySmeanT,graySmeanPne))\n",
"activityMatrix = np.vstack((graySmeanB,graySmeanHema,graySmeanMono,graySmeanNK,graySmeanT,graySmeanPne))\n",
"\n",
"# Load the gene names for plotting\n",
"geneNames = pd.read_csv('data/gene_interest_ROI_ImXreXB.csv', header=None,\n",
" delim_whitespace=False)\n",
"\n",
"\n",
"# The following are the 10 most variable genes in sci-ATAC-seq dataset\n",
"genes=np.array(('MSC','RPL31','SERPINB7','ARHGEF4','DCAF8','RNF149','ACMSD','ESPNL','FAM135A','RGS13'))\n",
"genes = np.array(('MSC','RPL31','SERPINB7','ARHGEF4','DCAF8','RNF149','ACMSD','ESPNL','FAM135A','RGS13'))\n",
"\n",
"# Plot the activity of the 10 most variable genes in sci-ATAC-seq dataset\n",
"idxS=np.zeros(genes.shape[0])\n",
"idxS = np.zeros(genes.shape[0])\n",
"for ii in range(genes.shape[0]):\n",
" \n",
" idx=[geneNames==genes[ii]]\n",
" idxN=np.squeeze((np.array(idx)))\n",
" idxS[ii]=np.squeeze(np.asarray(np.where(idxN==True)))\n",
" idx = [geneNames==genes[ii]]\n",
" idxN = np.squeeze((np.array(idx)))\n",
" idxS[ii] = np.squeeze(np.asarray(np.where(idxN==True)))\n",
"\n",
"idxSs=idxS.astype(int)\n",
"actData=activityMatrix[:,idxSs]\n",
"idxSs = idxS.astype(int)\n",
"actData = activityMatrix[:,idxSs]\n",
"\n",
"# Rescale the activity in each cell type from 0 to 1 for plotting\n",
"actDataRe=actData\n",
"actDataRe = actData\n",
"for ik in range(6):\n",
" actDataRe[ik,:]=rescale(actData[ik,:])\n",
"\n",
"plt.figure(figsize=(12,4))\n",
"ax=plt.stem(genes,actDataRe[0,:]) # to see the gene activity in other cells, please change this \n",
"ax = plt.stem(genes,actDataRe[0,:]) # to see the gene activity in other cells, please change this \n",
"# index to a value from 1 to 5. Default is 0 (B-cell). 1 is for T-cell, 2 for natural killer cells, \n",
"# 3 for Monocytes, 4 for Pneumoctyes, and 5 for Hematopoitic cells.\n",
"plt.xlabel(\"Genes\")\n",
Expand Down Expand Up @@ -383,25 +380,25 @@
"# In Seurat, the parameter 'nfeatures' (denoting the number of features) is set\n",
"# to 2000. The resulting Seurat output is loaded here\n",
"data = sio.loadmat('data/outSeurat_pancORG.mat')\n",
"outSeurat=data['outSeurat']\n",
"outSeurat = data['outSeurat']\n",
"\n",
"# We now create a genomap for each cell in the data\n",
"# We select the nearest square number to 2000, which is 1936\n",
"# Thus the size of the genomap would be 44 by 44.\n",
"# Next, let us select data with 1936 most variable features \n",
"numRow=44\n",
"numCol=44\n",
"varX=np.var(outSeurat,axis=0)\n",
"idxV=np.argsort(varX)\n",
"idxVX=np.flip(idxV)\n",
"outSeurat1936=outSeurat[:,idxVX[0:numRow*numCol]]\n",
"numRow = 44\n",
"numCol = 44\n",
"varX = np.var(outSeurat,axis=0)\n",
"idxV = np.argsort(varX)\n",
"idxVX = np.flip(idxV)\n",
"outSeurat1936 = outSeurat[:,idxVX[0:numRow*numCol]]\n",
"\n",
"# Construction of the genomaps for the pancreatic data from the five different technologies\n",
"genoMaps=construct_genomap(outSeurat1936,numRow,numCol,epsilon=0.0,num_iter=200)\n",
"genoMaps = construct_genomap(outSeurat1936,numRow,numCol,epsilon=0.0,num_iter=200)\n",
"\n",
"\n",
"# Visualize a genomap (we show here the first one (i=0))\n",
"findI=genoMaps[0,:,:,:]\n",
"findI = genoMaps[0,:,:,:]\n",
"plt.figure(4)\n",
"plt.imshow(findI, origin = 'lower', extent = [0, 10, 0, 10], aspect = 1)\n",
"plt.title('Genomap of a cell from pancreatic dataset')\n",
Expand Down Expand Up @@ -495,12 +492,11 @@
"clusters = xmeans_instance.get_clusters()\n",
"centers = xmeans_instance.get_centers()\n",
"\n",
"clusIndex=np.zeros(dataVec.shape[0]);\n",
"clusIndex = np.zeros(dataVec.shape[0]);\n",
"for i in range(0,len(clusters)):\n",
" a=(clusters[i])\n",
" a = (clusters[i])\n",
" clusIndex[a]=i;\n",
" \n",
"\n",
"# Set up the training and testing data \n",
"dataMat_CNNtrain = genoMaps # For unsupervised genoNet, all the data are used for training\n",
"dataMat_CNNtest = genoMaps\n",
Expand Down Expand Up @@ -530,17 +526,17 @@
"gx=np.reshape(XTrain,(XTrain.shape[0],1,XTrain.shape[2],XTrain.shape[3]))\n",
"device = get_device()\n",
"t = torch.from_numpy(gx).to(device)\n",
"net=net.double()\n",
"dataAtFC=net.forwardX(t)\n",
"data=dataAtFC.cpu().detach().numpy()\n",
"net = net.double()\n",
"dataAtFC = net.forwardX(t)\n",
"data = dataAtFC.cpu().detach().numpy()\n",
"\n",
"# Run PHATE on the genoNet features\n",
"phate_op = phate.PHATE(random_state=1)\n",
"X_embedded = phate_op.fit_transform(data)\n",
"\n",
"# Plot embeddings\n",
"plt.figure(11)\n",
"scatter=plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"scatter = plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"plt.xlabel(\"PHATE1\")\n",
"plt.ylabel(\"PHATE2\")\n",
"plt.title('PHATE embedding of genoNet features')\n",
Expand Down Expand Up @@ -637,15 +633,15 @@
"gx=np.reshape(XTrain,(XTrain.shape[0],1,XTrain.shape[2],XTrain.shape[3]))\n",
"device = get_device()\n",
"t = torch.from_numpy(gx).to(device)\n",
"net=net.double()\n",
"dataAtFC=net.forwardX(t)\n",
"net = net.double()\n",
"dataAtFC = net.forwardX(t)\n",
"\n",
"# Run t-SNE on the genoNet features\n",
"X=dataAtFC.cpu().detach().numpy()\n",
"X = dataAtFC.cpu().detach().numpy()\n",
"X_embedded = TSNE(n_components=2, learning_rate='auto',init='random').fit_transform(X)\n",
"\n",
"plt.figure(13)\n",
"scatter=plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"scatter = plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"plt.xlabel(\"tSNE1\")\n",
"plt.ylabel(\"tSNE2\")\n",
"plt.title('t-SNE embedding of genoNet features')\n",
Expand All @@ -657,7 +653,7 @@
"X_embedded = TSNE(n_components=2, learning_rate='auto',init='random').fit_transform(dataVec)\n",
"\n",
"plt.figure(14)\n",
"scatter=plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"scatter = plt.scatter(X_embedded[:,0], X_embedded[:,1],s=2,c=GrTruth,cmap=\"jet\")\n",
"plt.xlabel(\"tSNE1\")\n",
"plt.ylabel(\"tSNE2\")\n",
"plt.title('t-SNE embedding of raw data')\n",
Expand Down
Loading

0 comments on commit bb3abed

Please sign in to comment.