x=AD7129; x(x<=1)=1; x=log2(x); Des=Descriptions; % find the 200 genes most correlated with the ER gene in expression levels ngene=100; [b,i]=sort(abs(sERG)); i=flipud(i); ind=i(1:ngene); X=x(ind,:); des=Des(ind,:); % perform SVD, and look at some of results [A,D,F]=svd_mw(X); D 100*D.*D/sum(D.*D) figure(1) scatter(1:n,F(1,:)) % first factor on each of 49 arrays ShowGene(F,C_ER,1) % factor is a 'supergene' scatter(1:n,F(2,:)) % second factor ShowGene(F,C_ER,2) scatter(1:n,F(3,:)) % third factor ShowGene(F,C_ER,3) % now scatter plot pairs of factors pairs(F,[1 2]) pairs(F,[1 2 3]) % now a fancy scatter plot of pairs with color coding for two groups bcpairs(F,C_ER,[1 2]) bcpairs(F,C_ER,[1 2 3]) bcpairs(F,C_ER,[3 4 5]) % --------------------------------------------------------------------- % repeat with just the top, say, 10 genes correlated with the ER gene .. X=x(ind(1:10),:); des=Des(ind(1:10),:) % perform SVD, and look at some of results [A,D,F]=svd_mw(X); D 100*D.*D/sum(D.*D) figure(1) bcpairs(F,C_ER,[1 2 3]) subplot(2,2,2) ShowGene(F,C_ER,1); title('Factor 1') subplot(2,2,4) ShowGene(F,C_ER,2); title('Factor 2') A(:,[1 2]) % --------------------------------------------------------------------- % Let's look at some genes related to specific factors, and how they % correlate with ER and Nodal status % First, find vectors of raw correlations with ER and Nodal status s_ER=[]; s_NO=[]; % vectors of correlations v=std(x'); % sample variances of all genes for i=1:N if (v(i)>0) c=corrcoef(C_ER,x(i,:)); s_ER=[s_ER;c(1,2)]; c=corrcoef(C_Nodes,x(i,:)); s_NO=[s_NO;c(1,2)]; else s_ER=[s_ER;0]; s_NO=[s_NO;0]; end; end; plot(sort(s_ER)) plot(sort(s_NO)) plot([sort(s_ER),sort(s_NO)]) % find the 100 genes most correlated with ER status ngene=100; [b,i]=sort(abs(s_ER)); i=flipud(i); ind=i(1:ngene); X=x(ind,:); des=Des(ind,:); des(1:10,:) [A,D,F]=svd_mw(X); %----------------------------------------------------------- % Now let's go back and find SVD of all genes ... [A,D,F]=svd_mw(x); size(A) % top 200 genes on first factor, relate to ER status d=genefactor(A,s_ER,Des,200,1); d(1:20,:) % second, third etc factors, ER status ... d=genefactor(A,s_ER,Des,200,2); d(1:20,:) % factor 2 looks interesting % correlation relates to loading d=genefactor(A,s_ER,Des,200,3); d(1:20,:) d=genefactor(A,s_ER,Des,200,6); d(1:20,:) % factor 6 looks interesting, more so % now let's look at Nodal status ... d=genefactor(A,s_NO,Des,200,1); d(1:20,:) d=genefactor(A,s_NO,Des,200,2); d(1:20,:) d=genefactor(A,s_NO,Des,200,4); d(1:20,:) % etc % of interest to repeat this exploration using subsets % of genes -- e.g., the top 200 correlated with the ER gene itself % .. as follows .. ngene=200; [b,i]=sort(abs(sERG)); i=flipud(i); ind=i(1:ngene); [A,D,F]=svd_mw(x(ind,:)); d=genefactor(A,s_ER(ind),Des(ind,:),200,1); d(1:20,:) d=genefactor(A,s_ER(ind),Des(ind,:),200,2); d(1:20,:) % very clear