% Input file from Pelagia_IBM2_par.m. Search for clusters of parameters that
% maintain the number of individuals throughout the simulation.
%

% Start from an empty workspace and with no figures left over from earlier runs.
clear all
close all

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% User settings
inFile = 'IBM5_par_4y_10k_2000-3.mat'; % .mat file read by load() below
Nclusters = 6;                         % number of clusters requested from kmeans
DOnormalize=1; % normalize matrix before clustering
saveIMG = 1;                           % non-zero: export the figures into IMGfold
IMGfold = 'IMG/Clustering_IBM5/';      % output folder for the exported figures
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Create the output folder only when it is missing: calling mkdir on a folder
% that already exists issues a "Directory already exists" warning on each run.
if ~isfolder(IMGfold)
    mkdir(IMGfold);
end

disp('Reading input file ...')
% Load only the three variables this script reads. A bare load(inFile) would
% let any variable stored in the file silently overwrite the settings defined
% above (inFile, Nclusters, DOnormalize, saveIMG, IMGfold).
load(inFile, 'Nini', 'Nalive', 'MP')

disp(['Initial number of particles: ', num2str(Nini)])

% Keep just those close enough to initial number of individuals:
% Nalive strictly between 80 % and 120 % of Nini.
RunIDs = Nalive > Nini*0.8 & Nalive < Nini*1.2;
TopMP = MP(RunIDs);
disp(['Number of particles in the permanent population: ',num2str(numel(TopMP))])

% Fail here with an explicit message; otherwise the script only stops further
% down with an unrelated-looking error once the clustering matrix is used.
if isempty(TopMP)
    error('Clustering:noRuns', ...
        'No entry of MP in %s has Nalive within 20%% of Nini; nothing to cluster.', ...
        inFile);
end

% Create matrix for clustering: one row per element of TopMP, columns k1, k2, k3.
disp('Matrix ...')
% Preallocate so that A is not resized on every iteration and so that A exists
% (as a 0-by-3 matrix) even when TopMP is empty.
A = zeros(numel(TopMP), 3);
for n=1:numel(TopMP)
    A(n,:) = [TopMP(n).k1, TopMP(n).k2, TopMP(n).k3];
end

% K-means
disp('Clustering ...')
% AN is the matrix handed to the clustering: A itself by default, or A rescaled
% with normalize(...,'range') when DOnormalize is set.
AN = A;
if DOnormalize
    disp('  Normalization ...')
    AN = normalize(A,'range');

    % Renormalization check (disabled): map AN back with the column min/max of
    % A and plot the difference to the original matrix A.
    %     for n=1:size(AN,2)
    %        AN2(:,n) = min(A(:,n)) + AN(:,n).* (max(A(:,n))-min(A(:,n))) ;
    %     end
    %     Adiff = AN2-A;
    %     figure
    %     plot(Adiff(:))
end
% 
% % Search for the right number of clusters - looking for max silhouette
% % value
% for Nc = 1:10
%     % clustering
%     [idx,C,sumd] = kmeans(AN,Nc,'Distance','cityblock'); % cityblock, sqeuclidean
%     
%     % silhouette for Nclusters evaluation
%     figure
%     [silh,h] = silhouette(AN,idx,'cityblock');
%     xlabel('Silhouette Value')
%     ylabel('Cluster')
%     disp(['Nclusters ',num2str(Nc),'  silh: ',num2str(mean(silh))])
% end
% 
% return


% clustering
% The same metric is used for the k-means assignment and for picking the
% member nearest to each centroid, so "closest" means the same in both steps.
kmeansMetric = 'cityblock'; % cityblock, sqeuclidean
if strcmp(kmeansMetric,'sqeuclidean')
    % pdist2 spells the squared Euclidean metric differently from kmeans.
    pdistMetric = 'squaredeuclidean';
else
    pdistMetric = kmeansMetric;
end
[idx,C,sumd] = kmeans(AN,Nclusters,'Distance',kmeansMetric);

% Find closest point to cluster centroids: for every row of C, the index of
% the nearest row of AN.
[~,indices] = pdist2(AN,C,pdistMetric,'Smallest',1);
% Rows of AN and A correspond one to one, so read the representative parameter
% sets straight from A; this yields the exact original values instead of
% values reconstructed through normalize/denormalize.
NC = A(indices,:);

% [idx2,C2] = dbscan(AN,0.2,5);
% denormalize the centroids back to the units of A
if DOnormalize
    colMin  = min(A,[],1);
    colSpan = max(A,[],1) - colMin;
    C = colMin + C.*colSpan;
end




% Display K-means result
% One line per cluster: number of members, sumd as returned by kmeans, and
% sumd divided by the number of members.
disp('K-means:')
for c = 1:Nclusters
    nMembers = sum(idx==c);
    disp(['Cluster no.: ',num2str(c),', ',num2str(nMembers),'   sumd: ', ...
        num2str(sumd(c)),'  sumd/N: ',num2str(sumd(c)/nMembers)])
end

% Rows of C, printed one at a time
for c = 1:Nclusters
    disp(C(c,:));
end
disp('---')

% Display arrays for the Lungrun.sh bash script
% Each piece carries its own trailing blank; the pieces are collected in cells
% and concatenated once at the end.
nRep   = size(NC,1);
tagCell = cell(1,nRep);
k1Cell  = cell(1,nRep);
k2Cell  = cell(1,nRep);
k3Cell  = cell(1,nRep);
for r = 1:nRep
    tagCell{r} = ['"C2k7_',num2str(r),'-',num2str(nRep),'" '];
    k1Cell{r}  = [num2str(NC(r,1)),' '];
    k2Cell{r}  = [num2str(NC(r,2)),' '];
    k3Cell{r}  = [num2str(NC(r,3)),' '];
end
Cs  = [tagCell{:}];
k1s = [k1Cell{:}];
k2s = [k2Cell{:}];
k3s = [k3Cell{:}];
disp(['C=(',Cs,')'])
disp(['k1=(',k1s,')'])
disp(['k2=(',k2s,')'])
disp(['k3=(',k3s,')'])



%
%% 3D plot
% When set_clusters is non-zero, the leading rows of C are overwritten with the
% hand-entered parameter sets below (C grows if it has fewer rows than the
% table). The values are kept in a single table so that nothing is echoed to
% the console and so that adding or editing a set is a one-line change.
set_clusters = 1;
if set_clusters

    % Columns: k1, k2, k3. Row r of the table is written to C(r,1:3).
    manualC = [
        0.041   1.85   0.028    % cluster 1 / 6
        0.029   5.25   0.014    % cluster 2 / 6
        0.087   5      0.05     % cluster 3 / 6
        0.167   2.25   0.126    % cluster 4 / 6
        0.107   1.7    0.082    % cluster 5 / 6
        0.159   5.5    0.092    % cluster 6 / 6
        0.059   3.7    0.036    % cluster 1 / 1
        0.109   6.2    0.058    % cluster 2 / 1
        0.093   3.95   0.058    % cluster 3 / 1
        ];
    % NOTE(review): the last three rows keep their original "n / 1" labels;
    % presumably they belong to a separate 3-cluster solution - confirm.

    C(1:size(manualC,1),1:3) = manualC;

end

% 3D scatter of every row of A, with each row of C overlaid as a larger red
% marker and labelled "C<row number>".
hFig = figure;
clf(hFig)
set(hFig, 'Position', [10 10 600 400], 'Color', 'w')
hold on
scatter3(A(:,1),A(:,2),A(:,3),20,'filled');
nCentres = size(C,1);
for m = 1:nCentres
    cx = C(m,1);
    cy = C(m,2);
    cz = C(m,3);
    scatter3(cx,cy,cz,50,'filled','MarkerFaceColor','red')
%     scatter3(NC(m,1),NC(m,2),NC(m,3),20,'filled')
    % label is shifted slightly off the marker
    text(cx-0.023,cy,cz+0.01,[' C',num2str(m)],'FontWeight','bold')
end
hold off
xlabel('k1')
ylabel('k2')
zlabel('k3')
view(-40,30)

% Export the figure (PNG and PDF) when saving is switched on
if saveIMG
    fOut = [IMGfold,'3D_clustering_plot_6n3.pdf'];
    export_fig(fOut, '-png', '-pdf');
end



%% Plot curves on the same figure - one curve per row of C
% NOTE: NC is overwritten with C here, so the curves are drawn for the rows of
% C (the hand-entered sets when set_clusters is on), not for the members
% closest to the centroids.
NC = C;
hFig = figure;
set(hFig, 'Position', [10 10 600 300]); % fixed figure dimensions
set(hFig,'Color','w')
clf
hold on % explicit state: a bare "hold" only toggles whatever the state was
nCurves = size(C,1);
lnames = cell(1,nCurves); % fresh cell: no stale names when the section is re-run
for n=1:nCurves
    % Plain function handle (element-wise operators so fplot can evaluate it
    % on vectors); no symbolic variables are needed for this expression.
%     dNfood = @(PP) NC(n,1) .* tanh( (PP-NC(n,2)) ./ NC(n,2)) - NC(n,3) ;
    dNfood = @(PP) NC(n,1) .* PP ./ (NC(n,2) + PP) - NC(n,3) ;
    fplot(dNfood, [0 20], 'LineWidth', 2)
    lnames{n} = ['C',num2str(n)];
end
line([0 20],[0 0],'Color','k') % zero reference line
hold off
ax = gca; % axes handle
ax.YAxis.Exponent = 0; % plain tick labels, no common exponent factor
ylabel('dM/Mdt [day^{-1}]')
xlabel('Primary production [mg m⁻³ day⁻¹]')
legend(lnames,'Location','southeast')

if saveIMG
    % save figure
    fOut = [IMGfold,'MortalityReproduction_curves'];
    export_fig(fOut, '-png','-pdf');
end




