Commit 0323848d authored by Enrico Glerean's avatar Enrico Glerean

adding various dependences, some might be useless but anyway

parent 602fa8fd
This diff is collapsed.
##OPTICS CLUSTERING##
This MATLAB function computes a set of clusters based on the algorithm introduced in Figure 19 of Ankerst, Mihael, et al. "OPTICS: ordering points to identify the clustering structure." ACM Sigmod Record. Vol. 28. No. 2. ACM, 1999.
**Written by Alex Kendall**
University of Cambridge
18 Feb 2015
http://mi.eng.cam.ac.uk/~agk34/
This software is licensed under GPLv3, see included glpv3.txt.
**Input:**
points - input points to cluster where each point is a separate row and the columns are data dimensions
minpts - the minimum points required to form a cluster
epsilon - a percentage threshold to make a cluster
**Output:**
SetOfClusters - a struct containing each cluster's start and end index
RD - each point's reachability distance
CD - each point's core distance
order - the order of points in the reachability graph
**Dependencies:**
This function requires optics.m from Michal Daszykowski's implementation of calculating the reachability distance for all points.
For more details, refer to http://chemometria.us.edu.pl/index.php?goto=downloads
function [ SetOfClusters, RD, CD, order ] = cluster_optics(points, minpts, epsilon)
% This function computes a set of clusters based on the algorithm introduced in Figure 19 of
% Ankerst, Mihael, et al. "OPTICS: ordering points to identify the clustering structure."
% ACM Sigmod Record. Vol. 28. No. 2. ACM, 1999.
% Written by Alex Kendall
% University of Cambridge
% 18 Feb 2015
% http://mi.eng.cam.ac.uk/~agk34/
% This software is licensed under GPLv3, see included glpv3.txt.
% Input:
% points - input points to cluster where each point is a separate row and the columns are data dimensions
% minpts - the minimum points required to form a cluster
% epsilon - a percentage threshold to make a cluster
% Output:
% SetOfClusters - a struct containing each cluster's start and end index
% RD - each point's reachability distance
% CD - each point's core distance
% order - the order of points in the reachability graph
% Dependencies:
% This function requires optics.m from Michal Daszykowski's implementation of calculating the reachability distance for all points.
% For more details, refer to http://chemometria.us.edu.pl/index.php?goto=downloads
disp('Calculating reachability for all points.');
tic;
[RD,CD,order]=optics(points,minpts);
toc;
disp('Computing clusters.');
tic;
mib = 0;
i = 1;
SetOfSteepDownAreas = struct();
SetOfClusters = struct();
while i < size(points,1)-1
mib = max([mib, RD(order(i))]);
if RD(order(i))*(1-epsilon) >= RD(order(i+1))
% update mib values and filter down areas
for k=2:size(SetOfSteepDownAreas,2)
SetOfSteepDownAreas(k).mib = max(RD(order((SetOfSteepDownAreas(k).end+1):i)));
end
k=2;
while k<=size(SetOfSteepDownAreas,2)
if RD(order(SetOfSteepDownAreas(k).start))*(1-epsilon) < mib
if k==size(SetOfSteepDownAreas,2)
SetOfSteepDownAreas = SetOfSteepDownAreas(1:k-1);
else
SetOfSteepDownAreas = SetOfSteepDownAreas([1:k-1, k+1:size(SetOfSteepDownAreas,2)]);
end
else
k = k+1;
end
end
newD = size(SetOfSteepDownAreas,2)+1;
SetOfSteepDownAreas(newD).start = i;
SetOfSteepDownAreas(newD).mib = 0;
% find end of downward area
while i < size(points,1)-1
if RD(order(i))*(1-epsilon) >= RD(order(i+1))
i = i+1;
else
j = i;
while j < size(points,1)-1
if or(j-i>minpts, RD(order(j)) < RD(order(j+1)))
% if the downward area that isn't steep is longer than minpts, or no longer downward
j=-1;
break;
elseif RD(order(j))*(1-epsilon) >= RD(order(j+1))
% if it is a steepdownward area
break;
else
j = j+1;
end
end
if or(j == -1, j == size(points,1)-1)
% end of downward area
break;
else
i = j;
end
end
end
SetOfSteepDownAreas(newD).end = i-1;
mib = RD(order(i));
elseif RD(order(i)) <= RD(order(i+1))*(1-epsilon)
% Up area
upAreaStart = i;
% update mib values and filter down areas
for k=2:size(SetOfSteepDownAreas,2)
SetOfSteepDownAreas(k).mib = max(RD(order(SetOfSteepDownAreas(k).end:i)));
end
k=2;
while k<=size(SetOfSteepDownAreas,2)
if RD(order(SetOfSteepDownAreas(k).start))*(1-epsilon) < mib
if k==size(SetOfSteepDownAreas,2)
SetOfSteepDownAreas = SetOfSteepDownAreas(1:k-1);
else
SetOfSteepDownAreas = SetOfSteepDownAreas([1:k-1, k+1:size(SetOfSteepDownAreas,2)]);
end
else
k = k+1;
end
end
% find end of upward area
while i < size(points,1)-1
if RD(order(i)) <= RD(order(i+1))*(1-epsilon)
i = i+1;
else
j = i;
while j < size(points,1)-1
if or(j-i>minpts, RD(order(j)) > RD(order(j+1)))
% if the upward area that isn't steep is longer than minpts, or no longer upward
j=-1;
break;
elseif RD(order(j)) <= RD(order(j+1))*(1-epsilon)
% if it is a steepdownward area
break;
else
j = j+1;
end
end
if or(j == -1, j== size(points,1)-1)
% end of downward area
break;
else
i = j;
end
end
end
mib = RD(order(i));
for k=2:size(SetOfSteepDownAreas,2)
if RD(order(i))*(1-epsilon) > SetOfSteepDownAreas(k).mib
if and(RD(order(SetOfSteepDownAreas(k).start)) >= RD(upAreaStart) , RD(order(SetOfSteepDownAreas(k).end)) <= RD(order(i)))
if abs(RD(order(SetOfSteepDownAreas(k).start))-RD(order(i))) <= epsilon*max(RD(order(SetOfSteepDownAreas(k).start)),RD(order(i)))
% condition a
clusterStart = SetOfSteepDownAreas(k).start;
clusterEnd = i;
elseif RD(order(SetOfSteepDownAreas(k).start))*(1-epsilon) > RD(order(i))
% condition b
tmp = abs(RD(SetOfSteepDownAreas(k).start:SetOfSteepDownAreas(k).end)-RD(order(i)));
[~, clusterStart] = min(tmp); %index of closest value
clusterStart = clusterStart+SetOfSteepDownAreas(k).start-1;
clusterEnd = i;
elseif RD(order(SetOfSteepDownAreas(k).start)) < RD(order(i))*(1-epsilon)
% condition c
clusterStart = SetOfSteepDownAreas(k).start;
tmp = abs(RD(upAreaStart:i)-RD(order(SetOfSteepDownAreas(k).start)));
[~, clusterEnd] = min(tmp); %index of closest value
clusterEnd = clusterEnd+upAreaStart;
else
error('ERROR\n');
end
if abs(clusterEnd - clusterStart) >= minpts
newD = size(SetOfClusters,2)+1;
SetOfClusters(newD).start = clusterStart;
SetOfClusters(newD).end = clusterEnd;
end
end
end
end
else
i = i+1;
end
end
SetOfClusters = SetOfClusters(2:size(SetOfClusters,2));
toc;
end
% Brief Demo to Visualise Optics Results
% Written by Alex Kendall
% University of Cambridge
% 18 Feb 2015
% http://mi.eng.cam.ac.uk/~agk34/
% This software is licensed under GPLv3, see included glpv3.txt.
% ::IMPORTANT:: load your data to 'points'. Here is some example data:
load('example_data.mat');
points=points(1:1000:end,:);
minpts=5;
[ SetOfClusters, RD, CD, order ] = cluster_optics(points, minpts, epsilon);
bar(RD(order));
figure;
% Cycle through all clusters
for i=2:length(SetOfClusters)
bar(RD(order(SetOfClusters(i).start:SetOfClusters(i).end)));
pause(0.5)
end
\ No newline at end of file
%points2contour
%Tristan Ursell
%Sept 2013
%
%[Xout,Yout]=points2contour(Xin,Yin,P,direction)
%[Xout,Yout]=points2contour(Xin,Yin,P,direction,dlim)
%[Xout,Yout,orphans]=points2contour(Xin,Yin,P,direction,dlim)
%[Xout,Yout,orphans,indout]=points2contour(Xin,Yin,P,direction,dlim)
%
%Given any list of 2D points (Xin,Yin), construct a singly connected
%nearest-neighbor path in either the 'cw' or 'ccw' directions. The code
%has been written to handle square and hexagon grid points, as well as any
%non-grid arrangement of points.
%
%'P' sets the point to begin looking for the contour from the original
%ordering of (Xin,Yin), and 'direction' sets the direction of the contour,
%with options 'cw' and 'ccw', specifying clockwise and counter-clockwise,
%respectively.
%
%The optional input parameter 'dlim' sets a distance limit, if the distance
%between a point and all other points is greater than or equal to 'dlim',
%the point is left out of the contour.
%
%The optional output 'orphans' gives the indices of the original (Xin,Yin)
%points that were not included in the contour.
%
%The optional output 'indout' is the order of indices that produces
%Xin(indout)=Xout and Yin(indout)=Yout.
%
%There are many (Inf) situations where there is no unique mapping of points
%into a connected contour -- e.g. any time there are more than 2 nearest
%neighbor points, or in situations where the nearest neighbor matrix is
%non-symmetric. Picking a different P will result in a different contour.
%Likewise, in cases where one point is far from its neighbors, it may be
%orphaned, and only connected into the path at the end, giving strange
%results.
%
%The input points can be of any numerical class.
%
%Note that this will *not* necessarily form the shortest path between all
%the points -- that is the NP-Hard Traveling Salesman Problem, for which
%there is no deterministic solution. This will, however, find the shortest
%path for points with a symmetric nearest neighbor matrix.
%
%see also: bwtraceboundary
%
%Example 1: continuous points
%N=200;
%P=1;
%theta=linspace(0,2*pi*(1-1/N),N);
%[~,I]=sort(rand(1,N));
%R=2+sin(5*theta(I))/3;
%
%Xin=R.*cos(theta(I));
%Yin=R.*sin(theta(I));
%
%[Xout,Yout]=points2contour(Xin,Yin,P,'cw');
%
%figure;
%hold on
%plot(Xin,Yin,'b-')
%plot(Xout,Yout,'r-','Linewidth',2)
%plot(Xout(2:end-1),Yout(2:end-1),'k.','Markersize',15)
%plot(Xout(1),Yout(1),'g.','Markersize',15)
%plot(Xout(end),Yout(end),'r.','Markersize',15)
%xlabel('X')
%ylabel('Y')
%axis equal tight
%title(['Black = original points, Blue = original ordering, Red = new ordering, Green = starting points'])
%box on
%
%
%Example 2: square grid
%P=1;
%
%Xin=[1,2,3,4,4,4,4,3,2,1,1,1];
%Yin=[0,0,0,0,1,2,3,3,2,2,1,0];
%
%[Xout,Yout]=points2contour(Xin,Yin,P,'cw');
%
%figure;
%hold on
%plot(Xin,Yin,'b-')
%plot(Xout,Yout,'r-','Linewidth',2)
%plot(Xout(2:end-1),Yout(2:end-1),'k.','Markersize',15)
%plot(Xout(1),Yout(1),'g.','Markersize',15)
%plot(Xout(end),Yout(end),'r.','Markersize',15)
%xlabel('X')
%ylabel('Y')
%axis equal tight
%box on
%
%Example 3: continuous points, pathological case
%N=200;
%P=1;
%theta=linspace(0,2*pi*(1-1/N),N);
%[~,I]=sort(rand(1,N));
%R=2+sin(5*theta(I))/3;
%
%Xin=(1+rand(1,N)/2).*R.*cos(theta(I));
%Yin=(1+rand(1,N)/2).*R.*sin(theta(I));
%
%[Xout,Yout]=points2contour(Xin,Yin,P,'cw');
%
%figure;
%hold on
%plot(Xin,Yin,'b-')
%plot(Xout,Yout,'r-','Linewidth',2)
%plot(Xout(2:end-1),Yout(2:end-1),'k.','Markersize',15)
%plot(Xout(1),Yout(1),'g.','Markersize',15)
%plot(Xout(end),Yout(end),'r.','Markersize',15)
%xlabel('X')
%ylabel('Y')
%axis equal tight
%title(['Black = original points, Blue = original ordering, Red = new ordering, Green = starting points'])
%box on
%
%Example 4: continuous points, distance limit applied
%N=200;
%P=1;
%theta=linspace(0,2*pi*(1-1/N),N);
%[~,I]=sort(rand(1,N));
%R=2+sin(5*theta(I))/3;
%R(2)=5; %the outlier
%
%Xin=(1+rand(1,N)/16).*R.*cos(theta(I));
%Yin=(1+rand(1,N)/16).*R.*sin(theta(I));
%
%[Xout,Yout,orphans,indout]=points2contour(Xin,Yin,P,'cw',1);
%
%figure;
%hold on
%plot(Xin,Yin,'b-')
%plot(Xin(orphans),Yin(orphans),'kx')
%plot(Xin(indout),Yin(indout),'r-','Linewidth',2)
%plot(Xout(2:end-1),Yout(2:end-1),'k.','Markersize',15)
%plot(Xout(1),Yout(1),'g.','Markersize',15)
%plot(Xout(end),Yout(end),'r.','Markersize',15)
%xlabel('X')
%ylabel('Y')
%axis equal tight
%title(['Black = original points, Blue = original ordering, Red = new ordering, Green = starting points'])
%box on
%
function [Xout,Yout,varargout]=points2contour(Xin,Yin,P,direction,varargin)
%check to make sure the vectors are the same length
if length(Xin)~=length(Yin)
error('Input vectors must be the same length.')
end
%check to make sure point list is long enough
if length(Xin)<2
error('The point list must have more than two elements.')
end
%check distance limit
if ~isempty(varargin)
dlim=varargin{1};
if dlim<=0
error('The distance limit parameter must be greater than zero.')
end
else
dlim=-1;
end
%check direction input
if and(~strcmp(direction,'cw'),~strcmp(direction,'ccw'))
error(['Direction input: ' direction ' is not valid, must be either "cw" or "ccw".'])
end
%check to make sure P is in the right range
P=round(P);
npts=length(Xin);
if or(P<1,P>npts)
error('The starting point P is out of range.')
end
%adjust input vectors for starting point
if size(Xin,1)==1
Xin=circshift(Xin,[0,1-P]);
Yin=circshift(Yin,[0,1-P]);
else
Xin=circshift(Xin,[1-P,0]);
Yin=circshift(Yin,[1-P,0]);
end
%find distances between all points
D=zeros(npts,npts);
for q1=1:npts
D(q1,:)=sqrt((Xin(q1)-Xin).^2+(Yin(q1)-Yin).^2);
end
%max distance
maxD=max(D(:));
%avoid self-connections
D=D+eye(npts)*maxD;
%apply distance contraint by removing bad points and starting over
if dlim>0
D(D>=dlim)=-1;
%find bad points
bad_pts=sum(D,1)==-npts;
orphans=find(bad_pts);
%check starting point
if sum(orphans==P)>0
error('The starting point index is a distance outlier, choose a new starting point.')
end
%get good points
Xin=Xin(~bad_pts);
Yin=Yin(~bad_pts);
%number of good points
npts=length(Xin);
%find distances between all points
D=zeros(npts,npts);
for q1=1:npts
D(q1,:)=sqrt((Xin(q1)-Xin).^2+(Yin(q1)-Yin).^2);
end
%max distance
maxD=max(D(:));
%avoid self-connections
D=D+eye(npts)*maxD;
else
orphans=[];
bad_pts=zeros(size(Xin));
end
%tracking vector (has this original index been put into the ordered list?)
track_vec=zeros(1,npts);
%construct directed graph
Xout=zeros(1,npts);
Yout=zeros(1,npts);
indout0=zeros(1,npts);
Xout(1)=Xin(1);
Yout(1)=Yin(1);
indout0(1)=1;
p_now=1;
track_vec(p_now)=1;
for q1=2:npts
%get current row of distance matrix
curr_vec=D(p_now,:);
%remove used points
curr_vec(track_vec==1)=maxD;
%find index of closest non-assigned point
p_temp=find(curr_vec==min(curr_vec),1,'first');
%reassign point
Xout(q1)=Xin(p_temp);
Yout(q1)=Yin(p_temp);
%move index
p_now=p_temp;
%update tracking
track_vec(p_now)=1;
%update index vector
indout0(q1)=p_now;
end
%undo the circshift
temp1=find(~bad_pts);
indout=circshift(temp1(indout0),[P,0]);
%%%%%%% SET CONTOUR DIRECTION %%%%%%%%%%%%
%contour direction is a *global* feature that cannot be determined until
%all the points have been sequentially ordered.
%calculate tangent vectors
tan_vec=zeros(npts,3);
for q1=1:npts
if q1==npts
tan_vec(q1,:)=[Xout(1)-Xout(q1),Yout(1)-Yout(q1),0];
tan_vec(q1,:)=tan_vec(q1,:)/norm(tan_vec(q1,:));
else
tan_vec(q1,:)=[Xout(q1+1)-Xout(q1),Yout(q1+1)-Yout(q1),0];
tan_vec(q1,:)=tan_vec(q1,:)/norm(tan_vec(q1,:));
end
end
%determine direction of contour
local_cross=zeros(1,npts);
for q1=1:npts
if q1==npts
cross1=cross(tan_vec(q1,:),tan_vec(1,:));
else
cross1=cross(tan_vec(q1,:),tan_vec(q1+1,:));
end
local_cross(q1)=asin(cross1(3));
end
%figure out current direction
if sum(local_cross)<0
curr_dir='cw';
else
curr_dir='ccw';
end
%set direction of the contour
if and(strcmp(curr_dir,'cw'),strcmp(direction,'ccw'))
Xout=fliplr(Xout);
Yout=fliplr(Yout);
end
%varargout
if nargout==3
varargout{1}=orphans;
end
if nargout==4
varargout{1}=orphans;
varargout{2}=indout;
end
disp('finished')
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment