This document discusses using MATLAB to test a Bayesian robust mixture model (BRMM) for clustering incomplete data with outliers. It generates synthetic data from a BRMM, corrupts the data by removing values and adding outliers, then estimates the BRMM hyperparameters to recover the original model. The BRMM is tested on synthetic data to demonstrate its capabilities in handling incomplete data with noise. Plots of the estimated model and boundaries are produced to evaluate the results.
1. COMPONENTS EXPLANATION USING MATLAB
Our online tutors are available 24/7 to help with Components Explanation
homework and assignments, as well as long-term graduate or undergraduate Components Explanation projects.
Our tutors are experienced and proficient in Components Explanation, which ensures
high-quality Components Explanation homework help. Upload your Components Explanation
assignment using the ‘Submit Your Assignment’ button or email it to info@assignmentpedia.com. You can
also use our ‘Live Chat’ option to schedule an online tutoring session with our Components Explanation
tutors.
BAYESIAN CLUSTERING WITH OUTLIERS AND MISSING VALUES
MATLAB object for clustering incomplete data with extreme amounts of noise.
TestBRMM()
function TestBRMM()
%TestBRMM Test the Bayesian robust mixture model on synthetic data
%
% This routine tests the capabilities of the BRMM on synthetic data. It
% generates a set of features from a BRMM with unknown parameters and
% subsequently corrupts them by removing elements at random. Next, from
% these features alone, it estimates the hyper-parameters of the model
% responsible for generating them.
%
% The routine takes no inputs and returns no outputs. It prints progress
% messages to the command window, closes every open figure and opens two
% new ones (the lower-bound trace and the clustering result).
%
% (c) 2013 Gabriel Agamennoni.

    % Set size options.
    NumberOfComponents=3;
    NumberOfFeatures=5;
    NumberOfPoints=200;
    NumberOfMisses=50;

    % Set sampling options.
    ConcentrationParameter=5;
    SeparationParameter=3;
    NoiseParameter=10;
    OutlierParameter=5;
    PrecisionParameter=100;

    % Print header and display status.
    fprintf('\n')
    fprintf('Sampling data ... ')

    % Generate parameters for sampling.
    [Proportions,Locations,Dispersions]=GenerateParameters(NumberOfComponents,...
        NumberOfFeatures,ConcentrationParameter,SeparationParameter,NoiseParameter);

    % Create model for sampling.
    Model=BRMM(NumberOfComponents,NumberOfFeatures);

    % Set constant.
    % NOTE(review): presumably the degrees of freedom control how heavy the
    % tails of the sampling distribution are (i.e. how many outliers are
    % drawn) -- confirm against the BRMM class.
    Model.DegreesOfFreedom=OutlierParameter;

    % Set hyper-parameters.
    Model.ComponentProportions=Proportions;
    Model.ComponentStrength=PrecisionParameter;
    Model.ComponentLocations=Locations;
    Model.ComponentScales(:)=PrecisionParameter;
    Model.ComponentDispersions=Dispersions;
    Model.ComponentShapes(:)=max(PrecisionParameter,NumberOfFeatures);

    % Sample features from the model, then remove entries at random.
    [~,~,Features]=Model.Simulate(NumberOfPoints);
    Features=CorruptFeatures(Features,NumberOfMisses);

    % Update status.
    fprintf('Done\n')
    fprintf('Estimating model ... ')

    % Create a fresh model for estimation, so that nothing from the
    % sampling model above is carried over.
    Model=BRMM(NumberOfComponents,NumberOfFeatures);

    % Estimate model and posterior probabilities.
    [Model,Bounds,Probabilities]=Model.Estimate(Features);

    % Update status.
    fprintf('Done\n')
    fprintf('Plotting results ... ')

    % Close any existing figure.
    close('all')

    % Plot results.
    PlotBounds(Bounds)
    PlotResults(Features,Probabilities,Model.ComponentProportions,...
        Model.ComponentLocations,Model.ComponentDispersions)

    % Update status and print footer.
    fprintf('Done\n')
    fprintf('\n')

end
function [Proportions,Locations,Dispersions]=...
    GenerateParameters(NumberOfComponents,NumberOfFeatures,...
    ConcentrationParameter,SeparationParameter,NoiseParameter)
%GenerateParameters Draw random mixture parameters for sampling
%
% Inputs:
%   NumberOfComponents     - number of mixture components.
%   NumberOfFeatures       - dimension of each feature vector.
%   ConcentrationParameter - shape parameter of the gamma draws that are
%                            normalised into the mixing proportions.
%   SeparationParameter    - standard deviation of the normal draws used
%                            for the component locations.
%   NoiseParameter         - number of extra columns in the random gain
%                            matrix used to build each dispersion.
%
% Outputs:
%   Proportions - NumberOfComponents-by-1 vector of positive weights that
%                 sum to one.
%   Locations   - NumberOfFeatures-by-NumberOfComponents matrix.
%   Dispersions - NumberOfFeatures-by-NumberOfFeatures-by-NumberOfComponents
%                 array; each page is Gain*Gain' and hence symmetric
%                 positive semi-definite.

    % Sample proportion parameters. The column size is requested
    % explicitly because randg(a,m) returns an m-by-m matrix, which would
    % turn the normalisation below into a matrix right-division.
    Proportions=randg(ConcentrationParameter,NumberOfComponents,1);
    Proportions=Proportions/sum(Proportions);

    % Sample location parameters.
    Locations=SeparationParameter*randn(NumberOfFeatures,NumberOfComponents);

    % Allocate space for dispersion parameters.
    Dispersions=zeros(NumberOfFeatures,NumberOfFeatures,NumberOfComponents);

    % Sample dispersion parameters.
    for i=1:NumberOfComponents
        % Scaling by the square root of the column count keeps the
        % expected value of Gain*Gain' equal to the identity.
        Gain=randn(NumberOfFeatures,NumberOfFeatures+...
            NoiseParameter)/sqrt(NumberOfFeatures+NoiseParameter);
        Dispersions(:,:,i)=Gain*Gain';
    end

end
function Features=CorruptFeatures(Features,NumberOfMisses)
%CorruptFeatures Replace randomly chosen entries of a matrix with NaN
%
% Inputs:
%   Features       - numeric array whose entries are to be removed.
%   NumberOfMisses - number of entries to replace with NaN.
%
% Output:
%   Features - the input array with NumberOfMisses distinct entries set to
%              NaN. If NumberOfMisses exceeds the number of entries, every
%              entry is set to NaN; if it is zero or negative, the array
%              is returned unchanged.

    % Store size.
    NumberOfEntries=numel(Features);

    % Clamp the request to the valid range.
    NumberOfMisses=max(0,min(NumberOfMisses,NumberOfEntries));

    % Remove features at random. Sampling without replacement guarantees
    % that exactly NumberOfMisses entries are removed; sampling with
    % replacement could hit the same entry more than once.
    Features(randperm(NumberOfEntries,NumberOfMisses))=nan();

end
function PlotBounds(Bounds)
%PlotBounds Plot a sequence of lower-bound values against iteration number
%
% Input:
%   Bounds - vector of values; element k is drawn at horizontal position k.
%
% Opens a new figure titled 'Variational Lower Bound on the Model
% Evidence' and draws Bounds as a marked line. Returns nothing.

    % Set options.
    FontName='times';
    FontSize=20;
    LineColor=[0,0,1];
    LineWidth=2;
    MarkerSize=20;

    % Create figure.
    Figure=figure(...
        'NumberTitle','off',...
        'Name','Variational Lower Bound on the Model Evidence');

    % Create axes.
    Axes=axes(...
        'Parent',Figure,...
        'NextPlot','add',...
        'Box','on',...
        'Layer','top',...
        'FontName',FontName,...
        'FontSize',FontSize);

    % Annotate axes.
    set(get(Axes,'XLabel'),...
        'String','Iteration',...
        'FontName',FontName,...
        'FontSize',FontSize)
    set(get(Axes,'YLabel'),...
        'String','Lower bound',...
        'FontName',FontName,...
        'FontSize',FontSize)
    set(get(Axes,'Title'),...
        'String','Variational lower bound on the model evidence',...
        'FontName',FontName,...
        'FontSize',FontSize)

    % Plot lower bound.
    line(...
        'Parent',Axes,...
        'XData',1:numel(Bounds),...
        'YData',Bounds,...
        'Color',LineColor,...
        'LineWidth',LineWidth,...
        'Marker','.',...
        'MarkerSize',MarkerSize)

    % Adjust axes, leaving one unit of padding on either side of the data.
    set(Axes,...
        'XLim',[0,numel(Bounds)+1])

end
function PlotResults(Features,Probabilities,Proportions,Locations,Dispersions)
%PlotResults Plot features and fitted components as parallel coordinates
%
% Inputs:
%   Features      - NumberOfFeatures-by-NumberOfPoints matrix; each column
%                   is drawn as one line across the feature dimensions.
%   Probabilities - NumberOfComponents-by-NumberOfPoints matrix of
%                   per-point component weights, used to colour the lines.
%   Proportions   - per-component proportions, shown in the legend as
%                   percentages.
%   Locations     - NumberOfFeatures-by-NumberOfComponents matrix of
%                   component centres, drawn as dots.
%   Dispersions   - NumberOfFeatures-by-NumberOfFeatures-by-
%                   NumberOfComponents array; only the diagonal entries
%                   are used, to draw an interval around each centre.
%
% Opens a new figure titled 'Bayesian Clustering with Outliers and Missing
% Values'. Returns nothing.

    % Set options.
    FontName='times';
    FontSize=20;
    NumberOfColors=50;
    DilutionOfColor=1/2;
    LineWidth=2;
    MarkerSize=20;
    Margin=3/20;
    Whisker=1/5;
    Confidence=95/100;

    % Store size.
    [NumberOfFeatures,~]=size(Features);
    [NumberOfComponents,~]=size(Probabilities);

    % Build color map by quantizing colors. Each point's colour is the
    % probability-weighted blend of the component colours; k-means then
    % reduces these blends to at most NumberOfColors representatives.
    Color=hsv(NumberOfComponents);

    % Silence the empty-cluster warning while clustering and restore the
    % previous warning state afterwards.
    State=warning('Off','stats:kmeans:EmptyCluster');
    [Indices,Colors]=kmeans(Probabilities'*Color,NumberOfColors,...
        'Distance','cityblock',...
        'EmptyAction','drop',...
        'OnLinePhase','off');
    warning(State)

    % Create figure.
    Figure=figure(...
        'NumberTitle','off',...
        'Name','Bayesian Clustering with Outliers and Missing Values');

    % Create axes.
    Axes=axes(...
        'Parent',Figure,...
        'NextPlot','add',...
        'Box','on',...
        'Layer','top',...
        'FontName',FontName,...
        'FontSize',FontSize);

    % Annotate axes.
    set(get(Axes,'Title'),...
        'String','Bayesian clustering with outliers and missing values',...
        'FontName',FontName,...
        'FontSize',FontSize)
    set(get(Axes,'XLabel'),...
        'String','Feature dimensions',...
        'FontName',FontName,...
        'FontSize',FontSize)
    set(get(Axes,'YLabel'),...
        'String','Feature values',...
        'FontName',FontName,...
        'FontSize',FontSize)

    % Plot features, one line per point, grouped by quantized colour.
    for i=1:NumberOfColors
        for j=find(Indices==i)'
            % Blend the cluster colour with the axes background so that
            % the data lines stay fainter than the model overlay.
            line(...
                'Parent',Axes,...
                'XData',1:NumberOfFeatures,...
                'YData',Features(:,j),...
                'Color',(1-DilutionOfColor)*Colors(i,:)+...
                DilutionOfColor*get(Axes,'Color'),...
                'LineWidth',LineWidth)
        end
    end

    % Allocate space for line handles.
    Line=zeros(NumberOfComponents,1);

    % Plot model.
    for i=1:NumberOfComponents
        for j=1:NumberOfFeatures

            % Store horizontal limits. Components are spread evenly over a
            % band of width Margin centred on the feature index so their
            % markers do not overlap.
            Left=j-Margin*((i-1)/max(NumberOfComponents-1,1)-1/2)-Whisker/2;
            Right=j-Margin*((i-1)/max(NumberOfComponents-1,1)-1/2)+Whisker/2;

            % Plot location. The handle from the last feature is the one
            % kept for the legend.
            Line(i)=line(...
                'Parent',Axes,...
                'XData',Left/2+Right/2,...
                'YData',Locations(j,i),...
                'Color',Color(i,:),...
                'Marker','.',...
                'MarkerSize',MarkerSize);

            % Store vertical limits. erfcinv(1+c) equals -erfcinv(1-c), so
            % Low and High are symmetric about the location; for a normal
            % distribution with variance Dispersions(j,j,i) they bound the
            % central interval of probability Confidence.
            Low=Locations(j,i)-sqrt(2*Dispersions(j,j,i))*erfcinv(1-Confidence);
            High=Locations(j,i)-sqrt(2*Dispersions(j,j,i))*erfcinv(1+Confidence);

            % Plot dispersion as an I-shaped whisker; the NaN entries
            % break the line into its three segments.
            line(...
                'Parent',Axes,...
                'XData',[Left,Right,nan(),Left/2+Right/2,...
                Left/2+Right/2,nan(),Left,Right],...
                'YData',[Low,Low,nan(),Low,High,nan(),High,High],...
                'Color',Color(i,:),...
                'LineWidth',LineWidth)

        end
    end

    % Allocate space for labels.
    Label=cell(NumberOfComponents,1);

    % Create labels.
    for i=1:NumberOfComponents
        Label{i}=sprintf('Component %d: %2.1f%%',i,100*Proportions(i));
    end

    % Annotate plot.
    set(legend(Line,Label{:}),...
        'FontName',FontName,...
        'FontSize',FontSize,...
        'Location','northeast')

    % Adjust axis limits.
    set(Axes,...
        'XTick',1:NumberOfFeatures,...
        'XLim',[1/2,NumberOfFeatures+1/2])

end
Visit us at www.assignmentpedia.com, email us at info@assignmentpedia.com, or call us at +1 520 8371215.