function f = dklnorm (filename, nrofpartitions, partitionlength, searchdigit)
% Implementation of Nelson, 'Too noisy at the bottom', formula (4)
% Parameters
% - filename is an external file of the first one million digits of pi. 
%   Specify in quotes,ie, 'pi6.txt'
% - nrofpartitions = how many 'documents' are in the 'corpus' = an integer
% - partitionlength = how many digits in each 'document' = an integer
% - searchdigit = digit 0..9 which occur in the pi sequence. The digits are
%   treated as characters rather than numbers, so the parameter must be in
%   quotes, ie,'7'.

disp('DKLnorm');

% Read digits from the external text file, which contains the
% first 1000000 digits of pi.
 f = filename;
 fid = fopen (f);                                                        
 frewind (fid);                                                         
 formatspec = '%s'; % Represent digits as text 
 pidigits = fscanf(fid,formatspec);
 fclose (fid); 
 disp ('Pi digit file read');

 % Create the corpus = a [nrofpartitions x partitionlength] matrix
 % Each parition is a sequence of partitionlength digits, working in
 % sequence from the decimal point in pi. So, if partitionlength = 100.the 
 % first partition is digits 1...100, the second 001...200, and so on.
 k = 0;
 for i = 1:nrofpartitions
  for j = 1:partitionlength
   k = k + 1;
   corpus(i,j) = pidigits(k);
  end
 end
 disp ('Corpus matrix created');

 % Create the expectation vector as described by Nelson
 for i = 1:nrofpartitions
  expected(i) = 1/partitionlength;
 end
 % Write to file for inspection
 dlmwrite('expected.txt',expected(:)); % Write to file
 disp('Expected vector created');
  
 % Create the observation vector
 % Calculate frequencies
 for i = 1:partitionlength
  sum = 0;
  for j = 1:nrofpartitions
   if corpus(j,i) == searchdigit
    sum = sum + 1;
   end
  end
  observedfreq(i) = sum;
 end
 % Calculate proportions
 totalfreq = 0;
 for i = 1:partitionlength
  totalfreq = totalfreq + observedfreq(i); 
 end
 for i = 1:partitionlength
  observedprop(i) = observedfreq(i) / totalfreq;
 end
 % Write to file for inspection
 dlmwrite('observedfreq.txt',observedfreq(:));
 dlmwrite('observedprop.txt',observedprop(:));
 disp('Observed vector created');

 % Calculate Dklnorm and output result as per formula (4) in Nelson
 sum = 0;
 for i= 1:nrofpartitions
  sum = sum + (observedprop(i) * log(observedprop(i) / expected(i)));
 end
 dkl = sum;
 % Normalize < Nelson
 e = 2.718;
 dkl = 1 - e^dkl; 
 disp ('Dklnorm calculated');
 format short;
 f = dkl;
end