-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathremoveDuplicates.m
More file actions
97 lines (83 loc) · 3.37 KB
/
removeDuplicates.m
File metadata and controls
97 lines (83 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
function [dupl,seq,Data,GPS,tBox,tHead,stat,duplData] = removeDuplicates(seq,Data,GPS,tBox,tHead,stat,HZ)
% [dupl,seq,Data,GPS,tBox,tHead,stat,duplData] =
%           removeDuplicates(seq,Data,GPS,tBox,tHead,stat,HZ)
%
% Locates duplicated blocks (a jump of exactly 256 between consecutive
% entries of seq), verifies them, deletes the FIRST block of each
% duplicated pair from every stream, and renumbers seq so that the deleted
% blocks are not counted as data gaps.
%
% optionally, run dupl = removeDuplicates(seq) to find duplicate blocks
% (candidates only; no verification and no removal is done in that mode).
%
% Inputs:
%   seq    - sequence numbers, one entry per block
%   Data   - data matrix holding HZ columns per block; rows 1:3 are used
%            as the magnetic channels for the duplicate test
%   GPS, tBox, tHead, stat - per-block vectors, indexed like seq
%   HZ     - samples per block (any positive integer); defaults to 1
%
% Outputs:
%   dupl     - indices, in the OUTPUT seq array, of the block that remains
%              from each duplicated pair
%   seq, Data, GPS, tBox, tHead, stat - inputs with the duplicates removed
%   duplData - the columns of Data that were removed
%
% A candidate is accepted as a true duplicate only if, for the two blocks
% dupl(i) and dupl(i)+1:
%   * the magnetic channels of the first sample of each block are equal,
%   * GPS is equal in both blocks and equals 199 (GPS lock being acquired),
%   * tBox, tHead and stat are equal in both blocks.
%
% Last edited by Anna Kelbert on Nov 2, 2015 to allow usage with 8 Hz data.
% The original version resulted in completely erroneous results: it not
% only wasn't looking for duplicates in the right places for 8 Hz, but also
% cut the data off at 1/8th of the size. 8 Hz data contains just as many
% blocks as similar 1 Hz data, but 8 times the data stream... This has now
% been fixed and can be used with HZ equal to any integer number.
%
% Strangely, for 8 Hz data, only the first measurement of 8 in the
% "duplicate" data blocks is in fact duplicate.
%
% Edited again by Anna Kelbert and Paul Bedrosian on Mar 25, 2016 to remove
% the first of the duplicate data blocks (not the second!) because that is
% the one where the E-fields tend to get corrupted, resulting in spikes.
% Additionally, we now locate the duplicate by checking that all the
% magnetics are identical, not the electrics. Finally, we now update the
% sequence numbers when we are done, to no longer count these (deleted)
% duplicates as data gaps.
%
% Fixed: the magnetics test used to compare a slice of Data with itself
% (always true), so it never rejected anything. It now compares the first
% sample of block dupl(i) against the first sample of block dupl(i)+1,
% which is the whole block for 1 Hz data and the one sample that is
% actually repeated for higher sampling rates (see the note above).
%
% Optional output duplData are the data that have been removed on output.
% E.g., for 8 Hz data, Data(:,8*(dupl(4)-1)+1:8*dupl(4)) will contain the
% block of data that remains in the data stream while duplData(:,8*3+1:8*4)
% will contain the corresponding deleted data block.

% candidate duplicates: a jump of exactly 256 in the sequence numbers
dupl = find(diff(seq)==256);
duplData = [];

% search-only mode, or nothing to do
if nargin == 1 || isempty(dupl)
    return
end

% by default, assume 1 Hz data
if nargin < 7
    HZ = 1;
end

% verify that these correspond to duplicate blocks
% AND happen while the GPS lock is being acquired
isTrueDupl = true(1,length(dupl));
for i = 1:length(dupl)
    k = dupl(i);                 % first block of the suspected pair
    firstOfThis = HZ*(k-1) + 1;  % column of the first sample of block k
    firstOfNext = HZ*k + 1;      % column of the first sample of block k+1
    sameMagnetics = isequal(Data(1:3,firstOfThis), Data(1:3,firstOfNext));
    isTrueDupl(i) = sameMagnetics ...
        && (GPS(k) == GPS(k+1)) && (GPS(k) == 199) ...
        && (tBox(k) == tBox(k+1)) ...
        && (tHead(k) == tHead(k+1)) ...
        && (stat(k) == stat(k+1));
end

% keep only the true duplicates
dupl = dupl(isTrueDupl);
ndupl = numel(dupl);

% find the same duplicates in the data stream:
% block k occupies columns HZ*(k-1)+1 : HZ*k of Data
data_dupl = zeros(HZ,ndupl);
for i = 1:ndupl
    data_dupl(:,i) = (HZ*(dupl(i)-1)+1:HZ*dupl(i));
end
data_dupl = reshape(data_dupl,1,HZ*ndupl);

% an optional output used for keeping track of duplicate Data values
if nargout >= 8
    duplData = Data(:,data_dupl);
end

% remove them from the data
if ndupl > 0
    % close the artificial gap of 256 that follows every deleted block
    for i = 1:ndupl
        seq(dupl(i)+1:end) = seq(dupl(i)+1:end) - 256;
    end

    % indices of the blocks / data columns that survive
    ind = setdiff(1:length(seq),dupl);
    data_ind = setdiff(1:(HZ*length(seq)),data_dupl);

    Data = Data(:,data_ind);
    GPS = GPS(ind);
    tBox = tBox(ind);
    tHead = tHead(ind);
    stat = stat(ind);
    seq = seq(ind);
    disp(['Deleted ' num2str(ndupl) ' duplicate blocks from ' num2str(HZ) ' Hz data']);
end

% update duplicate indices to the new seq array: i-1 earlier blocks have
% been deleted, and the surviving partner was originally at dupl(i)+1
for i = 1:ndupl
    dupl(i) = dupl(i) - i + 1;
end