forked from derek591/UTESpac
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathloadData.m
More file actions
172 lines (137 loc) · 7.37 KB
/
loadData.m
File metadata and controls
172 lines (137 loc) · 7.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
function [data, dataInfo] = loadData(dataFiles,currentDateNumber,totalNumberofDates, info,tableNames)
%LOADDATA Load, clean and gap-fill every data table belonging to one date.
%   Multiple files per table (and therefore per day) are allowed: all files
%   found along the 3rd dimension of dataFiles are loaded and stacked
%   vertically before the table is processed.
%
%   Inputs
%     dataFiles           cell array indexed as dataFiles{1, table, file}.
%                         Entries that are empty or that name a directory
%                         are skipped; all others are read with load().
%     currentDateNumber   index of the date being processed (progress message only)
%     totalNumberofDates  total number of dates (progress message only)
%     info                struct; the fields read here are siteFolder,
%                         tableNames, tableScanFrequency, tableNumberOfColumns,
%                         UTESpacVersion, detrendingFormat and
%                         PF.globalCalculation / globalCalcMinWind / globalCalcMaxWind
%     tableNames          cell array with one table name per column of dataFiles
%
%   Outputs
%     data      1-by-nTables cell. Each cell holds the table merged onto the
%               time grid returned by completeTableCreate. Tables that could
%               not be loaded, or that cover fewer days than the longest
%               table, are created/extended with completeTableCreate output.
%     dataInfo  cell array. Rows 1-4 of column 1 hold run options (written
%               only once at least one table has loaded); rows 5-8 of
%               column ii hold the summary strings for table ii.
%
%   Side effect: dataInfo is also written to the base workspace.
%
%   NOTE(review): the columns are presumably Campbell time stamps
%   [year, day-of-year, HHMM, seconds, ...] - columns 1:4 are handed to
%   campbellDate2SerialDate - confirm against the logger output format.

fprintf('\nEvaluating Date %g of %g in folder %s',currentDateNumber,totalNumberofDates,info.siteFolder);

numTables = size(dataFiles,2);
% dataInfo starts with 4 rows and grows to 8 when the first table summary is stored
dataInfo = cell(4,numTables);
data = cell(1,numTables);

% ---- pass 1: load and clean every table -------------------------------
for ii = 1:numTables
    % look the table up in the site configuration (case-insensitive)
    tableName = tableNames{ii};
    tableNumber = strcmpi(tableName,info.tableNames);
    sampleFrequency = info.tableScanFrequency(tableNumber);
    expectedTableColumns = info.tableNumberOfColumns(tableNumber);

    % load every file of this table and stack them vertically
    temp = [];
    fileLoaded = false;
    for jj = 1:size(dataFiles,3)
        fileName = dataFiles{1, ii, jj};
        % skip unused slots and directory entries
        if isempty(fileName) || isdir(fileName)
            continue
        end
        if ~fileLoaded
            % report the file that is actually being loaded
            fprintf('\nloading %s.\nDefined in siteInfo.m\n\tExpected Scan Frequency = %0.02g Hz\n\tNumber of .csv Columns = %0.02g.',fileName,sampleFrequency,expectedTableColumns)
            temp = load(fileName);
            fileLoaded = true;
        else
            temp = [temp; load(fileName)]; %#ok<AGROW>
        end
    end

    % nothing on disk for this table: leave data{ii} empty, pass 2 fills it
    if ~fileLoaded
        continue
    end

    % make the column count match the configuration: pad with NaN or trim
    numFoundColumns = size(temp,2);
    if numFoundColumns < expectedTableColumns
        warning('number of columns found in %s = %0.02g. number expected = %0.02g. Table will be padded with NaNs!',tableName,numFoundColumns,expectedTableColumns)
        temp = horzcat(temp,nan(size(temp,1),expectedTableColumns-numFoundColumns));
    elseif numFoundColumns > expectedTableColumns
        warning('number of columns found in %s = %0.02g. number expected = %0.02g. Extra columns will be deleted!',tableName,numFoundColumns,expectedTableColumns)
        temp(:,expectedTableColumns+1:end) = [];
    end

    % eliminate round off error in second calculation
    temp(:,4) = round(temp(:,4).*100)./100;

    % build one sortable integer key per row from columns 2-4, then sort
    % ascending and drop duplicate time stamps (unique keeps one row per key)
    dateNumbers = round(temp(:,2).*1e8 + temp(:,3).*10000+temp(:,4)*100);
    [dateNumbers, keepRows, ~] = unique(dateNumbers,'R2012a');
    tbl = temp(keepRows,:);   % named tbl so the built-in table() is not shadowed

    % drop all rows more than 2 units of column 2 (days) past the first row
    cutOff = round((dateNumbers(1)+2*1e8)/100)*100;
    tbl(dateNumbers>cutOff,:) = [];

    tableSize = size(tbl);

    % -7999 is treated as a missing value
    tbl(tbl == -7999) = NaN;
    numNans = sum(sum(isnan(tbl)))/(tableSize(1)*tableSize(2));

    % create empty table with complete time steps (no missed scans!)
    % Rows 2 and end-1 define the covered day range, so the table needs at
    % least 2 rows. NOTE(review): presumably the first/last rows are avoided
    % because they may belong to a neighbouring day - confirm.
    firstSerialDay = datenum(tbl(2,1),1,0) + tbl(2,2);
    lastSerialDay = datenum(tbl(end-1,1),1,0) + tbl(end-1,2);
    % % % if beginSerialDay~=endSerialDay
    % % % error([char(13), 'DEBUG:::Files for day starting on: ', datestr(beginSerialDay),' spans more than 1 day, please check to make sure data file does not contain data going in to next day.', ...
    % % % char(13), 'This error is for debuging purposes, comment out if datafiles actually span more than 1 day', char(13)]);
    % % % end
    filledTable = completeTableCreate(firstSerialDay,lastSerialDay,sampleFrequency,expectedTableColumns);

    % fraction of the expected rows that were actually recorded
    dataPercent = tableSize(1)/size(filledTable,1);

    % merge table into filledTable - time stamps are matched on the serial
    % date number truncated to 8 decimals
    [~, ia, ib] = intersect(floor(campbellDate2SerialDate(tbl(:,1:4))*1e8),floor(campbellDate2SerialDate(filledTable(:,1:4))*1e8),'stable');
    filledTable(ib,2:end) = tbl(ia,2:end);

    data{ii} = filledTable;

    % per-table summary strings
    localTableInfo = cell(4,1);
    localTableInfo{1,1} = sprintf('table %g (%s):',ii,tableName);
    localTableInfo{2,1} = sprintf('\t percent NaNs: %g%s',numNans*100,'%');
    localTableInfo{3,1} = sprintf('\t table scan frequency: %g Hz',sampleFrequency);
    localTableInfo{4,1} = sprintf('\t percent data available: %g%s',dataPercent*100,'%');
    for j = 1:length(localTableInfo)
        display(localTableInfo{j,1});
    end

    % run options live in column 1; they are (re)written for every table
    % that loads, so they stay empty when no table loads at all
    dataInfo{1,1} = 'Run Options';
    dataInfo{2,1} = ['Run on: ',datestr(now,'dd_mmm_yyyy'),' - UTESpac: ',info.UTESpacVersion];
    if strcmp(info.detrendingFormat,'linear')
        dataInfo{3,1} = 'Linear Detrending';
    else
        dataInfo{3,1} = 'Constant Detrending';
    end
    if strcmp(info.PF.globalCalculation,'global')
        dataInfo{4,1} = ['Planar Fit Max/Min Wind: [',num2str(info.PF.globalCalcMinWind),' ',num2str(info.PF.globalCalcMaxWind),']'];
    end
    dataInfo(5:8,ii) = localTableInfo;
    fprintf('table %g loaded successfully!\n',ii)
end

% ---- pass 2: give every table the same day coverage --------------------
% find max number of days spanned by any data table. Possible number of days is 0, 1, or 2
numDays = nan(size(data));
startSerialDay = nan(size(data));
endSerialDay = nan(size(data));
for ii = 1:numel(data)
    if isempty(data{ii})
        numDays(ii) = 0;
    else
        startSerialDay(ii) = datenum(data{ii}(1,1),1,0) + data{ii}(1,2);
        endSerialDay(ii) = datenum(data{ii}(end-1,1),1,0) + data{ii}(end-1,2);
        numDays(ii) = endSerialDay(ii) - startSerialDay(ii)+1;
    end
end
% index points at the table with the widest coverage; it is the reference
[maxNumDaysForAnyTable, index] = max(numDays);

% create non-existent tables and complete incomplete tables
for ii = 1:numel(data)
    tableName = tableNames{ii};
    % same case-insensitive lookup as in pass 1, so both passes agree
    tableNumber = strcmpi(tableName,info.tableNames);
    if numDays(ii) == 0 % the table does not exist
        sampleFrequency = info.tableScanFrequency(tableNumber);
        expectedTableColumns = info.tableNumberOfColumns(tableNumber);
        data{ii} = completeTableCreate(startSerialDay(index),endSerialDay(index),sampleFrequency,expectedTableColumns);
    elseif numDays(ii) < maxNumDaysForAnyTable % the table is incomplete
        if startSerialDay(ii) > startSerialDay(index) % first day is missing
            % prepend filler data for the first day
            sampleFrequency = info.tableScanFrequency(tableNumber);
            fillerData = completeTableCreate(startSerialDay(index),startSerialDay(index),sampleFrequency,size(data{ii},2));
            data{ii} = vertcat(fillerData,data{ii});
        elseif endSerialDay(ii) < endSerialDay(index) % second day is missing
            % append filler data for the second day
            sampleFrequency = info.tableScanFrequency(tableNumber);
            fillerData = completeTableCreate(endSerialDay(index),endSerialDay(index),sampleFrequency,size(data{ii},2));
            data{ii} = vertcat(data{ii},fillerData);
        end
    end
end

% publish the summary to the base workspace as well as returning it
assignin('base','dataInfo',dataInfo);
end