Home > atmlab > datasets > CollocatedMicrowaveOnlyIWP.m

CollocatedMicrowaveOnlyIWP

PURPOSE ^

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

DOWNLOAD ^

CollocatedMicrowaveOnlyIWP.m

SOURCE CODE ^

0001 classdef CollocatedMicrowaveOnlyIWP < HomemadeDataset
0002     % Class holding functionality for Gerrits most primivite IWP dataset
0003     %
0004     % This class performs two major roles:
0005     %
0006     % - Reading and locating granules, mostly inherited from SatDataset
0007     % - Retrieving IWP
0008     %
0009     % The second task depends on the Neural Network Toolbox, tested with
0010     % version 7.0.3 (R2012a).
0011     %
0012     % TODO:
0013     %
0014     %   - Add info on data used for training
0015     %   - Study performance using LESS input data
0016     %   - fix problem with discontinuities per lat (and lza?)
0017     %   - use a land-water-mask?
0018     
0019     % $Id: CollocatedMicrowaveOnlyIWP.m 8516 2013-06-26 21:33:48Z gerrit $
0020     
0021     properties (Transient)
0022         nets = struct();
0023         nchannels = 3;
0024         data;
0025         localcols;
0026     end
0027        
0028     properties (Transient, GetAccess = private, SetAccess = private)
0029        selection; 
0030        maxsize = 20000; % max number of entries for neural net
0031        retrieval_limits;
0032 %        target_limits = ...
0033 %             struct(...
0034 %                 'retrieval', struct(...
0035 %                     'MEAN_ROIWP', [eps inf], ... % positive
0036 %                     'NO_ROIWP', [10 inf], ... % at least 10
0037 %                     'FRAC1_ROIWP', [1 1], ... % fully cloudy
0038 %                     'CV_ROIWP', [0 1]), ... % sd <= mean
0039 %                 'classification', struct());
0040             
0041        fromfile;
0042     end
0043     
0044     properties (Transient, Constant)
0045         %version = 0.6;
0046         changelog = sprintf([...
0047             'version 0.0: initial version, negative IWP, linear all\n' ...
0048             'version 0.1: cf [0, 1], log-IWP retrieved, more descriptive atts\n', ...
0049             'version 0.2: more descriptive atts, cf -> cf100, improved training sel\n' ...
0050             'version 0.3: added lza, pos, line, fromfile; apply more lims; apply same lims to retrieved as to trained (afap)\n', ...
0051             'version 0.4: more flexible approach, store more fields, retrieve globally with many networks\n', ...
0052             'version 0.5: made global, better error checks\n', ...
0053             'version 0.6: don''t store when flagged, fix bug for nhpolar ...\n']);
0054         lims_always = struct('B_BT', [100 400]);
0055     end
0056     
0057     methods
0058         function self = CollocatedMicrowaveOnlyIWP(varargin)
0059             self = self@HomemadeDataset(varargin{:});
0060             self.setcols();
0061             self.setmembers();
0062             self.reader = @(varargin) (satreaders.netcdf_dataset(self, varargin{:}));
0063             self.version = 0.6;
0064         end
0065         
0066         function getdata(self, start_date, end_date)
0067             d = datasets;
0068             
0069 %            self.retrieval_limits = ...
0070 %                struct('LAT1', [-30 30], ...
0071 %                       'POS1', [40 50], ...
0072 %                       'B_BT', [0 400]);
0073             
0074             [M, c] = d.CollocatedDataset_mhs_cpr.read(...
0075                 start_date, end_date, 'noaa18', ...
0076                 {'LAT1', 'LON1', 'POS1', 'TIME1', 'B_BT', 'B_LZA', ...
0077                  'NO_ROIWP', 'MEAN_ROIWP', 'CV_ROIWP', 'FRAC1_ROIWP', 'FRAC10_ROIWP', 'FRAC100_ROIWP', 'FRAC1000_ROIWP'}, ...
0078                  self.lims_always);
0079              
0080              self.data = M;
0081              self.localcols = c;
0082              if size(M, 1) > self.maxsize
0083                  self.selection = unique(round(linspace(1, size(M, 1), self.maxsize)));
0084              else
0085                  self.selection = 1:size(M, 1);
0086              end
0087         end
0088         
0089         function make_and_train_network(self, name, source, kind, target, varargin)
0090             % name: (string)
0091             % source: e.g. {'B_LAT', 'B_BT', 3:5, 'B_POS'}
0092             % kind: @fitnet, @patternnet, ...
0093             % target: (string), fieldname in data to train against
0094             % lims_retrieval: (optional) structure, default empty
0095             % lims_overall: (optional), structure, default empty
0096             % {transform, inverse_transform} (optional): function_handle
0097             % for target, e.g. log10, and its inverse
0098             
0099             
0100             narginchk(5, 8);
0101             [lims_retrieval, lims_overall, transform] = optargs(varargin, {struct(), struct(), {@(x)x, @(x)x}});
0102             rqre_datatype(name, @ischar);
0103             rqre_datatype(source, @iscell);
0104             rqre_datatype(target, @ischar);
0105             rqre_datatype(kind, @isfunction_handle);
0106             rqre_datatype(lims_retrieval, @isstruct);
0107             rqre_datatype(lims_overall, @isstruct);
0108             rqre_datatype(transform, @iscell);
0109             rqre_datatype(transform{1}, @isfunction_handle);
0110             rqre_datatype(transform{2}, @isfunction_handle);
0111             
0112             lims = catstruct(lims_retrieval, lims_overall);
0113             % fill source with numbers where not present, so that each
0114             % string is followed by a 1, e.g.
0115             % {'LAT1', 'BT', 3:5} --> {'LAT1', 1, 'BT', 3:5}
0116             isaname = cellfun(@ischar, source);
0117             namewithoutno = diff([isaname true])==0;
0118             filler = cell(size(source));
0119             filler(namewithoutno) = {1};
0120             source = [source; filler];
0121             source = source(:).';
0122             source(cellfun(@isempty, source)) = [];
0123             sstruct = struct(source{:});
0124             % get appropiate columns in self.data
0125             allpos = cellfun(@(X) self.localcols.(X)(sstruct.(X)), fieldnames(sstruct), 'UniformOutput', false);
0126             allpos = [allpos{:}];
0127             
0128             self.needsbox();
0129             if isempty(self.data)
0130                 error(['atmlab:' mfilename ':nodata'], ...
0131                     'No data found. Did you run .getdata(...)?');
0132             end
0133 
0134             dat = self.data;
0135             dat = dat(collocation_restrain(dat, limstruct2limmat(lims, self.localcols)), :);
0136                         
0137             if size(dat, 1) == 0
0138                 logtext(atmlab('OUT'), 'Cannot train network, no data, stopping now\n');
0139                 return
0140             elseif size(dat, 1) > self.maxsize
0141                 subsel = unique(round(linspace(1, size(dat, 1), self.maxsize)));
0142                 logtext(atmlab('OUT'), 'Reducing %d -> %d points before training\n', size(dat, 1), self.maxsize);
0143                 dat = dat(subsel, :);
0144             end
0145 
0146             sourcedat = dat(:, allpos); % might include BT, lat, angle...
0147             targetdat = dat(:, self.localcols.(target));
0148             
0149             net = kind(6);
0150                         
0151             net.trainParam.showWindow = false;
0152             logtext(atmlab('OUT'), 'Training %s(data)...\n', func2str(transform{1}));
0153             net = train(net, sourcedat.', transform{1}(targetdat).');
0154             logtext(atmlab('OUT'), 'Trained\n');
0155 
0156             self.nets.(name) = net;
0157             self.nets.(name).userdata.name = name;
0158             self.nets.(name).userdata.target = target;
0159             self.nets.(name).userdata.kind = func2str(kind);
0160             self.nets.(name).userdata.source = source; % store cell-array, makes me feel safer than struct because of order
0161             self.nets.(name).userdata.lims = ...
0162                 struct('retrieval', lims_retrieval, ...
0163                        'training', lims_overall);
0164             % store with func2str, otherwise entire workspace is stored
0165             % along with it. This is not only too large, it also causes
0166             % problems with 'self', which will be recreated upon
0167             % deserialization, causing "already exists" errors.
0168             self.nets.(name).userdata.transform = {func2str(transform{1}) func2str(transform{2})};
0169             
0170         end
0171         
0172         function [M, c] = retrieve(self, input_data, cols)
0173            % get whatever can be retrieved from all known nets
0174            %
0175            % for each row, go through each network, and if conditions are
0176            % met, retrieve
0177            %
0178            % cols fields must be as <network>.userdata.source
0179                      
0180            % single row impl.
0181            netnames = fieldnames(self.nets);
0182            M = zeros(size(input_data, 1), length(netnames));
0183            for i = 1:length(netnames)
0184                netname = netnames{i};
0185                net = self.nets.(netname);
0186                % apply same limitations as upon retrieval
0187                limmat = limstruct2limmat(catstruct(self.lims_always, net.userdata.lims.retrieval), cols);
0188                % logical true for rows where conditions are OK
0189                OK = collocation_restrain(input_data, limmat);
0190                % initialise matrix
0191                M(:, i) = nan(size(input_data, 1), net.numOutputs);
0192                c.(netname) = i;
0193                
0194                fnms = net.userdata.source(1:2:end);
0195                src = net.userdata.source;
0196                sstruct = struct(src{:});
0197                colnos = cellfun(@(X) cols.(X)(sstruct.(X)), fnms, 'UniformOutput', false);
0198                % apply the network
0199                retr = net(input_data(OK, [colnos{:}]).');
0200                % inverse transform
0201                ivt = str2func(net.userdata.transform{2});
0202                M(OK, i) = ivt(retr);
0203            end
0204         end
0205 
0206         %{
0207         function make_retrieval_net(self, lims)
0208             % prepare neural network with BT and IWP
0209             %
0210             % takes BT, IWP. Warning; each row is a channel, each column a
0211             % collocation. This is opposite of Collocations Toolbox.
0212             
0213             self.needsbox();
0214             if isempty(self.data)
0215                 error(['atmlab:' mfilename ':nodata'], ...
0216                     'No data found. Did you run .getdata(...)?');
0217             end
0218             
0219             dat = self.data;
0220             dat = dat(collocation_restrain(dat, limstruct2limmat(self.target_limits.retrieval, self.localcols)), :);
0221             dat = dat(collocation_restrain(dat, limstruct2limmat(lims, self.localcols)), :);
0222             
0223             if size(dat, 1) > self.maxsize
0224                 subsel = unique(round(linspace(1, size(dat, 1), self.maxsize)));
0225                 dat = dat(subsel, :);
0226                 logtext(atmlab('OUT'), 'Reducing to %d points before training\n', self.maxsize);
0227             end
0228             
0229             BT = dat(:, self.localcols.B_BT(3:5));
0230             IWP = dat(:, self.localcols.MEAN_ROIWP);
0231 
0232             net = fitnet(6);
0233             net.trainParam.showWindow = false;
0234             logtext(atmlab('OUT'), 'Training...\n');
0235             net = train(net, BT.', log10(IWP).');
0236             logtext(atmlab('OUT'), 'Trained\n');
0237             
0238             self.nets.retrieval = net;
0239             self.nets.retrieval.userdata.retrieval_limits = self.retrieval_limits;
0240             self.nets.retrieval.userdata.training_limits = self.training_limits;
0241         end
0242         
0243         function make_classification_net(self)
0244             % prepare neural net with BT and cloudfrac
0245             
0246             self.needsbox();
0247             BT = self.data(self.selection, self.localcols.B_BT(3:5)).';
0248             cloudfrac100 = self.data(self.selection, self.localcols.FRAC100_ROIWP).';
0249             net = patternnet(6);
0250             net.trainParam.showWindow = false;
0251             logtext(atmlab('OUT'), 'Training...\n');
0252             net = train(net, BT, cloudfrac100);
0253             logtext(atmlab('OUT'), 'Trained\n');
0254             self.nets.classification100 = net;
0255             self.nets.classification100.userdata.retrieval_limits = self.retrieval_limits;
0256             self.nets.classification100.userdata.training_limits = self.training_limits;
0257         end
0258 %}
0259         
0260         function makenets(self)
0261             % make many nets, quite hardcoded
0262             
0263             latrange.antarctic = [-90 -60];
0264             latrange.shmidlat = [-60 -30];
0265             latrange.tropical = [-30 30];
0266             latrange.nhmidlat = [30 60];
0267             latrange.nhpolar = [60 90];
0268             
0269             angrange.nadir = [0 5];
0270             angrange.smallang = [5 15];
0271             angrange.midang = [15 25];
0272             angrange.bigang = [25 35];
0273             
0274             train_iwp.MEAN_ROIWP = [realmin realmax];
0275             train_iwp.NO_ROIWP = [10 intmax];
0276             train_iwp.FRAC1_ROIWP = [1 1];
0277             train_iwp.CV_ROIWP = [0 1];
0278 
0279             train_cf = struct();
0280             
0281             lrfn = fieldnames(latrange);
0282             arfn = fieldnames(angrange);
0283             for i = 1:length(lrfn)
0284                 for j = 1:length(arfn)
0285                     lr = lrfn{i};
0286                     ar = arfn{j};
0287                     retr_lims.LAT1 = latrange.(lr);
0288                     retr_lims.B_LZA = angrange.(ar);
0289                                         
0290                     logtext(atmlab('OUT'), 'Training %s %s FIWP\n', lr, ar);
0291 
0292                     self.make_and_train_network(...
0293                         [lr '_' ar '_fiwp'], ...
0294                         {'B_BT', 3:5}, ...
0295                         @fitnet, ...
0296                         'MEAN_ROIWP', ...
0297                         retr_lims, ...
0298                         train_iwp, ...
0299                         {@log10, @(x)10.^x});
0300                                         
0301                     logtext(atmlab('OUT'), 'Training %s %s CF100\n', lr, ar);
0302 
0303                     self.make_and_train_network(...
0304                         [lr '_' ar '_fc100'], ...
0305                         {'B_BT', 3:5}, ...
0306                         @patternnet, ...
0307                         'FRAC100_ROIWP', ...
0308                         retr_lims, ...
0309                         train_cf, ...
0310                         {@(x)x, @(x)x});
0311                 end
0312             end
0313 
0314         end
0315         function storenets(self, fl)
0316             self.needsbox();
0317             self.fromfile = fullfile(self.basedir, fl);
0318             logtext(atmlab('OUT'), 'Storing networks to %s\n', self.fromfile);
0319             fns = fieldnames(self.nets);
0320             for i = 1:length(fns)
0321                 if isa(self.nets.(fns{i}), 'network')
0322                     self.nets.(fns{i}).userdata.stored = self.fromfile;
0323                 end
0324             end
0325             nt = self.nets; %#ok<NASGU>
0326             save(self.fromfile, 'nt');
0327         end
0328         
0329         function loadnets(self, fl)
0330             self.needsbox();
0331             self.fromfile = fullfile(self.basedir, fl);
0332             logtext(atmlab('OUT'), 'Loading networks from %s\n', self.fromfile);
0333             self.nets = loadvar(self.fromfile, 'nt');
0334         end
0335         
0336         function retrieve_and_store_gran(self, dt, spec)
0337             self.needsbox();
0338             if isempty(self.fromfile)
0339                 error(['atmlab:' mfilename ':nonet'], ...
0340                     'First get network from or store network to file!');
0341             end
0342             d = datasets;
0343             ds = d.(amsub_or_mhs(spec));
0344             grandata = ds.read_granule(dt, spec);
0345             %graninfo = ds.find_info_from_granule(ds.find_granule_by_datetime(dt, spec));
0346             grandata.pos = repmat((1:size(grandata.lat, 2)), [size(grandata.lat, 1), 1]);
0347             grandata.line = repmat((1:size(grandata.lat, 1)).', [1 size(grandata.lat, 2)]);
0348             grandata.time = repmat(grandata.time, [1 90]);
0349             
0350             mytb = reshape(grandata.tb, [numel(grandata.lat) size(grandata.tb, 3)]);
0351             
0352             % need LAT1, B_LZA, B_BT
0353             cols.LAT1 = 1;
0354             cols.B_LZA = 2;
0355             cols.B_BT = 3:7; % for retrieve, have to provide all, even if only 3 used, because of storing of limitations struct
0356             M_in = zeros(numel(grandata.lat), 7);
0357             M_in(:, cols.LAT1) = grandata.lat(:);
0358             M_in(:, cols.B_LZA) = grandata.lza(:);
0359             M_in(:, cols.B_BT) = mytb;
0360             logtext(atmlab('OUT'), 'Retrieving values\n');
0361             [M_retr ~] = self.retrieve(M_in, cols);
0362             fiwp = nanmean(M_retr(:, 1:2:size(M_retr, 2)), 2);
0363             cf100 = nanmean(M_retr(:, 2:2:size(M_retr, 2)), 2);
0364             ciwp = uint16(fiwp .* cf100);
0365             %cf100 = self.nets.classification100(mytb(:, 3:5).');
0366             %fiwp = 10.^self.nets.retrieval(mytb(:, 3:5).');
0367             %ciwp = cf100 .* fiwp;
0368             logtext(atmlab('OUT'), 'Post-processing\n');
0369             unflagged = isfinite(fiwp) & isfinite(cf100);
0370             M = zeros(numel(grandata.lat), max(structfun(@(x)max(x), self.cols)));
0371             M(:, self.cols.mhs_bt_345) = mytb(:, 3:5);
0372             M(:, self.cols.lat) = grandata.lat(:);
0373             M(:, self.cols.lon) = grandata.lon(:);
0374             M(:, self.cols.time) = grandata.time(:);
0375             M(:, self.cols.cf100) = cf100;
0376             M(:, self.cols.fiwp) = fiwp;
0377             M(:, self.cols.ciwp) = ciwp;
0378             M(:, self.cols.lza) = grandata.lza(:);
0379             M(:, self.cols.pos) = grandata.pos(:);
0380             M(:, self.cols.line) = grandata.line(:);
0381             M = M(unflagged, :);
0382 %            storelims.lat = self.nets.retrieval.userdata.retrieval_limits.LAT1;
0383 %            storelims.pos = self.nets.retrieval.userdata.retrieval_limits.POS1;
0384 %            storelims.mhs_bt_345 = self.nets.retrieval.userdata.retrieval_limits.B_BT;
0385 %            M = M(collocation_restrain(M, limstruct2limmat(storelims, self.cols)), :);
0386             M = sortrows(M, [self.cols.time, self.cols.pos]);
0387             info.title = 'MW-only IWP from ANN trained with CPR collocs';
0388             info.version = sprintf('%03.1f', self.version);
0389             info.history = self.changelog;
0390             info.source = 'Retrieved from collocated CPR/MHS MW-only-trained ANN';
0391             info.nnet = self.fromfile;
0392 %            info.stored_lims = struct2string_compact(storelims);
0393             % FIXME: more additional info
0394             logtext(atmlab('OUT'), 'Storing\n');
0395             self.store(dt, spec, M, info);
0396         end
0397         
0398         function setcols(self)
0399             self.cols.lat = 1;
0400             self.cols.lon = 2;
0401             self.cols.time = 3;
0402             self.cols.cf100 = 4;
0403             self.cols.fiwp = 5;
0404             self.cols.ciwp = 6;
0405             self.cols.lza = 7;
0406             self.cols.mhs_bt_345 = 7:9;
0407             self.cols.pos = 10;
0408             self.cols.line = 11;
0409         end
0410         
0411         function setmembers(self)
0412             self.members.lat.type = 'float';
0413             self.members.lat.atts.long_name = 'Latitude';
0414             self.members.lat.atts.valid_range = [-90 90];
0415             self.members.lat.atts.units = 'degrees_north';
0416             
0417             self.members.lon.type = 'float';
0418             self.members.lon.atts.long_name = 'Longitude';
0419             self.members.lon.atts.units = 'degrees_east';
0420             self.members.lon.atts.valid_range = [-180 180];
0421             
0422             self.members.time.type = 'int';
0423             self.members.time.atts.long_name = 'Measurement time';
0424             self.members.time.atts.units = 'Seconds since 1970-01-01 00:00:00';
0425             
0426             self.members.cf1.type = 'float';
0427             
0428             self.members.cf10.type = 'float';
0429             
0430             self.members.cf100.type = 'float';
0431             self.members.cf100.atts.long_name = 'Cloud fraction >= 100 g/m^2';
0432             self.members.cf100.atts.valid_range = [0 1];
0433             self.members.cf100.atts.description = ...
0434                 ['Estimate of fraction of footprint with cloud ' ...
0435                  '> 100 g/m^2. Retrieved with ANN trained against ' ...
0436                  'number of CPR > 100 g/m^2 within AMSUB/MHS ' ...
0437                  'footprint.'];
0438              
0439             self.members.fiwp.type = 'float';
0440             self.members.fiwp.atts.long_name = 'Full uncorrected Ice Water Path';
0441             self.members.fiwp.atts.description = ...
0442                 ['Uncorrected Ice Water Path. Does not take into' ...
0443                  'account whether a cloud is present or not.'];
0444             self.members.fiwp.atts.units = 'g/m^2';
0445             
0446             self.members.ciwp.type = 'int';
0447             self.members.ciwp.atts.units = 'g/m^2';
0448             self.members.ciwp.atts.long_name = 'CF-corrected IWP';
0449             self.members.ciwp.atts.description = ...
0450                 ['CF-Corrected Ice Water Path. This is the product ' ...
0451                  'of fiwp and cf100. This is a more conservative ' ...
0452                  'estimate of IWP than fiwp, guessing closer to 0 ' ...
0453                  'if presence of a cloud is not sure.'];
0454              
0455             self.members.lza.type = 'float';
0456             self.members.lza.atts.long_name = 'Local zenith angle';
0457             self.members.lza.atts.description = ...
0458                 ['Local zenith angle from AMSU-B/MHS granule. See ' ...
0459                  'KLM User Guide for details.'];
0460             self.members.lza.atts.units = 'degrees';
0461             
0462             self.members.mhs_bt_345.type = 'float';
0463             self.members.mhs_bt_345.dims = {'CHANNELS', 3};
0464             self.members.mhs_bt_345.atts.long_name = 'MHS brightness temperatures for the used channel 3, 4 and 5';
0465             self.members.mhs_bt_345.atts.units = 'Kelvin';
0466             
0467             self.members.pos.type = 'byte';
0468             self.members.pos.atts.long_name = 'AMSU-B/MHS scanline position';
0469             
0470             self.members.line.type = 'int';
0471             self.members.line.atts.long_name = 'AMSU-B/MHS scanline number';
0472         end
0473         
0474     end
0475     
0476     methods (Access = private, Static)
0477         function needsbox()
0478             v = ver('nnet');
0479             if isempty(v)
0480                 ST = dbstack;
0481                 error(['atmlab:' mfilename ':needsNNet'], ...
0482                     ['Method ' ST(2).name ' needs the Neural Network toolbox']);
0483             end
0484         end
0485     end
0486     
0487 end

Generated on Mon 15-Sep-2014 13:31:28 by m2html © 2005