Home > atmlab > collocations > Collapser.m

Collapser

PURPOSE ^

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

DOWNLOAD ^

Collapser.m

SOURCE CODE ^

0001 classdef Collapser < AssociatedDataset
0002     % collapse small footprint upon large footprint
0003     %
0004     % This class can be used for collocations where the primary has a
0005     % significantly larger footprint than the secondary, such as between
0006     % MHS and CloudSat or between MHS and AVHRR. It can be used e.g. for
0007     % calculation of arbitrary statistics, such as mean, standard
0008     % deviation, etc., or for selecting a single small-footprint for each
0009     % large-footprint. Whereas the original collocations contain one entry
0010     % for each of the smaller footprints --- thus repeating many times the
0011     % entry for the larger footprint --- a dataset with this class contains
0012     % at most one entry for each of the larger footprints. This is usually
0013     % the desirable behaviour.
0014     %
0015     % The class is normally associated with an <a href="matlab:help AssociatedDataset">AssociatedDataset</a> that works on
0016     % the full-size --- often this is <a href="matlab:help FieldCopier">FieldCopier</a>.
0017     %
0018     % For each primary footprint, the class copies data for the primary and
0019     % gathers data for the secondary. The data for the secondary is then
0020     % input to processing-functions, that can do arbitrary processing.
0021     % For example, processing functions may calculate the mean, the standard
0022     % deviation, the secondary closest to the primary, etc. Any
0023     % function-handle can be passed. The class can either pass all secondary
0024     % footprints, or a sub-set based on prescribed limitations (also with
0025     % function-handles). If no secondary footprints meet the requirements,
0026     % the primary footprint is not selected either. Limitations can be
0027     % either global (applying to all processors) or local (applying to a
0028     % single processor).
0029     %
0030     % Information on fields to be copied, fields to be processed, processor
0031     % functions, local limitatiors, global limitations, and how to store
0032     % fields, is all contained in a structure passed on to the constructor.
0033     % See <a href="matlab:help Collapser/Collapser">constructor documentation</a> for details.
0034     %
0035     % Collapser Properties:
0036     %
0037     %   fieldstruct -           Structure with complete field information
0038     %   overall_limitators -    Limitations applied to all fields
0039     %  (remaining properties inherited from <a href="matlab:help AssociatedDataset">AssociatedDataset</a>.
0040     %   Use <a href="matlab:properties Collapser">properties</a> for a complete listing)
0041     %   vectorised -            Determine if processors/limitators can be
0042     %                           vectorised
0043     %
0044     % Collapser Methods:
0045     %
0046     %   Collapser -             Create Collapser object
0047     %  (remaining methods from <a href="matlab:help AssociatedDataset">AssociatedDataset</a>)
0048     %
0049     % Example:
0050     %
0051     %   >> mfcs.RO_ice_water_path.limitators = {@(X)(X>0)};
0052     %   >> mfcs.RO_ice_water_path.processors.MEAN = @mean;
0053     %   >> mfcs.RO_ice_water_path.stored.NO.type = 'single';
0054     %   >> global_lims = {@(X)(X(:, mhscpr.cols.DIST)<7.5), @(X)(abs(X(:, mhscpr.cols.INT))<600)};
0055     %   % f from <a href="matlab:help FieldCopier">FieldCopier</a> example
0056     %   >> mfc = Collapser(f, mfcs, global_lims, ...
0057     %           'name', 'MyLittleThing', 'basedir', '/some/path', ...
0058     %           'subdir', '$YEAR4/$MONTH/$DAY', ...
0059     %           'filename', 'mean_collocations_$SAT.nc.gz');
0060     %
0061     % See also: AssociatedDataset (abstract superclass), FieldCopier,
0062     %           CollocatedDataset, SatDataset, Collapser (constructor).
0063     %
0064     % Don't forget the Collocation User's Guide
0065     
0066     % TODO:
0067     %  - store result of limitators as bitfield?
0068     %  - store limitators in NetCDF props
0069     
0070     properties (Transient, SetAccess = protected)
0071         %members = []; % See <a href="matlab:help AssociatedDataset.members">AssociatedDataset.members</a>
0072         parent = []; % See <a href="matlab:help AssociatedDataset.parent">AssociatedDataset.parent</a>
0073         dependencies = {}; % See <a href="matlab:help AssociatedDataset.dependencies">AssociatedDataset.dependencies</a>
0074         
0075         % Structure with all fields information
0076         %
0077         % The members of this fieldstruct must correspond to fieldnames
0078         % contained by the <a href="matlab:help AssociatedDataset.parent">parent</a> dataset. The value of each entry
0079         % in this struct is itself a structure, as follows:
0080         %
0081         % entry_name (structure with name corresponding to parent fieldname)
0082         %
0083         %   limitators  cell-array of function_handle
0084         %
0085         %       Depending on the value of the collapser property
0086         %       'vectorised' (defaults to false), this is either in
0087         %       non-vectorised or in vectorised form.
0088         %
0089         %       In non-vectorised form, each limitator is called for every
0090         %       primary footprint. It receives as input the subset of secondary
0091         %       footprints that fall within the primary footprint. Each
0092         %       limitator must return a logical 1-D vector with the same
0093         %       length as the number of footprints in the input. Any later
0094         %       processing functions are applied only to those collocations
0095         %       for which ALL limitators return true.
0096         %
0097         %       In vectorised form, each limitator is called once per
0098         %       granule.  As an argument, it receives a p*N*q ndarray,
0099         %       where p is the maximum number of secondaries to ever
0100         %       occur inside a primary, N is the number of primaries, and q
0101         %       is the number of values per measurement for this field.  Note that for each primary with
0102         %       s<p secondaries, the last p-s values will be masked with
0103         %       nans.  The limitator shall return a logical ndarray of the
0104         %       same size as its input, with true for values to be used for
0105         %       the processing.
0106         %
0107         %   processors  structure
0108         %
0109         %       A structure with a field for each processing.
0110         %       The field is the processors name, such as MEAN,
0111         %       STD, etc. The value is a function handle.
0112         %
0113         %       The function handle can be either in vectorised or
0114         %       non-vectorised form, depending on the value of the
0115         %       collapser property 'vectorised', which defaults to false
0116         %       (behaviour prior to atmlab-2-1-370).  In both cases,  it
0117         %       receives two inputs.  The first input relates only to the
0118         %       data that is to be collapsed, while the second input
0119         %       provides the 'core', with all fields.
0120         %
0121         %       In non-vectorised form, the function is called once for
0122         %       each primary footprint. It takes as input (1) the secondary
0123         %       footprints corresponding to the primary
0124         %       footprint, after global and local limitators
0125         %       are applied, and (2) corresponding core data. The function handle must return a
0126         %       single, scalar value, or a single vector of values
0127         %       if the input data are 2-D.
0128         %
0129         %       In vectorised form, the function handle is called once for
0130         %       each granule.  It receives as input (1) a p*N*q ndarray,
0131         %       as for the limitators above, and (2) corresponding core.  Beyond data that were already
0132         %       masked to begin with (again, see description at
0133         %       limitators), data are masked where any limitator has
0134         %       returned false.  The function shall return a N*q array (for
0135         %       scalar input data, q=1).
0136         %
0137         %       The final members field will prepend each field
0138         %       with the name of its processor, e.g.
0139         %       MEAN_RO_ice_water_path, STD_RO_ice_water_path.
0140         %
0141         %   stored      structure
0142         %
0143         %       This structure contains information on how each
0144         %       field is stored. Fieldnames should be the same
0145         %       as for processors. Values are like for the <a href="matlab:help FieldCopier/fieldstruct_primary">Fieldcopier</a>.
0146         %       By default, no attributes are stored and all
0147         %       fields are stored as single (float).
0148         %
0149         %   incore      logical (scalar boolean)
0150         %
0151         %       If set to 'true', this field is not taken from the
0152         %       AssociatedDataset, but from the AssociatedDataset's parent,
0153         %       the CollocatedDataset or the core. This can be used to e.g.
0154         %       calculate an average position or to store a vector of
0155         %       line-numbers in the collapsed dataset.
0156         %
0157         % It is assumed that additional dimensions (no. channels, profile
0158         % height, etc.) for the collapsed field corresponds to the one for
0159         % the original field, which makes sense for mean, std, etc.  It
0160         % might not always be so; if it is not, one should explicitly set
0161         % (field).stored.(processor).profile = false
0162         %
0163         % For examples, see the definitions in define_datasets and/or
0164         % explore the various collapsed datasets defined in atmlab.
0165         fieldstruct = []; 
0166         % Cell array of global limitators.
0167         %
0168         % These can work either in vectorised or non-vectorised form,
0169         % depending on the value of the 'vectorised' property.
0170         %
0171         % In non-vectorised form (the default),
0172         % Global limitators are applied to the subset of secondary
0173         % footprints corresponding to a shared primary footprint before
0174         % going into any field-specific processing. It is suitable to use
0175         % for limiting distance or time for a certain averaging function.
0176         % Each function handle receives as input a matrix
0177         % of collocations, e.g. Nx14 when there are N
0178         % secondary inside the primary, and shall
0179         % return a logical of Nx1. All limitators are
0180         % processed sequentially and are applied to
0181         % all fields.
0182         %
0183         % In vectorised form, each global limitator is called exactly once
0184         % for each granule.  It receives a p*N*q ndarray, where p is the
0185         % maximum number of secondaries ever inside a primary, N is the
0186         % number of primaries for the granule, and q corresponds to
0187         % fields (usually q=1).  To conserve memory, global limitators in
0188         % vectorised form must indicate what fields they shall be using, as
0189         % a subset from fields in the core (CollocatedDataset.cols).  Thus,
0190         % rather than a simple cell array of function handles, it's a cell
0191         % array of 2-element cell arrays, such as {{{'DIST'}, @(X)(X<7.5)}}.
0192         % The function handles shall return a logical of
0193         % size pxNx1 instructing what primaries to use, and what primaries to
0194         % skip altogether.
0195         overall_limitators = {}; % List of global limitators
0196     end
0197     
0198     properties (Transient)
0199         % Determine if processors and limitators can be vectorised
0200         %
0201         % Collapsers can be very slow, because user-provided,
0202         % Matlab-written functions are called for every primary footprint.
0203         % For example, processing a single granule of MHS/AVHRR
0204         % collocations can take more than an hour.  In some - but not all -
0205         % cases, limitators and processors can be vectorised, and
0206         % processing speed can be increased by several orders of magnitude.
0207         % This requires
0208         vectorised = false;
0209     end
0210         
0211     % $Id: Collapser.m 8740 2013-11-06 13:17:18Z seliasson $
0212     methods
0213         %% constructor
0214         function self = Collapser(ad, fieldstruct, overall_limitators, varargin)
0215             % constructor for Collapser class
0216             %
0217             % This constructs a <a href="matlab:help Collapser">Collapser</a>
0218             %
0219             % FORMAT
0220             %
0221             %   cl = Collapser(ad, fs, global_lims, ...)
0222             %
0223             % IN
0224             %
0225             %   ad      AssociatedDataset
0226             %
0227             %       <a href="matlab:help AssociatedDataset">AssociatedDataset</a> from which this Collapser should
0228             %       take its data.
0229             %
0230             %   fs      structure
0231             %
0232             %       Structure giving full information on fields to process
0233             %       and how to process them. See <a href="matlab:help Collapser/fieldstruct">property docs</a> for details.
0234             %
0235             %   global_lims     cell array
0236             %
0237             %       See <a href="matlab:help Collapser/overall_limitators">property documentation</a>.
0238             %
0239             % OUT
0240             %
0241             %   Collapser object --- see <a href="matlab:help Collapser">class documentation</a>.
0242             %
0243             % EXAMPLE
0244             %
0245             %   See <a href="matlab:help Collapser">class docs</a>.
0246 
0247             cd = ad.parent;
0248             dependency = ad;
0249             
0250             % sort out which ones are just copied, which one processed
0251             % smartly
0252             
0253             self = self@AssociatedDataset(cd, {dependency}, varargin{:}); % call parent constructor
0254             
0255             % make sure all members have at least 'processors',
0256             % 'limiters', 'stored' and 'incore'; set appropiate
0257             % values where they don't
0258             fields = fieldnames(fieldstruct);
0259             for i = 1:length(fields)
0260                 field = fields{i};
0261                 if ~isfield(fieldstruct.(field), 'processors')
0262                     error(['atmlab:' mfilename], 'No processors specified for %s', field);
0263                 end
0264                 if ~isfield(fieldstruct.(field), 'limitators');
0265                     fieldstruct.(field).limitators = {@(x)true(size(x, 1), 1)};
0266                 end
0267                 if ~isfield(fieldstruct.(field), 'incore')
0268                     fieldstruct.(field).incore = false;
0269                 end
0270                 if fieldstruct.(field).incore
0271                     fieldstruct.(field).origin = cd;
0272                 else
0273                     fieldstruct.(field).origin = ad;
0274                 end
0275                 % populate one or more 'members' for this field
0276                 procnames = fieldnames(fieldstruct.(field).processors);
0277                 for pi = 1:length(procnames)
0278                     procname = procnames{pi};
0279                     newfieldname = [upper(procname) '_' field];
0280                     newfield = fieldstruct.(field);
0281                     newfield = rmfield(newfield, 'processors');
0282                     newfield = rmfield(newfield, 'limitators');
0283                     newfield.orig_name = field;
0284                     if isfield(newfield.stored.(procname), 'type')
0285                         newfield.type = newfield.stored.(procname).type;
0286                     else
0287                         newfield.type = 'float';
0288                     end
0289                     if (isfield(fieldstruct, field) && ...
0290                         isfield(fieldstruct.(field), 'stored') && ...
0291                         isfield(fieldstruct.(field).stored, procname))
0292                         warning('off', 'catstruct:DuplicatesFound');
0293                         newfield = catstruct(newfield, fieldstruct.(field).stored.(procname));
0294                       
0295                     end
0296                     self.members.(newfieldname) = newfield;
0297                 end
0298             end
0299             
0300             self.fieldstruct = fieldstruct;
0301             self.overall_limitators = overall_limitators;
0302 
0303             % add core members
0304             self.members.FIRST.type = 'int';
0305             self.members.FIRST.atts.long_name = 'First corresponding row in overlap';
0306             
0307             self.members.LAST.type = 'int';
0308             self.members.LAST.atts.long_name = 'Last corresponding row in overlap';
0309             
0310         end
0311         
0312         %% overload parent methods
0313         
0314         function [M, M_cols] = merge_matrix(self, M_core, cols_core, M_self, cols_self)
0315             M = [M_core(M_self(:, cols_self.FIRST), :) M_self];
0316             M_cols = self.merge_new_cols(M_core, cols_core, cols_self);
0317         end
0318         
0319     end
0320     
0321     methods (Access = {?SatDataset})
0322         %% implementation abstract methods
0323         
0324         function args = primary_arguments(~, ~)
0325             args = {};
0326         end
0327         
0328         function args = secondary_arguments(~, ~)
0329             args = {};
0330         end
0331         
0332         function bool = needs_primary_data(~, ~)
0333             bool = false;
0334         end
0335         
0336         function bool = needs_secondary_data(~, ~)
0337             bool = false;
0338         end
0339         
0340         function fields = fields_needed_for_dependency(self, fields, ~)
0341             fields = unique(struct2cell(structfun(@(e) safegetfield(e, 'orig_name', ''), getfields(self.members, fields{:}), 'UniformOutput', false)));
0342             fields = fields(~cellfun(@isempty, fields));
0343             % FIXME: the above is insufficient in case the collapser is
0344             % extended with fields from the core.  Consider something like
0345             % below, although this does not yet work.  Comment out for now.
0346             %{
0347             fields_needed = {};
0348             for i = 1:length(fields_considered)
0349                 field = fields_considered{i};
0350                 if isfield(self.members.(field), 'orig_name') % FIRST, LAST etc. don't have this
0351                     orig_name = self.members.(field).orig_name;
0352                     if ~isfield(self.members.(field), 'incore') || ~self.members.(field).incore
0353                         fields_needed = [fields_needed orig_name]; %#ok<AGROW>
0354                     end
0355                 end
0356             end
0357             fields_needed = unique(fields_needed);
0358             %}
0359         end
0360         
0361         function [M, localcols] = process_granule(self, processed_core, ~, ~, ~, ~, ~, ~, deps, varargin)
0362             
0363             
0364             [deps_cols, memnames] = optargs(varargin, {{self.dependencies{1}.cols}, 'all'});
0365             M_data = deps{1};
0366             clear deps; % otherwise duplicating potentially a lot of data
0367 %             deps_cols = cell2struct(deps_cols, cellfun(@(X) X.name, self.dependencies, 'UniformOutput', false), 2);
0368 %            deps_cols = deps_cols{1};
0369             cols_data = deps_cols{1};
0370             
0371             %%%
0372             
0373             % Nomenclature note:
0374             %
0375             % - member is one geophysical quantity, e.g. IWP, LWP, etc.
0376             % - field is a resulting collapsed quantity, e.g. MEAN_IWP,
0377             % NO_LWP, etc.
0378             
0379             % check that sizes are the same
0380             assert(size(processed_core, 1)==size(M_data, 1), ...
0381                 ['atmlab:' mfilename ':SizeError'], ...
0382                 ['To average fields, core must have same no. of rows as associated. ' ...
0383                 'Core has ' num2str(size(processed_core, 1)) ' rows, ' ...
0384                 'associated has ' num2str(size(M_data, 1)) ' rows :(.']);
0385             ad = self.dependencies{1};
0386             cd = ad.parent;
0387             % split by primary footprint and find first and last index
0388             % corresponding to each footprint
0389             [uniques, firsts] = unique(processed_core(:, [cd.cols.START1 cd.cols.LINE1 cd.cols.POS1]), 'rows', 'first');
0390             lasts = [firsts(2:end)-1; size(processed_core, 1)];
0391             
0392             % put sizes in self.members in order to get self.cols
0393             if isequal(memnames, 'all')
0394                 memnames = fieldnames(self.members);
0395             else
0396                 rqre_subset(memnames, fieldnames(self.members));
0397                 memnames = intersect(fieldnames(self.members), memnames);
0398                 memnames = union(memnames, {'FIRST', 'LAST'});
0399             end
0400             
0401             for mi = 1:length(memnames)
0402                 memname = memnames{mi};
0403                 % special cases 'FIRST' and 'LAST' don't have any
0404                 % corresponding data in the corresponding
0405                 % additional-dataset, so no copying is to be done either
0406                 if any(strcmp(memname, {'FIRST', 'LAST'}))
0407                     continue
0408                 end
0409                 mem = self.members.(memname);
0410                 origmem = mem.origin.members.(mem.orig_name);
0411                 if isfield(origmem, 'dims') && ~isfield(mem, 'dims')
0412                     mem.dims = origmem.dims;
0413                 end
0414                 % NB: commented out this 2013-03-25 as it caused flag
0415                 % CV_ROIWP.atts.missing_value to be reset.
0416 %                 w = warning('off', 'catstruct:DuplicatesFound');
0417 %                 self.members.(memname) = catstruct(...
0418 %                     self.members.(memname).origin.members.(mem.orig_name), ...
0419 %                     self.members.(memname));
0420 %                 warning(w);
0421                 fields{mi} = self.members.(memname).orig_name;
0422             end
0423             self.members2cols();
0424             fields = unique(fields(~cellfun(@isempty, fields)));
0425             localcols = self.get_cols_from_bro(memnames, deps_cols);
0426 %             if isequal(fields, 'all');
0427 %                 fields = fieldnames(self.fieldstruct);
0428 %                 self.set_cols_from_bro();
0429 %                 localcols = self.cols;
0430 %             else
0431 %                 localcols = self.get_cols_from_bro(fields, deps_cols);
0432 %             end
0433             
0434             nfields = max(cell2mat(struct2cell(localcols).'));
0435             
0436             % this flag is only used until the end of the function.  It signals
0437             % that the collapser gave no data for a particular footprint, but
0438             % only when this is due to the global limitator.  This data is
0439             % then removed, so this flag should never be exposed to the calling
0440             % function.
0441             %
0442             % When the footprint itself is valid (i.e. global limitator returns
0443             % some valid footprints) but the data are not (i.e. individual
0444             % limitators return nothing or data are flagged to begin with), data
0445             % are set to missing_value.
0446             flag_notset = -realmax(); % hopefully, no real data has this value!
0447             flag_globlimfailed = .9*-realmax();
0448             flag_allflagged = +realmax();
0449             M = flag_notset*ones(size(uniques, 1), nfields);
0450             
0451             % cache some things outside the loops to speed it up a bit
0452             % (done with profiler; code in this loop may be executed
0453             % millions of types for processing a single day)
0454             
0455             %fields = fieldnames(self.fieldstruct);
0456             n_overall_limitators = length(self.overall_limitators);
0457             has_overall_limitators = n_overall_limitators>0;
0458             answers = cell(1, n_overall_limitators);
0459 %             field_has_limitators = cellfun(@(fname)~isempty(self.fieldstruct.(fname).limitators), fields);
0460 %             field_limitators = cellfun(@(fname) self.fieldstruct.(fname).limitators, fields, 'UniformOutput', false);
0461             
0462             for k = 1:length(fields)
0463                 fname = fields{k};
0464                 procnames = fieldnames(self.fieldstruct.(fname).processors);
0465                 
0466                 field_limitators{k} = self.fieldstruct.(fname).limitators;
0467                 field_has_limitators(k) = ~isempty(self.fieldstruct.(fname).limitators);
0468                 all_procnames{k} = procnames;
0469                 for pi = 1:length(procnames)
0470                     procname = procnames{pi};
0471                     proccers{k}{pi} =  self.fieldstruct.(fname).processors.(procname); 
0472                 end
0473             end
0474             
0475             n_orig = size(processed_core, 1);
0476             if isempty(processed_core)
0477                 logtext(atmlab('OUT'), 'core seems empty, nothing to do!\n');
0478                 M = zeros(0, nfields);
0479             elseif self.vectorised
0480                 
0481                 % start with the easy bit
0482                                     
0483                 M(:, localcols.FIRST) = processed_core(firsts, cd.cols.INDEX);
0484                 M(:, localcols.LAST) = processed_core(lasts, cd.cols.INDEX);
0485                     
0486                 %% Sophisticated vectorisation...
0487                 
0488                 % create a matrix where each column contains the indices
0489                 % corresponding to one of the primaries; no. of rows
0490                 % corresponds to the largest occurding no. of secondaries in a
0491                 % primary, so most columns will have lots of data flagged
0492                 max_no_prim_in_sec = max(firsts(2:end)-firsts(1:end-1));
0493                 
0494                 logtext(atmlab('OUT'), ...
0495                     ['%s %s Commencing vectorised collapsing. ' ...
0496                      ' %d primaries with up to %d secondaries per primary.\n'], ...
0497                      class(self), self.name, length(uniques), max_no_prim_in_sec);
0498                 % use a signed int for rarr; double is too large, single too
0499                 % imprecise, but I need negatives for flagged data
0500                 % NB: rarr stands for Re-ARRanged
0501                 rarr = bsxfun(@plus, vec2row(int32(firsts)), vec2col(int32(0:max_no_prim_in_sec-1)));
0502                 % flag data that shouldn't be there
0503                 rarrgt = bsxfun(@gt, rarr, vec2row(lasts));
0504                 rarr(rarrgt) = -1;
0505                 % now we have the array with indices in 'rarr'.  Get
0506                 % corresponding data from data matrix, i.e. like this:
0507                 %  1           2           3           4           6           8          10          12          13          15
0508                 % -1          -1          -1           5           7           9          11          -1          14          16
0509                 % -1          -1          -1          -1          -1          -1          -1          -1          -1          17
0510                 % -1          -1          -1          -1          -1          -1          -1          -1          -1          -1
0511                 % -1          -1          -1          -1          -1          -1          -1          -1          -1          -1
0512                 
0513                 % we will arrange two 3-D data matrices in this style: one
0514                 % for the core, one for the data.  The core contains fields
0515                 % needed by global limitators and fields needed by
0516                 % processors.  The data contains only fields needed by
0517                 % processors.
0518                 core_fields_needed_by_limitators = cellfun(@(X) X{1}, self.overall_limitators, 'UniformOutput', false);
0519                 proc_field_is_in_core = cellfun(@(X) self.fieldstruct.(X).incore, fields);
0520                 core_fields_needed_by_proccers = fields(proc_field_is_in_core);
0521                 core_fields_needed = union([core_fields_needed_by_limitators{:}], ...
0522                                            core_fields_needed_by_proccers);
0523                 data_fields_needed = fields(~proc_field_is_in_core);
0524                 Mrar_core = nan(max_no_prim_in_sec, length(firsts), length(core_fields_needed), 'single');
0525                 Mrar_data = nan(max_no_prim_in_sec, length(firsts), sum(cellfun(@(f) length(cols_data.(f)), data_fields_needed)), 'single');
0526                 % we can afford to loop to max_no_prim_in_sec because it's only
0527                 % a few dozen at most.  This loop takes 9.4 seconds for a 56 x
0528                 % 228391 x 15 array...
0529                 for i = 1:max_no_prim_in_sec
0530                     % copy over where 'rarr' is not flagged.  FIXME: this
0531                     % should of course be properly selected
0532                     n = 1;
0533                     for k = 1:length(data_fields_needed)
0534                         field = data_fields_needed{k};
0535                         fieldsize = length(cols_data.(field));
0536                         Mrar_data(i, rarr(i, :)>0, n:(n+fieldsize-1)) = M_data(rarr(i, rarr(i, :)>0), cols_data.(field));
0537                         Mrar_data_cols.(field) = n:(n+fieldsize-1);
0538                         n = n + fieldsize;
0539                     end
0540                     for k = 1:length(core_fields_needed)
0541                         field = core_fields_needed{k};
0542                         Mrar_core(i, rarr(i, :)>0, k) = processed_core(rarr(i, rarr(i, :)>0), self.parent.cols.(field));
0543                         Mrar_core_cols.(field) = k;
0544                     end
0545                     %                 Mrar(i, rarr(i, :)>0, 1:5) = M_data(rarr(i, rarr(i, :)>0), cols_data.AVHRR_Y);
0546                     %                 Mrar(i, rarr(i, :)>0, 6) = M_data(rarr(i, rarr(i, :)>0), cols_data.AVHRR_FLAG_3AB);
0547                     %Mrar(i, rarr(i, :)>0, :) = processed_core(rarr(i, rarr(i, :)>0), :);
0548                 end
0549                 % now we have a p*N*q ndarray, with p the max. no. of
0550                 % secondary in primary, N the no. of primaries, and q the
0551                 % no. of fields.  This replaces M_data that we clear for
0552                 % memory reasons.
0553                 clear M_data;
0554                 clear processed_core;
0555                 % go through the global limitators:
0556                 overall_global_lim = true(max_no_prim_in_sec, length(firsts));
0557                 for ii = 1:n_overall_limitators
0558                     lim_inp_fields = self.overall_limitators{ii}{1};
0559                     limitator = self.overall_limitators{ii}{2};
0560                     coreplanes = cell2mat(struct2cell(getfields(Mrar_core_cols, lim_inp_fields{:})));
0561                     this_global_lim = limitator(Mrar_core(:, :, coreplanes));
0562                     if ~isequal(size(this_global_lim), size(overall_global_lim))
0563                         error(['atmlab:' mfilename ':wrongsize'], ...
0564                             ['This is %s %s speaking.  I was processing vectorised ' ...
0565                              'data when I received a wrongly-sized logical from global ' ...
0566                              'limitator %s.  I got: %s.  But I wanted: %s.  Giving up!'], ...
0567                              class(self), self.name, func2str(limitator), ...
0568                              num2str(size(this_global_lim)), num2str(size(overall_global_lim)));
0569                     end
0570                     overall_global_lim = overall_global_lim & this_global_lim;
0571                 end
0572                 % mask those data out of Mrar_data.  overall_global_lim is
0573                 % just a logical so hopefully I can get away with repmat
0574                 overall_global_lim = repmat(overall_global_lim, [1, 1, size(Mrar_data, 3)]);
0575                 Mrar_data(~overall_global_lim) = NaN;
0576                 %clear overall_global_lim;
0577                 
0578                 for k = 1:length(fields)
0579                     fname = fields{k};
0580                     if self.fieldstruct.(fname).incore
0581                         M_source = Mrar_core(:, :, Mrar_core_cols.(fname));
0582                     else
0583                         M_source = Mrar_data(:, :, Mrar_data_cols.(fname));
0584                     end
0585                     for pi = 1:length(all_procnames{k})
0586                         procname = all_procnames{k}{pi};
0587                         procfname = [procname '_' fname];
0588                         if ~isfield(localcols, procfname)
0589                             % probably processor is already there; in
0590                             % any case, it was not asked for
0591                             continue
0592                         end
0593 
0594                         capsed = proccers{k}{pi}(M_source);
0595                         expsize = [size(uniques, 1), length(localcols.(procfname))];
0596                         if ~isequal(size(capsed), expsize)
0597                             error(['atmlab:' mfilename ':wrongsize'], ...
0598                                 ['This is %s %s speaking.  I was processing vectorised ' ...
0599                                  'data when I received wrongly sized data from processor ' ...
0600                                  '%s for field %s.  I expected %s, but received %s.  Giving up!'], ...
0601                                  class(self), self.name, procname, fname, num2str(expsize), num2str(size(capsed)));
0602                         end
0603                         M(:, localcols.(procfname)) = capsed;
0604                     end
0605                 end
0606 
0607                 % for consistency with old method, remove completely
0608                 % instances where the overall global limitators prevented
0609                 % data
0610                 M(all(all(~overall_global_lim, 1), 3), :) = []; 
0611                 self.cols = localcols;
0612             else
0613                 %% proceed the old, slow way (OK for small data amounts)
0614                 
0615                 % keep track of time for logging purposes (progress etc.)
0616                 t = 0;
0617                 logtext(atmlab('OUT'), 'Processing %d primary footprints, %d fields, %d total stats, %d values/collocation\n', ...
0618                     size(uniques, 1), length(fields), length(memnames), nfields);
0619                 j = 0; % counter increases only when there is data
0620                 for i = 1:size(uniques, 1) % need to be done in loop due to mean/std/etc.
0621                     tic;
0622                     first = firsts(i);
0623                     last = lasts(i);
0624                     M_coll_part = processed_core(first:last, :);
0625                     M_data_part = M_data(first:last, :);
0626                     
0627                     j = j + 1;
0628                     
0629                     % limitation to all, e.g. for distances
0630                     if ~has_overall_limitators;
0631                         lim_for_all_fields = true(size(M_data_part, 1), 1);
0632                     else
0633                         % 2nd version is much faster.  This was still often the
0634                         % slowest part of the function, applying the overall
0635                         % limitators, for cases with few fields.
0636                         %answers = cellfun(@(X)(X(:)), cellfun(@(f)f(M_coll_part), self.overall_limitators, 'UniformOutput', false), 'UniformOutput', false);
0637                         for ii = 1:n_overall_limitators
0638                             answers{ii} = flat(self.overall_limitators{ii}(M_coll_part));
0639                         end
0640                         
0641                         lim_for_all_fields = all(horzcat(answers{:}), 2);
0642                         if ~any(lim_for_all_fields)
0643                             M(j, :) = flag_globlimfailed;
0644                             continue
0645                         end
0646                     end
0647                     
0648                     %                 M(j, self.cols.FIRST) = processed_core(first, cd.cols.INDEX);
0649                     %                 M(j, self.cols.LAST) = processed_core(last, cd.cols.INDEX);
0650                     M(j, localcols.FIRST) = processed_core(first, cd.cols.INDEX);
0651                     M(j, localcols.LAST) = processed_core(last, cd.cols.INDEX);
0652                     
0653                     % for each field, apply limitations and call processing
0654                     % function on sub-limited set, if any are actually left
0655                     % also keep track if we had any valid fields here
0656                     anyvalidfields = false;
0657                     for k = 1:length(fields)
0658                         fname = fields{k};
0659                         if self.fieldstruct.(fname).incore
0660                             M_source = M_coll_part;
0661                         else
0662                             M_source = M_data_part;
0663                         end
0664                         
0665                         % iteratively apply limitations, only if all
0666                         % limitations return true (including
0667                         % lim_for_all_fields) the collocation is selected for
0668                         % further average-processing
0669                         if ~field_has_limitators(k)
0670                             limsel = true(size(M_source, 1), 1);
0671                         else
0672                             limsel = lim_for_all_fields;
0673                             limmers = field_limitators{k};
0674                             
0675                             for li = 1:length(limmers)
0676                                 limmer = limmers{li};
0677                                 %                             limhere = limmer(M_source(:, self.fieldstruct.(fname).origin.cols.(fname)));
0678                                 limhere = limmer(M_source(:, cols_data.(fname)));
0679                                 limsel = limsel(:) & limhere(:);
0680                             end
0681                         end
0682                         
0683                         procnames = all_procnames{k};
0684                         if any(limsel)
0685                             % apply all processors and store in appropiate
0686                             % place
0687                             for pi = 1:length(procnames)
0688                                 procname = procnames{pi};
0689                                 if ~isfield(localcols, [procname '_' fname])
0690                                     % probably processor is already there; in
0691                                     % any case, it was not asked for
0692                                     continue
0693                                 end
0694                                 %proccer = self.fieldstruct.(fname).processors.(procname);
0695                                 %                             capsed = proccers{k}{pi}(M_source(limsel, self.fieldstruct.(fname).origin.cols.(fname)));
0696                                 capsed = proccers{k}{pi}(M_source(limsel, cols_data.(fname)), M_coll_part(limsel, :));
0697                                 
0698                                 %if ~isequal(size(capsed), [1 length(self.cols.([procname '_' fname]))])
0699                                 if ~isequal(size(capsed), [1, length(localcols.([procname '_' fname]))])
0700                                     error(['atmlab:' mfilename ':mismatch'], ...
0701                                         ['Error in collapsing %s %s %s. Processor ', ...
0702                                         'returned wrongly-sized field. Got [%d, %d]. ', ...
0703                                         'Expected [%d, %d].'], ...
0704                                         self.name, fname, procname, size(capsed, 1), ...
0705                                         size(capsed, 2), 1, length(localcols.([procname '_' fname])));
0706                                     %                                size(capsed, 2), 1, length(self.cols.([procname '_' fname])));
0707                                 end
0708                                 M(j, localcols.([procname '_' fname])) = capsed;
0709                                 %                            M(j, self.cols.([procname '_' fname])) = capsed;
0710                                 anyvalidfields = true;
0711                             end
0712                         else
0713                             % set all processors for this field to missing_val
0714                             for pi = 1:length(procnames)
0715                                 procname = procnames{pi};
0716                                 if isfield(localcols, [procname '_' fname])
0717                                     %M(j, self.cols.([procname '_' fname])) = self.fieldstruct.(fname).stored.(procname).atts.missing_value;
0718                                     M(j, localcols.([procname '_' fname])) = self.fieldstruct.(fname).stored.(procname).atts.missing_value;
0719                                 end
0720                             end
0721                         end
0722                     end
0723                     % remove if any are valid at all % (do you mean "if none are valid?")
0724                     % FIXME: make this optional
0725                     if ~anyvalidfields
0726                         M(j, :) = flag_allflagged;
0727                     end
0728                     t = t + toc;
0729                     if t > 600
0730                         logtext(atmlab('OUT'), 'Done %d/%d\n', ...
0731                             i, size(uniques, 1));
0732                         t = 0;
0733                     end
0734                 end
0735                 
0736                 % remove remaining part of M, e.g. where I did too much
0737                 % pre-allocation or no relevant data was found
0738                 rest = all(M==flag_globlimfailed | M==flag_allflagged, 2);
0739                 % FIXME: what to do with the other flags?
0740                 M(rest, :) = [];
0741                 %%%
0742             end
0743             logtext(atmlab('OUT'), '%d collocations -> %d collapsed and valid collocations\n', ...
0744                 n_orig, size(M, 1));
0745 
0746         end
0747         
0748         %% overload parent methods
0749         
0750         function S = merge_struct(varargin)
0751             error(['atmlab:' mfilename ':notimplemented'], ...
0752                 'Collocating collapsed datasets is not implemented yet');
0753         end
0754         
0755         function C = get_mergefields(self) %#ok<MANU>
0756             C = {'FIRST'};
0757         end
0758         
0759         function new = concatenate(self, old_core_result, old_additional_result, new_additional_result)
0760             if isempty(new_additional_result)
0761                 new = old_additional_result;
0762                 return
0763             end
0764             if ~isempty(old_core_result)
0765                 correction = old_core_result(end, self.parent.cols.INDEX);
0766                 new_first = new_additional_result(:, self.cols.FIRST) + correction;
0767                 new_last = new_additional_result(:, self.cols.LAST) + correction;
0768                 new_additional_result(:, self.cols.FIRST) = new_first;
0769                 new_additional_result(:, self.cols.LAST) = new_last;
0770             end
0771             if isempty(old_additional_result)
0772                 new = new_additional_result;
0773             else
0774                 new = [old_additional_result; new_additional_result];
0775             end
0776         end
0777         
0778         function [S, strattr] = read_homemade_granule(self, file, varargin)
0779             info = self.find_info_from_granule(file);
0780             dt = [str2double(info.year), str2double(info.month), str2double(info.day)];
0781             extra_fields = optargs(varargin, {{}});
0782             core_fields = {'LAT1', 'LON1', 'TIME1'};%, 'FIRST', 'LAST'};
0783             all_fields = union(extra_fields, core_fields);
0784             [M, c] = self.parent.read(dt, dt, info.satname, ...
0785                 all_fields, ...
0786                 struct(), ...
0787                 {}, ...
0788                 [self.dependencies, {self}]);
0789             %self.parent.read_homemade_granule(self.parent.find_granule_by_datetime([2007, 1, 1], 'noaa18'), {})
0790             if isempty(M)
0791                 S = cell2struct(repmat({[]}, [1, length(all_fields)]), all_fields, 2);
0792                 S.lat = [];
0793                 S.lon = [];
0794                 S.epoch = [];
0795                 S.time = [];
0796                 S.version = 0;
0797                 return
0798             end
0799             % need to call cast_fields_back for each dataset-type
0800             % individually
0801             [fields_core, additionals, additional_fields] = self.parent.deal_fields(all_fields, self.parent.associated);
0802             S = self.parent.cast_fields_back(M, getfields(c, fields_core{:}));
0803             for i = 1:length(additionals)
0804                 S = catstruct(S, ...
0805                               additionals{i}.cast_fields_back(M, ...
0806                                                               getfields(c, ...
0807                                                               additional_fields{i}{:})));
0808             end
0809 %            from_parents = intersect(all_fields, fieldnames(self.parent.members));
0810 %            from_self = intersect(all_fields, fieldnames(self.members));
0811 %            S1 = self.parent.cast_fields_back(M, getfields(c, from_parents{:}));
0812 %            S2 = self.cast_fields_back(M, getfields(c, from_self{:}));
0813 %            S = catstruct(S1, S2);
0814 %            S = self.cast_fields_back(M, getfields(c, all_fields{:}));
0815             S.lat = S.LAT1;
0816             S.lon = S.LON1;
0817             [year, month, day, hour, minute, second] = unixsecs2date(S.TIME1);
0818             S.epoch = min(date2unixsecs(year, month, day));
0819             S.time = S.TIME1 - S.epoch;
0820             S.version = 0;
0821             %error('Not implemented');
0822         end
0823         
0824         function set_cols_from_bro(self)
0825             % Set cols-structure based on brothers cols
0826             %
0827             % When a collapsed dataset is processed, the AssociatedDataset
0828             % it belongs to has always been processed already, so from its
0829             % cols-structure can be inferred all the expected sizes.  Use
0830             % this to set or update the local cols-structure.
0831             self.cols = self.get_cols_from_bro(self, fieldnames(self.members));
0832         end
0833         
0834         function cc = get_cols_from_bro(self, members, deps_cols)
0835             % get cols-structure based on brothers cols
0836             %
0837             % When a collapsed dataset is processed, the AssociatedDataset
0838             % it belongs to has always been processed already, so from its
0839             % cols-structure can be inferred all the expected sizes.  Use
0840             % this to set or update the local cols-structure.
0841             n = 1;
0842             for i = 1:length(members)
0843                 field = members{i};
0844                 if isfield(self.members.(field), 'origin') && ...
0845                         ~(isfield(self.members.(field), 'profile') && self.members.(field).profile==false)
0846                     % if it has an origin, it should have an orig_name
0847                     if isempty(fieldnames(self.members.(field).origin.cols))
0848                         ncols = length(deps_cols{...
0849                             strcmp(self.members.(field).origin.name, ...
0850                                 cellfun(@(X) X.name, ...
0851                                     self.dependencies, ...
0852                                     'UniformOutput', false))}.(self.members.(field).orig_name));
0853                     else
0854                         ncols = length(self.members.(field).origin.cols.(self.members.(field).orig_name));
0855                     end
0856                     % FIXME/TODO: should reuse existing dimension names...
0857                     self.members.(field).dims = {sprintf('HEIGHT_%s', field), ncols};
0858                 else % should be FIRST, LAST... ncols==1
0859                     ncols = 1;
0860                 end
0861                 cc.(field) = n:(n+ncols-1);
0862                 n = n + ncols;
0863             end
0864         end
0865         
0866     end
0867     
0868     methods (Static, Access = {?SatDataset})
0869                 
0870         function do = redo_all(software_version)
0871             % redo_all(software_version)
0872             %
0873             % overload and return true if some changes require that a
0874             % dataset must be overwritten (overwrite=1) even if requested
0875             % to be appended (overwrite=2)
0876             vers = cellfun(@str2num, strsplit(software_version(8:end), '.'));
0877             if length(vers) < 3 % shouldn't happen but redo
0878                 do = true;
0879                 return
0880             end
0881             major = vers(1);
0882             minor = vers(2);
0883             micro = vers(3);
0884             if (major < 2) || ...
0885                     (major == 2 && minor < 1) || ...
0886                     (major == 2 && minor == 1 && micro < 337)
0887                 logtext(atmlab('OUT'), ...
0888                     'Found pre-fix collapsed granule, overwriting completely\n');
0889                 do = true;
0890             else
0891                 do = false;
0892             end
0893         end
0894     end
0895     
0896 end

Generated on Mon 15-Sep-2014 13:31:28 by m2html © 2005