Home > atmlab > datasets > HomemadeDataset.m

HomemadeDataset

PURPOSE ^

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

DOWNLOAD ^

HomemadeDataset.m

SOURCE CODE ^

0001 classdef HomemadeDataset < SatDataset
0002     % Any SatDataset that is locally created and stored.
0003     %
0004     % This class is an 'intermediate' class between <a href="matlab:help SatDataset">SatDataset</a>
0005     % and <a href="matlab:help CollocatedDataset">CollocatedDataset</a>, but also parent to <a href="matlab:help AssociatedDataset">AssociatedDataset</a>.
0006     % It is unlikely you wish to instantiate this class directly.
0007     %
0008     % This class contains methods and properties that are related to
0009     % storing data to disk and reading data in the same (NetCDF) format
0010     % again from disk. It inherits from <a href="matlab:help SatDataset">SatDataset</a> and is parent to
0011     % <a href="matlab:help CollocatedDataset">CollocatedDataset</a> and <a href="matlab:help AssociatedDataset">AssociatedDataset</a>.
0012     %
0013     %
0014     % HomemadeDataset Properties:
0015     %
0016     %   cols -  Structure describing columns for matrix-stored data
0017     %   mattype - Type to use internally. Normally 'double'.
0018     %  (remaining properties inherited from <a href="matlab:help SatDataset">SatDataset</a>.
0019     %   Use <a href="matlab:properties HomemadeDataset">properties</a> for a complete listing)
0020     %
0021     % HomemadeDataset Methods:
0022     %
0023     %  Constructor:
0024     %
0025     %   HomemadeDataset -   Create HomemadeDataset object
0026     %
0027     %  I/O:
0028     %
0029     %   store -             Store <a href="matlab:help HomemadeDataset/cols">cols</a>-described matrix to NetCDF
0030     %   read_single_day -   Read a single day of stored data
0031     %  (remaining methods inherited from <a href="matlab:help SatDataset">SatDataset</a>)
0032     %
0033     % See also: SatDataset (superclass), CollocatedDataset (subclass),
0034     %           AssociatedDataset (abstract subclass)
0035     %
0036     % Don't forget the Collocation's User Guide.
0037 
0038     % Note on implicit and explicit constructor calling:
0039     % http://www.mathworks.se/help/matlab/matlab_oop/creating-subclasses--syntax-and-techniques.html
0040     
0041     % $Id: HomemadeDataset.m 8750 2013-12-07 18:14:32Z seliasson $
0042     
0043     
0044     properties
0045         % structure describing columns of (internally) stored data
0046         %
0047         % The cols-structure describes columns of data passed around
0048         % internally as a matrix. For example, mid-level methods like
0049         % <a href="matlab:help CollocatedDataset/collocate_date">CollocatedDataset.collocate_date</a>
0050         % return a matrix, whose columns are then described by the
0051         % cols-member.
0052         cols = struct;
0053         
0054 %        members;
0055         
0056         % flag controlling how to consider granules already existing
0057         %
0058         % This flag, defaulting to 'false', controls how the toolkit treats
0059         % files that are already there.  For example, by default,
0060         % collocations are not rerun and associateddatasets are not
0061         % recalculated if files already exist.  The exact behaviour depends
0062         % on the class.  Usually, the meaning is straightforward: false
0063         % means do not overwrite, true means do so.  This can be set per
0064         % dataset, so if one wants to use the core but regenerate
0065         % additionals, one can set the property for the additionals to
0066         % true.
0067         %
0068         % Since atmlab-2-1-334, the value for FieldCopier and Collapser
0069         % (NOTE: FIXME, ALL AssociatedDataset?) can have other values.
0070         % Note that these work ONLY when processing is delayed; when
0071         % processing is done along with the CollocatedDataset, the system
0072         % will always regenerate additionals.
0073         %
0074         % overwrite = 2     read original files, and check per field, i.e.
0075         %                   extend them without changing existing fields
0076         %
0077         % overwrite = {'cell', 'array', 'of', 'fields'}
0078         %
0079         %                   read original files, redo all fields in the
0080         %                   cell array, keep others untouched.
0081         overwrite = false;
0082         
0083         % Set diskcache to store results of '.read' or others to disk.
0084         %
0085         % Runnig <a href="matlab:help CollocatedDataset/read">.read()</a> can take considerable time. Therefore, it is worthwhile
0086         % to cache the results. If 'pcd' is set to a <a href="matlab:help PersistentCachedData">PersistentCachedData</a>,
0087         % .read() will cache the results and read from disk if possible.
0088         % WARNING: DO NOT SET PCD TO A DIRECTORY YOU ARE OTHERWISE
0089         % USING! The caching mechanism may remove files from this
0090         % directory, so be very careful. Caching works by calculating a key
0091         % (almost certainly unique) based on input arguments and storing
0092         % the results in a .mat-file in the cache-directory.
0093         %
0094         % Example use:
0095         % >> d = datasets;
0096         % >> d.collocation_mhs_amsub.pcd = PersistentCachedData('/local/gerrit/cache');
0097         % >> [M, c] = d.collocation_mhs_amsub.read([2006 4 25], [2006 4 26], {'noaa18', 'noaa16'}, {'LAT1', 'LAT2', 'BT1'});
0098         % (later)
0099         % >> [M, c] = d.collocation_mhs_amsub.read([2006 4 25], [2006 4 26], {'noaa18', 'noaa16'}, {'LAT1', 'LAT2', 'BT1'});
0100         % 14-May-2012 11:37:33.707:PersistentCachedData.get_entry:54:Reading from persistent cache: /local/gerrit/cache/aff3735816ae90b11a4ddfd571f88e3b.mat
0101         pcd;
0102         
0103         % Type to use for internal storage
0104         %
0105         % Internally, the collocation toolkit passes around collocations as
0106         % matrices.  To be on the safe side, this normally uses the type
0107         % 'double'.  If you are low on memory, or trying to store a lot of
0108         % data and/or have a lot of collocations, you can set this to
0109         % 'single'.  Do not set it to any non-floating point type unless
0110         % you want severe loss of precision and crashing code.
0111         mattype = 'double';
0112 
0113         % Users may set this for various purposes.  Among others, it's used
0114         % by strrep.
0115         version;
0116     end
0117     
0118     properties (Dependent = true)
0119         % contains detailed information on how data are stored etc.
0120         %
0121         % To be documented in more detail.
0122         members;
0123     end
0124     
0125     properties (Access = protected)
0126         % for internal usage
0127         ownmembers;
0128         dynamic_members = false;
0129     end
0130     
0131     methods
0132         
0133         %% overload parent methods
0134         
0135         function self = HomemadeDataset(varargin)
0136             self = self@SatDataset(varargin{:});
0137             %{
0138             args = varargin;
0139             
0140             % extract 'members' because SatDataset is not allowed to set
0141             % HomemadeDataset private property, and it must be private in
0142             % order for subclasses to redefine it etc.
0143             setmem = false;
0144             if any(strcmp(args, 'members'))
0145                 setmem = true;
0146                 i = find(strcmp(args, 'members'));
0147                 mem = args{i+1};
0148                 args = [args(1:i-1) args(i+2:end)];
0149             end
0150             self = self@SatDataset(args{:});
0151             if setmem
0152                 self.members = mem;
0153             end
0154             %}
0155                    
0156             
0157             if ~any(strcmp(varargin, 'reader')) % set reader
0158                 self.reader = @self.read_homemade_granule;
0159             end
0160              
0161             if isequal(self.granule_duration, [])
0162                 self.granule_duration = 86400;
0163             end
0164         end
0165         
0166         %% implement new methods
0167         
0168         function [fn, global_atts] = store(self, date, spec, data, varargin)
0169             % store Write collocation data to netcdf file
0170             %
0171             % Write collocation data for date in data to a netcdf file.
0172             % The filename is determined from self, date, spec.
0173             % This is a relatively low-level function and not normally
0174             % called directly; rather call <a href="matlab:help CollocatedDataset/collocate_and_store_date">CollocatedDataset.collocate_and_store_date</a>
0175             %
0176             % FORMAT
0177             %
0178             %   [fn, global_atts] = obj.store(date, spec, data[, info])
0179             %
0180             % IN
0181             %
0182             %   date        vector [year month day] to which data corresponds
0183             %   spec        string / cellstr, satellite (or so)
0184             %   data        actual data, columns described by <a href="matlab:help HomemadeDataset/cols">self.cols</a>
0185             %               unless 'localcols' argument is given
0186             %   info        (optional) struct with more info to put in NetCDF
0187             %   localcols   structure describing columns of data
0188             %
0189             % OUT
0190             %
0191             %   fn          file data was written to
0192             %   atts        global attributes that were written to file
0193             %
0194             % $Id: HomemadeDataset.m 8750 2013-12-07 18:14:32Z seliasson $
0195             
0196             if ~all(isfinite(data(:)))
0197                 warning(['atmlab:' mfilename ':format'], ...
0198                     'Found nans or infs in data. Data should be finite. I''ll do my best.');
0199             end
0200             
0201             fn = self.find_granule_by_datetime(date, spec);
0202             
0203             [info, localcols] = optargs(varargin, {struct(), self.cols});
0204 
0205             %year = date(1);
0206             %month = date(2);
0207             %day = date(3);
0208             
0209             % create or append to the file
0210             
0211             if exist(fn, 'file')
0212                 switch self.overwrite
0213                     case 0
0214                         error(['atmlab:' mfilename ':fileexists'], ...
0215                             ['You really shouldn''t tell me not to overwrite ' ...
0216                              'and then tell me to store data where a file already exists.']);
0217                     case 1
0218                         newfile = true;
0219                     otherwise
0220                         % FIXME: should add a line to 'history' global
0221                         % attribute
0222                         newfile = false;
0223                 end
0224             else
0225                 newfile = true;
0226             end
0227             
0228 %             if strcmp(fn(end-1:end), 'gz') % take off this part
0229 %                 fn(end-2:end) = '';
0230 %             end
0231             
0232             outdir = fileparts(fn);
0233             if ~exist(outdir, 'dir')
0234                 logtext(atmlab('OUT'), 'Creating %s\n', outdir);
0235                 mkdir(outdir);
0236             end
0237             
0238             %% Estimate size
0239             
0240             ncollocs = size(data, 1);            
0241             nbytes = self.linesize(localcols) * ncollocs;
0242             logtext(atmlab('OUT'), 'Will write %d collocations, %s of uncompressed data (not including header)\n', ncollocs, nbytes2string(nbytes));
0243             
0244             
0245             
0246             if newfile
0247                 % temporary filename, later compressed written to final place
0248                 temp_out = tempname(atmlab('WORK_AREA'));                
0249                 logtext(atmlab('OUT'), 'Writing %s\n', temp_out);
0250                 ncid = netcdf.create(temp_out, 'NC_CLOBBER'); % overwrite existing
0251             else
0252                 logtext(atmlab('OUT'), 'Appending to %s\n', fn);
0253                 temp_out = uncompress(fn, atmlab('WORK_AREA'), struct('unidentified', 'error'));
0254                 ncid = netcdf.open(temp_out, 'WRITE');
0255                 netcdf.reDef(ncid); % put in header define mode
0256             end
0257             cleanupObj = onCleanup(@() self.cleanup(temp_out, ncid));
0258             
0259             if newfile
0260                 % define the dimensions
0261                 
0262                 dim_collocs = netcdf.defDim(ncid, 'Collocations', ncollocs);
0263                 
0264                 % put global attributes
0265                 
0266                 global_atts = struct();
0267                 global_atts.Conventions = 'CF-1.4';
0268                 global_atts.title = 'Collocations';
0269                 global_atts.date = iso_timestamp();
0270                 global_atts.institution = ['Department of Computer Science, Electrical and Space Engineering, Division of Space Technology, Lule' char(unicode2native('å')) ' University of Technology, Kiruna, Sweden'];
0271                 global_atts.source = 'Collocation codes, part of atmlab';
0272                 global_atts.references = 'Holl et al.  (2010); John et al. (2012)';
0273                 global_atts.contact = 'gerrit.holl@gmail.com';
0274                 global_atts.software_version = atmlab_version;
0275                 global_atts.id = [atmlab_version() ' -- ' iso_timestamp() ' -- ' fn];
0276                 global_atts.license = ...
0277                     ['This dataset is made available under the ' ...
0278                      'Open Data Commons Attribution License (ODC-By) v1.0 ' ...
0279                      'whose full text can be found at ' ...
0280                      'http://opendatacommons.org/licenses/by/1.0/. ' ...
0281                      'Any rights in individual contents of the dataset are ' ...
0282                      'licensed under the Open Data Commons Attribution License (ODC-By) v1.0 ' ...
0283                      'whose text can be found at http://opendatacommons.org/licenses/by/1.0/.'];
0284                 
0285                 % add caller-contributed ones
0286                 %warning('off', 'catstruct:DuplicatesFound');
0287                 inf_fields = fieldnames(info);
0288                 for i = 1:length(inf_fields)
0289                     fldnm = inf_fields{i};
0290                     global_atts.(fldnm) = info.(fldnm);
0291                 end
0292                 %global_atts = catstruct(global_atts, info);
0293                 %warning('on', 'catstruct:DuplicatesFound');
0294                 addncattributes(ncid, global_atts)
0295             else
0296                 dim_collocs = netcdf.inqDimID(ncid, 'Collocations');
0297             end
0298             
0299             %% define variables, variable attributes, additional dimensions
0300             
0301             vars = fieldnames(localcols);
0302             
0303             varids = zeros(size(vars));
0304             dims = struct();
0305             for j = 1:length(vars)
0306                 varname = vars{j};
0307                 type = self.members.(varname).type;
0308                 if isfield(self.members.(varname), 'atts')
0309                     atts = self.members.(varname).atts;
0310                 else
0311                     atts = struct();
0312                 end
0313                 
0314                 % check if we have other dimensions besides the length
0315                 if isfield(self.members.(varname), 'dims') && size(data, 1)>0
0316                     dimname = self.members.(varname).dims{1};
0317                     dimsize = self.members.(varname).dims{2};
0318                     try
0319                         if ~isfield(dims, dimname)
0320                             dims.(dimname) = netcdf.defDim(ncid, dimname, dimsize);
0321                         end
0322                     catch ME
0323                         switch ME.identifier
0324                             case {'MATLAB:netcdf:defDim:nameIsAlreadyInUse', 'MATLAB:imagesci:netcdf:libraryFailure'}
0325                                 % no problem
0326                                 dims.(dimname) = netcdf.inqDimID(ncid, dimname);
0327                             otherwise
0328                                 ME.rethrow();
0329                         end
0330                     end
0331                     thisdim = [dim_collocs dims.(dimname)];
0332                 else
0333                     thisdim = dim_collocs;
0334                 end
0335                 % define variable and put attributes
0336                 try
0337                     % if this doesn't fail, the variable already exists
0338                     varid = netcdf.inqVarID(ncid, varname);
0339                     % while we're at it, let's check the dimensions
0340                     [~, ~, dids] = netcdf.inqVar(ncid, varid);
0341                     [~, stored_n_collocs] = netcdf.inqDim(ncid, dids(1));
0342                     if size(data, 1) ~= stored_n_collocs
0343                         errstr = sprintf(['NetCDF file contains data for variable %s ' ...
0344                             'with a different size than the data I''m trying to store. ' ...
0345                             ' Stored no. of entries is %d, whereas new size is %d entries. '], ...
0346                             varname, stored_n_collocs, size(data, 1));
0347                         if self.overwrite == 2 && isa(self, 'Collapser')
0348                             errstr = [errstr, ...
0349                                 sprintf([' Probably the original collapsing ' ...
0350                                 'is from the era where entries where removed if ' ...
0351                                 'all data were flagged, even if the collocation was ' ...
0352                                 'otherwise valid.  When additional data is added that ' ...
0353                                 'is not flagged, this results in a different no. of ' ...
0354                                 'collocations.  The only solution is to set %s.overwrite ' ...
0355                                 'to 1.  Sorry about that :('], self.name)];
0356                         else
0357                             errstr = [errstr, ...
0358                                 sprintf([' I''m not sure why I''m even ' ...
0359                                 'here in the first place, because I''m not trying ' ...
0360                                 'to extend a Collapser (I''m a %s in overwrite ' ...
0361                                 'mode %d'], class(self), self.overwrite)];
0362                         end
0363                         error(['atmlab:' mfilename ':wrongsize'], errstr);
0364                     end
0365                     varids(j) = -1;
0366                 catch ME
0367                     switch ME.identifier
0368                         case 'MATLAB:imagesci:netcdf:libraryFailure'
0369                             varid = netcdf.defVar(ncid, varname, type, thisdim);
0370                             varids(j) = varid;
0371                             for k = fieldnames(atts)'
0372                                 netcdf.putAtt(ncid, varid, k{1}, atts.(k{1}));
0373                             end
0374                             
0375                         otherwise
0376                             ME.rethrow();
0377                     end
0378                 end
0379 
0380             end
0381             
0382             %% write data
0383             
0384             % end define mode
0385             
0386             netcdf.endDef(ncid);
0387             
0388             if isempty(data)
0389                 logtext(atmlab('OUT'), 'No data, NetCDF file will be dataless\n');
0390             else
0391                 % put vars
0392                 logtext(atmlab('OUT'), 'Writing: ');
0393                 
0394                 for j = 1:length(vars(:).')
0395                     varname = vars{j};
0396                     
0397                     varid = varids(j);
0398                     if varid == -1
0399                         fprintf(atmlab('OUT'), '(not: %s) ', varname);
0400                         continue
0401                     end
0402                     
0403                     fprintf(atmlab('OUT'), '%s ', varname);
0404                     if any(any(~isfinite(data(:, localcols.(varname)))))
0405                         if ~isfield(self.members.(varname).atts, 'missing_value')
0406                             error(['atmlab:' mfilename ':missingmissing'], ...
0407                                 ['I found nonfinite values for field %s, ' ...
0408                                 'but no missing_value is defined for ' ...
0409                                 '%s.members.%s.atts.missing_value. Please define.'], ...
0410                                 varname, self.name, varname);
0411                         end
0412                         % need to flag only the relevant channels.  Matlabs
0413                         % lacks views, so I have to do this in a really
0414                         % cumbersome way.
0415                         chanmask = false(size(data));
0416                         chanmask(:, localcols.(varname)) = true;
0417                         invmask = ~isfinite(data);
0418                         data(chanmask&invmask) = self.members.(varname).atts.missing_value;
0419                         %data(any(~isfinite(data(:, localcols.(varname)))), localcols.(varname)) = self.members.(varname).atts.missing_value;
0420                     end
0421                     
0422                     netcdf.putVar(ncid, varid, data(:, localcols.(varname)));
0423                 end
0424                 fprintf(atmlab('OUT'), '\n');
0425             end
0426             
0427             logtext(atmlab('OUT'), 'Finalising\n');
0428             logtext(atmlab('OUT'), 'Gzipping to %s and removing uncompressed\n', fn);
0429             netcdf.close(ncid);
0430             gzipped_filename = gzip(temp_out, outdir);
0431             if ~isequal(gzipped_filename{1}, fn)
0432                 movefile(gzipped_filename{1}, fn);
0433             end
0434             logtext(atmlab('OUT'), 'Done\n');
0435             
0436         end
0437                 
0438         function fields = list_fields(self)
0439             % return valid fields in this dataset
0440             fields = fieldnames(self.members);
0441         end
0442              
0443         function S = cast_fields_back(self, M, c)
0444             % Cast columns from data-matrix to 'stored' types
0445             %
0446             % The collocation toolkit internally passes around data in a
0447             % matrix.  To be on the safe side, this uses double, because
0448             % all other types (except (u)int64) fit.  However, sometimes
0449             % this is not desirable, and this method can be used to cast
0450             % types back to their types, based on the type to which they
0451             % should be stored.
0452             %
0453             % FORMAT
0454             %
0455             %   varargout = hd.cast_fields_back(M, c,)
0456             %
0457             % IN
0458             %
0459             %   M   matrix containing data
0460             %   c   structure describing columns by name
0461             %
0462             % OUT
0463             %
0464             %   S   structure with data in smaller type
0465             
0466             S = struct();
0467             fields = fieldnames(c);
0468             for i = 1:length(fields)
0469                 field = fields{i};
0470                 newtype = type_nc2ml(self.members.(field).type);
0471                 S.(field) = cast(M(:, c.(field)), newtype);
0472             end
0473         end
0474         
0475         function nbytes = linesize(self, localcols)
0476             % Calculate the stored size (in bytes) per collocation
0477             nbytes = 0;
0478             sizes = struct(...
0479                 'float', 4, ...
0480                 'double', 8, ...
0481                 'short', 2, ...
0482                 'int', 4, ...
0483                 'byte', 1);
0484             fields = fieldnames(localcols);
0485             for f = vec2row(fields)
0486                 tp = self.members.(f{1}).type;
0487                 width = length(localcols.(f{1}));
0488                 nbytes = nbytes + sizes.(tp) * width;
0489             end
0490         end
0491         
0492         function first = granule_first_line(varargin)
0493             first = int32(1);
0494         end
0495         
0496         %% getters/setters
0497         function mem = get.members(self)
0498             % members getter, see doc for property members
0499             
0500             if self.dynamic_members
0501                 mem = self.dynamically_get_members();
0502             else
0503                 mem = self.ownmembers;
0504             end
0505         end
0506         
0507         function set.members(self, val)
0508             if self.dynamic_members
0509                 error(['atmlab:' mfilename ':readonly'], ...
0510                     ['The property ''members'' for class %s is dynamically calculated. ' ...
0511                      'Therefore, you cannot set the members property for %s.'], ...
0512                      class(self), self.name);
0513             else
0514                 self.ownmembers = val;
0515             end
0516         end
0517         
0518     end
0519     
0520     methods (Access = protected)
0521         
0522         %% overload parent methods
0523         function s_out = repvars(self, s, datevec, spec)
0524             s = repvars@SatDataset(self, s, datevec, spec);
0525             if isempty(self.version)
0526                 s_out = s;
0527             else
0528                 s_out = strrep_multi(s, ...
0529                     '$VERSION', strrep(self.version, '.', '_'));
0530             end
0531         end
0532                     
0533         function matches = infofit(self, is, datevec, spec)
0534             matches = infofit@SatDataset(self, is, datevec, spec);
0535             if isfield(is, 'version') && ~isempty(self.version)
0536                 matches = matches && isequal(is.version, strrep(self.version, '.', '_'));
0537             elseif isempty(self.version)
0538                 matches = true;
0539             else
0540                 matches = false;
0541             end
0542         end
0543 
0544         %% implement new methods
0545         % those are for internal use, user uses
0546         % CollocatedDataset.read
0547         function [M, localcols, attr] = read_single_day(self, date, spec, fields)
0548             % Read collocation 'fields' for 'date', 'spec'.
0549             %
0550             % Low-level function, not normally called directly. To read
0551             % collocated data, use <a href="matlab:help CollocatedDataset/read">CollocatedDataset.read</a>.
0552             %
0553             % Reads a single day of data.
0554             %
0555             % FORMAT
0556             %
0557             %   [M, localcols, attr] = ds.read_single_day(date, spec, fields)
0558             %
0559             % IN
0560             %
0561             %   date    datevec     datevec for which to read data
0562             %   spec    various     satellite(s)
0563             %   fields  cellstr     fields to read from data.
0564             %
0565             % Note:
0566             %
0567             % 'fields' may be 'all', in which case all fields are read.
0568             % No guarantee about the order, but this information is
0569             % returned via 'localcols'. If you need consistence of order
0570             % with actual collocating, consider passing
0571             % fieldnames(self.cols) as fields.
0572             %
0573             % OUT
0574             %
0575             %   M           matrix      contains requested data
0576             %   localcols   struct      describes data columns
0577             %   attr        struct      contains NetCDF global attributes
0578             %
0579             % See also: HomemadeDataset/read_homemade_granule
0580             
0581             fn = self.find_granule_by_datetime(date, spec);
0582             logtext(atmlab('OUT'), 'Gunzipping and reading %s\n', fn);
0583             tmp = tempname(atmlab('WORK_AREA'));
0584             c = onCleanup(@()delete(tmp));
0585             exec_system_cmd(['gunzip -c ' fn '>' tmp]); % 3x faster than ML's gunzip
0586             
0587             if ischar(fields)
0588                 if strcmp(fields, 'all')
0589                     data = loadncfile(tmp);
0590                     attr = data.global_attributes;
0591                     fields = fieldnames(data);
0592                 else
0593                     error(['atmlab:' mfilename ':invalid'], ...
0594                         ['Invalid ''fields'' argument: ' fields]);
0595                 end
0596             else
0597                 [data, attr] = loadncvar(tmp, fields);
0598             end
0599             
0600             % find n. of columns to allocate, keep order
0601             data_fields = intersect_unsorted(fields, fieldnames(self.members));
0602             %n_columns = sum(cellfun(@(ff) length(self.cols.(ff)), data_fields));
0603             n_columns = sum(cellfun(@(ff) size(data.(ff), 2),  data_fields));
0604             n_rows = max(cellfun(@(ff) size(data.(ff), 1), fields));
0605             M = nan*zeros(n_rows, n_columns);
0606             if isempty(M)
0607                 localcols = struct();
0608                 return;
0609             end
0610             n = 1;
0611             % copy data to matrix
0612             for i = 1:length(data_fields)
0613                 fld = data_fields{i};
0614                 %n_local_cols = length(self.cols.(fld));
0615                 n_local_cols = size(data.(fld), 2);
0616                 range_local_cols = n:(n+n_local_cols-1);
0617                 D = data.(fld);
0618                 
0619                 M(:, range_local_cols) = D;
0620 
0621                 % verify data validity; should be done on writing but
0622                 % wasn't always in the past, so still needed here
0623                 if any(isfield(self.members.(fld).atts, {'valid_range', 'valid_min', 'valid_max'}))
0624                     if isfield(self.members.(fld).atts, 'valid_range')
0625                         lo = self.members.(fld).atts.valid_range(1);
0626                         hi = self.members.(fld).atts.valid_range(2);
0627                     else
0628                         % fallbacks
0629                         if isinteger(D)
0630                             getlow = @intmin;
0631                             gethi = @intmax;
0632                         elseif isfloat(D)
0633                             getlow = @realmin;
0634                             gethi = @realmax;
0635                         else
0636                             error(['atmlab:' mfilename ':unknown'], ...
0637                                 'Non-numeric type unsupported, %s is %s', fld, class(fld));
0638                         end
0639                         lo = getlow(class(data.(fld)));
0640                         hi = gethi(class(data.(fld)));
0641                         if isfield(self.members.(fld).atts, 'valid_min')
0642                             lo = self.members.(fld).atts.valid_min;
0643                         end
0644                         if isfield(self.members.(fld).atts, 'valid_max')
0645                             hi = self.members.(fld).atts.valid_max;
0646                         end
0647                     end
0648                     %
0649                     % FIXME: test that lo < data < hi || data==missing
0650                     wrong = (D < lo | D > hi);
0651                     if isfield(self.members.(fld).atts, 'missing_value')
0652                         wrong = wrong & (D ~= self.members.(fld).atts.missing_value);
0653                     end
0654                     if any(wrong)
0655                         error(['atmlab:' mfilename ':invalid'], ...
0656                             ['Encountered invalid data for %s %s at %s.  Data for field %s ' ...
0657                              'must be in range %g -- %g or flagged.  Found ' ...
0658                              'value %g instead.  Perhaps an artefact from before ' ...
0659                              'the proper use of flags.  Suggest to recollocate ' ...
0660                              'or redo AssociatedDataset.'], ...
0661                              class(self), self.name, datestr(datenum(date), 'yyyy-mm-dd'), ...
0662                                 fld, lo, hi, D(find(wrong, 1)));
0663                     end
0664                 end
0665                     
0666                 % set flagged to nan
0667                 if isfield(self.members.(fld).atts, 'missing_value')
0668                     flagged = M(:, range_local_cols) == self.members.(fld).atts.missing_value;
0669                     M(all(flagged, 2), range_local_cols) = nan;
0670                 end
0671                     
0672                 localcols.(fld) = range_local_cols;
0673                 n = n + n_local_cols;
0674 
0675             end
0676             if isempty(data_fields)
0677                 localcols = struct();
0678             end
0679 
0680         end
0681                
0682         function pos2re(self)
0683             % convert self.{basedir,subdir,filename} to self.re
0684             %
0685             % This method attempts to convert
0686             % self.{basedir,subdir,filename) to self.re at a best-effort
0687             % basis.
0688             r = [fullfile(self.basedir, self.subdir, self.filename) ...
0689                  '|' self.filename];
0690             self.re = strrep_multi(r, ...
0691                 '$YEAR4', '(?<year>\d{4})', ...
0692                 '$MONTH', '(?<month>\d{2})', ...
0693                 '$DAY', '(?<day>\d{2})', ...
0694                 '$YEAR2', '(?<year>\d{2})', ...
0695                 '$DOY', '(?<doy>\d{3})', ...
0696                 '$HOUR', '(?<hour>\d{2})', ...
0697                 '$MINUTE', '(?<minute>\d{2})', ...
0698                 '$SAT', '(?<satname>[a-z0-9]*)', ...
0699                 '$VERSION', '(?<version>[a-z0-9_]{3,4})');
0700             % However, we don't need it to locate granules. It's only used
0701             % by find_info_from_granule.
0702             self.tryre = false;
0703         end           
0704     end
0705     
0706     methods (Access = {?SatDataset})
0707         function [S, strattr] = read_homemade_granule(self, file, varargin)
0708             % internal usage, reader for any granule made by this toolkit
0709             %
0710             % hd.read_homemade_granule(file, [fields])
0711             
0712             fields  = optargs(varargin, {{}});
0713             
0714             [S, strattr] = loadncvar(file, fields);
0715            
0716             % get additional stuff
0717             info = self.find_info_from_granule(file);
0718             
0719             if isfield(info, 'doy')
0720                 date = dayofyear_inverse(str2double(info.year), str2double(info.doy));
0721                 date = [date.year date.month date.day];
0722             else
0723                 date = [str2double(info.year) str2double(info.month) str2double(info.day)];
0724             end
0725             S.epoch = round(date2unixsecs(date(1), date(2), date(3)));
0726             
0727             S.path = file;
0728             
0729             if isfield(strattr, 'version')
0730                 S.version = strattr.version;
0731             end
0732             
0733             if isfield(S, 'lat')
0734                 S = MaskInvalidGeoTimedataWithNaN(S);
0735             end
0736         end   
0737         
0738     end
0739         
0740     % static/private are used like subfunctions
0741     methods (Static, Access = protected)
0742         function cleanup(temp_out, ncid)
0743             % remove temporary files and close NetCDF
0744             logtext(atmlab('OUT'), 'Cleaning up\n');
0745             try
0746                 netcdf.close(ncid);
0747             catch ME
0748                 switch ME.identifier
0749                     case {'MATLAB:netcdf:inq:notNetcdfID', 'MATLAB:netcdf:close:notNetcdfID', ...
0750                             'MATLAB:netcdf:close:ebadid:notNetcdfID', 'MATLAB:imagesci:netcdf:libraryFailure'} % already closed
0751                     otherwise
0752                         delete(temp_out);
0753                         ME.rethrow();
0754                 end
0755             end
0756             delete(temp_out);
0757         end
0758     end
0759     
0760 end

Generated on Mon 15-Sep-2014 13:31:28 by m2html © 2005