Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

pycama / src / pycama / File.py @ 841:49ca3a4a5dbe

History | View | Annotate | Download (37.1 KB)

1 74:b66004a0f8d8 Maarten
#!/usr/bin/env python3
2 5:3bfdb29d0ed0 maarten
# -*- coding: utf-8 -*-
3
4 326:90635f5b6aac Maarten
# Copyright 2016-2017 Maarten Sneep, KNMI
5
#
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are met:
8
#
9
# 1. Redistributions of source code must retain the above copyright notice,
10
#    this list of conditions and the following disclaimer.
11
#
12
# 2. Redistributions in binary form must reproduce the above copyright notice,
13
#    this list of conditions and the following disclaimer in the documentation
14
#    and/or other materials provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
20
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
## \file File.py
28
#  The file handling class.
29
#  Represents a single file, but tracks which files have been opened before to avoid duplication.
30
# @author Maarten Sneep
31
32 5:3bfdb29d0ed0 maarten
import sys
33
import os
34 232:46c8d10dffd7 Maarten
import re
35 5:3bfdb29d0ed0 maarten
import logging
36 8:f4067d056bb0 Maarten
import datetime
37 267:66d125675305 Maarten
import hashlib
38 600:109d30735633 Maarten
import uuid
39 607:707b1c869300 Maarten
import warnings
40
warnings.filterwarnings("ignore", category=FutureWarning)
41 5:3bfdb29d0ed0 maarten
42 143:07d7fb245706 Maarten
import h5py
43 267:66d125675305 Maarten
import netCDF4
44 74:b66004a0f8d8 Maarten
import numpy as np
45 5:3bfdb29d0ed0 maarten
46 94:8b9435871886 Maarten
from .utilities import CAMAException
47
48 326:90635f5b6aac Maarten
## Create a callable object to find variables within a HDF5 file
49
#
50
#  This is a companion class for the call to the visititems() method
51
#  in h5py groups objects. This method is used to select the desired Dataset
52
#  in the file.
53 143:07d7fb245706 Maarten
class NameMatcher(object):
    """Callable used with ``visititems()`` to pick one element out of a file.

    The instance stores the bare name of the element that is wanted and the
    name of the class the element must have. Calling the instance with a
    full path and an object returns the object on a match and None otherwise.
    """

    def __init__(self, name, cls="Dataset"):
        """Remember what to look for.

        @param name The name of the element to search for (without path).
        @param cls  The class name the element must have. Default is 'Dataset'.
        """
        self.cls = cls
        self.name = name

    def basename(self, name):
        """Split off the name of the element from its full path.

        @param name The full name, including the '/'-separated path.
        @return The part after the last '/' (the whole string if there is no '/').
        """
        return name.rsplit('/', 1)[-1]

    def __call__(self, name, obj):
        """Compare one visited element against the stored name and class name.

        @param name The full name of the visited element, including its path.
        @param obj  The visited object; the name of its class is compared
                    with the `cls` instance variable.
        @return obj if both the name and the class name match, None otherwise.
        """
        name_matches = self.basename(name) == self.name
        if name_matches and obj.__class__.__name__ == self.cls:
            return obj
        return None
84 326:90635f5b6aac Maarten
85 335:d887855826de Maarten
## A file wrapper.
86
#
87
# Maintains (and closes!) h5py file references, and returns basic metadata (time, orbit, ...)
88
#
89
# The class object maintains a list of files that have been opened already, to avoid duplicate file references.
90 5:3bfdb29d0ed0 maarten
class File(object):
91 335:d887855826de Maarten
    ## Class variable to keep track of the files that have been opened.
92
    reported_files = {}
93 326:90635f5b6aac Maarten
94 335:d887855826de Maarten
    ## The constructor
95
    #
96
    #  @param f Full path of the file to open.
97
    #  @param mode The mode for the file. Defaults to read-only.
98
    #  @param product The type of the product as requested.
99 310:620a95375824 Maarten
    def __init__(self, f, mode='r', product=None):
100 335:d887855826de Maarten
        ## The name of the file
101 5:3bfdb29d0ed0 maarten
        self.name = os.path.basename(f)
102 335:d887855826de Maarten
        ## The full path of the file.
103 5:3bfdb29d0ed0 maarten
        self.path = f
104
        self.reported_files[f] = self
105 335:d887855826de Maarten
        ## The reading mode
106 5:3bfdb29d0ed0 maarten
        self.mode = mode
107 335:d887855826de Maarten
        ## Has this file been opened before? Needed for progress messages.
108 5:3bfdb29d0ed0 maarten
        self.been_opened = False
109 841:49ca3a4a5dbe maarten
        self.time_warning_given = False
110 335:d887855826de Maarten
        ## The name of the product as requested. Needed for product mapping (offline use).
111 310:620a95375824 Maarten
        self.request_product = product
112 335:d887855826de Maarten
        ## the file reference.
113 5:3bfdb29d0ed0 maarten
        self._ref = None
114 335:d887855826de Maarten
        ## the orbit number
115 5:3bfdb29d0ed0 maarten
        self._orbit = None
116 476:9988c9258094 Maarten
        ## number of scanlines
117
        self._scanline = None
118 335:d887855826de Maarten
        ## The reference time. A datetime.datetime object.
119 5:3bfdb29d0ed0 maarten
        self._reference_time = None
120 335:d887855826de Maarten
        ## The validity start time. A datetime.datetime object.
121 5:3bfdb29d0ed0 maarten
        self._validity_start = None
122 335:d887855826de Maarten
        ## The validity end time. A datetime.datetime object.
123 5:3bfdb29d0ed0 maarten
        self._validity_end = None
124 335:d887855826de Maarten
        ## The product name as read from the input file.
125 5:3bfdb29d0ed0 maarten
        self._product = None
126 335:d887855826de Maarten
        ## The band on which the geolocation is based.
127 5:3bfdb29d0ed0 maarten
        self._band = None
128 335:d887855826de Maarten
        ## The bounding box attributes
129 5:3bfdb29d0ed0 maarten
        self._bbox = None
130 335:d887855826de Maarten
        ## The (unique) file id.
131 5:3bfdb29d0ed0 maarten
        self._id = None
132 335:d887855826de Maarten
        ## Dictionary with fall-back OMI metadata. Used only for OMI files.
133 232:46c8d10dffd7 Maarten
        self._omi_core_metadata = None
134 740:9809f94372a2 Maarten
        ## Number of succesfull pixels in granule.
135
        self._f_fraction_succesful = None
136 335:d887855826de Maarten
        ## Ensure access to the logger.
137 5:3bfdb29d0ed0 maarten
        self.logger = logging.getLogger('PyCAMA')
138 326:90635f5b6aac Maarten
139 232:46c8d10dffd7 Maarten
        try:
140 740:9809f94372a2 Maarten
            if self.isfile:
141
                self.parse_coreMetadataOMI()
142 232:46c8d10dffd7 Maarten
        except RuntimeError:
143
            pass
144 326:90635f5b6aac Maarten
145 310:620a95375824 Maarten
        if self.request_product is None:
146
            self.request_product = self.product
147 326:90635f5b6aac Maarten
148 335:d887855826de Maarten
    ## Close an open file reference
149
    #
150
    # Note that this does not invalidate the object, it just closes the file
151
    # reference to avoid exceeding the operating system user limit for the
152
    # number of open files.
153 5:3bfdb29d0ed0 maarten
    def close(self):
154 8:f4067d056bb0 Maarten
        """Close an open file"""
155 5:3bfdb29d0ed0 maarten
        if self._ref is not None:
156
            self._ref.close()
157
            self._ref = None
158 326:90635f5b6aac Maarten
159 335:d887855826de Maarten
    ## Symmetry method for pycama.File.File.close()
160 295:7c32c0a48a80 Maarten
    def open(self):
161
        return self.ref
162 326:90635f5b6aac Maarten
163 740:9809f94372a2 Maarten
    @property
164
    def size(self):
165
        return os.path.getsize(self.path)
166
167 335:d887855826de Maarten
    ## Read-only property referencing a hdf5 file.
168
    #
169
    # Opens the input file after checking it has a non-zero length.
170
    # If this is the first time this file is opened an info-level message is
171
    # written to the log file.
172
    #
173
    # @throws CAMAException in case of an error (zero file size or an `h5py` error.)
174 5:3bfdb29d0ed0 maarten
    @property
175
    def ref(self):
176 8:f4067d056bb0 Maarten
        """The file reference"""
177 5:3bfdb29d0ed0 maarten
        if self._ref is None:
178 289:dbd3e9a591c4 Maarten
            if os.path.getsize(self.path) == 0:
179
                raise CAMAException("File '{0}' has length zero".format(self.name))
180 5:3bfdb29d0ed0 maarten
            if not self.been_opened:
181 287:94de881ee69e Maarten
                self.logger.info("Opening file '%s'", self.name)
182 5:3bfdb29d0ed0 maarten
                self.been_opened = True
183 106:ad4f1a9e5582 Maarten
            try:
184 143:07d7fb245706 Maarten
                self._ref = h5py.File(self.path, self.mode)
185 289:dbd3e9a591c4 Maarten
            except (RuntimeError, OSError) as err:
186 841:49ca3a4a5dbe maarten
                raise CAMAException("Error on {0}: '{1}'".format(self.path, str(err)))
187 5:3bfdb29d0ed0 maarten
        return self._ref
188 326:90635f5b6aac Maarten
189 335:d887855826de Maarten
    ## Find a reference to a variable in a given file (recursively in groups)
190
    #
191
    #  Returns None if the variable isn't found.
192
    # @param varname Name of the variable.
193
    # @param group Group to start searching from. Default is to use
194
    #        <tt>/PRODUCT</tt> for S5P L2 files, <tt>/HDFEOS/SWATHS/<i>swath</i></tt>
195
    #        for OMI files and simply <tt>/</tt> for other files.
196
    # @return Reference (`h5py.Dataset`) to the requested variable or None.
197
    #
198
    # Uses a pycama.File.NameMatcher object with
199 368:bded6da28f8c Maarten
    # [`h5py.Group.visititems()`](http://docs.h5py.org/en/latest/high/group.html?highlight=visititems#Group.visititems).
200 5:3bfdb29d0ed0 maarten
    def find_variable(self, varname, group=None):
        """Search a group (recursively) for a Dataset with the given name.

        @param varname Name of the variable (without path).
        @param group   Group to start searching from. When None, the search
                       starts at `PRODUCT`; if that cannot be accessed, at the
                       first group below `/HDFEOS/SWATHS/`; and if that fails
                       as well, at the root group `/`.
        @return The object returned by `group.visititems()` for the first
                match, or None if nothing matched or the traversal failed.
        """
        # Make sure the file is open, also when an explicit group was passed in.
        if self._ref is None:
            self.open()

        if group is None:
            try:
                group = self.ref['PRODUCT']
            except (AttributeError, TypeError, KeyError):
                try:
                    # perhaps an OMI file?
                    swath = list(self.ref['/HDFEOS/SWATHS/'].keys())[0]
                    group = self.ref['/HDFEOS/SWATHS/{0}'.format(swath)]
                except (AttributeError, TypeError, KeyError, IndexError):
                    # IndexError: the swath group exists but is empty.
                    group = self.ref['/']

        matcher = NameMatcher(varname, "Dataset")
        try:
            return group.visititems(matcher)
        except Exception as err:
            # Best effort: a failed traversal is reported as 'not found'.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            self.logger.debug("Search for '%s' failed: %s", varname, err)
            return None
222 326:90635f5b6aac Maarten
223 335:d887855826de Maarten
    ## Find the names of the dimensions of a variable.
224
    #
225
    #  @param varname Name of the variable.
226
    #  @param group Group to start searching from, same as in pycama.File.File.find_variable() method.
227
    #
228
    #  Extract the names of the dimensions of a variable, either directly using the h5py interface,
229
    #  or falling back to the netCDF4 interface for dimensions that do not have a variable with the
230
    #  same name in the same location (unlinked dimensions).
231
    #
232
    # @note This method may close and reopen the file, invalidating all variable references.
233 267:66d125675305 Maarten
    def dimension_names(self, varname, group=None):
234
        var = self.find_variable(varname, group)
235
        unlinked_dims = False
236
        dim_names = []
237
        for i,n in enumerate(var.dims):
238
            dim_name = list(n.keys())[0]
239
            unlinked_dims = unlinked_dims or dim_name.startswith('This is a netCDF dimension but not a netCDF variable.')
240
            dim_names.append(dim_name)
241 326:90635f5b6aac Maarten
242 267:66d125675305 Maarten
        if unlinked_dims:
243
            self.logger.debug("Unlinked netCDF dimensions in '%s'", varname)
244
            self.logger.debug("Opening '%s' as netCDF4 file", self.path)
245
            variable_location = var.name
246
            self.close()
247
            with netCDF4.Dataset(self.path, 'r') as ncref:
248
                dim_names = list(ncref[variable_location].dimensions)
249 295:7c32c0a48a80 Maarten
            self.open()
250 267:66d125675305 Maarten
        return dim_names
251 326:90635f5b6aac Maarten
252 335:d887855826de Maarten
    ## Get the outline of the geolocations.
253
    #
254
    # The outline of all data in the granule. This is copied from the `eop:multiExtentOf`
255
    # metadata (originating from L1B eventually).
256
    # The data is ordered in a counter clockwise direction.
257
    #
258
    # In case this attribute is missing (UPAS, QA4ECV), we use the latitude and
259
    # longitude centers of the pixels to approximate this attribute.
260
    # @return Dictionary with latitude and longitude arrays describing the outline of the granule.
261
    #
262 74:b66004a0f8d8 Maarten
    def outline(self):
263
        try:
264 232:46c8d10dffd7 Maarten
            grp_list = '/METADATA/EOP_METADATA/om:featureOfInterest/eop:multiExtentOf/gml:surfaceMembers/gml:exterior'
265
            grp = self.ref[grp_list]
266 267:66d125675305 Maarten
            posList = grp.attrs['gml:posList'].decode('ASCII')
267 232:46c8d10dffd7 Maarten
            p = np.asarray([float(v) for v in posList.split()])
268 295:7c32c0a48a80 Maarten
            self.close()
269 232:46c8d10dffd7 Maarten
            return {'latitude': p[0::2], 'longitude': p[1::2]}
270 267:66d125675305 Maarten
        except (AttributeError, TypeError, KeyError, IOError, ValueError):
271 254:ef469b67b220 Maarten
            self.logger.warning("Attribute 'gml:posList' not found or empty, using fallback")
272 232:46c8d10dffd7 Maarten
            if self._omi_core_metadata is not None:
273 244:d38fcd8ea74d Maarten
                lat = self.find_variable('Latitude')[:]
274
                lon = self.find_variable('Longitude')[:]
275 232:46c8d10dffd7 Maarten
            else:
276 257:75d3b5e9046c Maarten
                # QA4ECV
277 244:d38fcd8ea74d Maarten
                lat = self.find_variable('latitude')[0, ...]
278
                lon = self.find_variable('longitude')[0, ...]
279 368:bded6da28f8c Maarten
            if len(lat) == 0 or len(lon) != len(lat):
280
                return {'latitude': np.asarray([], dtype=np.float32),
281
                        'longitude': np.asarray([], dtype=np.float32)}
282 244:d38fcd8ea74d Maarten
            lat_outline = []
283
            lon_outline = []
284
            for i in range(0,lon.shape[0],10):
285
                lat_outline.append(lat[i, 0])
286
                lon_outline.append(lon[i, 0])
287
            for i in range(0,lon.shape[1],10):
288
                lat_outline.append(lat[lat.shape[0]-1, i])
289
                lon_outline.append(lon[lon.shape[0]-1, i])
290
            for i in range(lon.shape[0]-1, 0, -10):
291
                lat_outline.append(lat[i, lat.shape[1]-1])
292
                lon_outline.append(lon[i, lon.shape[1]-1])
293
            for i in range(lon.shape[1]-1, 0, -10):
294
                lat_outline.append(lat[0, i])
295
                lon_outline.append(lon[0, i])
296 295:7c32c0a48a80 Maarten
            self.close()
297 326:90635f5b6aac Maarten
            return {'latitude': np.asarray(lat_outline, dtype=np.float32),
298 244:d38fcd8ea74d Maarten
                    'longitude': np.asarray(lon_outline, dtype=np.float32)}
299 326:90635f5b6aac Maarten
300 335:d887855826de Maarten
    ## Inspect the input pointer to find which irradiance file was used to produce the data.
301
    #
302
    #  Search the elements in the `gmd:lineage` metadata to find the irradiance input.
303
    #  @return matching attribute(s) as a string, or None if the correct attribute could not be found.
304
    #
305
    #  This method requires that the `gmd:description` contains the string 'irradiance',
306
    #  and that the `gmi:processedLevel` attribute is equal to 'L1B'.
307
    #
308
    # @note UPAS does not set this attribute (or any of the input pointer really) at this moment.
309 74:b66004a0f8d8 Maarten
    def irradiance(self):
310 232:46c8d10dffd7 Maarten
        try:
311
            grp_list = '/METADATA/ISO_METADATA/gmd:dataQualityInfo/gmd:lineage/gmd:processStep'
312
            grp = self.ref[grp_list]
313 326:90635f5b6aac Maarten
314 232:46c8d10dffd7 Maarten
            irrad = []
315
            for src in [g for g in grp.keys() if g.startswith('gmd:source#') and g.__class__.__name__ != "Dataset"]:
316
                src_grp = grp[src]
317 187:49b68341d863 maarten
                try:
318 232:46c8d10dffd7 Maarten
                    description = src_grp.attrs['gmd:description'].decode("utf-8")
319 187:49b68341d863 maarten
                except IOError:
320
                    continue
321 326:90635f5b6aac Maarten
322 232:46c8d10dffd7 Maarten
                try:
323
                    plevel = src_grp['gmi:processedLevel'].attrs['gmd:code'].decode("utf-8")
324
                except IOError:
325
                    continue
326 326:90635f5b6aac Maarten
327 232:46c8d10dffd7 Maarten
                if ('irradiance' in description and plevel == 'L1B'):
328
                    try:
329
                        irrad.append(os.path.basename(src_grp['gmd:sourceCitation/gmd:alternateTitle#1'].attrs['gmx:FileName'].decode("utf-8")))
330
                    except IOError:
331
                        continue
332 295:7c32c0a48a80 Maarten
            self.close()
333 232:46c8d10dffd7 Maarten
            return ", ".join(irrad) if len(irrad) > 0 else None
334
        except (AttributeError, TypeError, KeyError):
335
            # perhaps an OMI file
336
            if self._omi_core_metadata is not None:
337
                irrad = [v for v in self._omi_core_metadata['INPUTPOINTER'] if 'IRR' in v]
338 295:7c32c0a48a80 Maarten
                self.close()
339 232:46c8d10dffd7 Maarten
                return ", ".join(irrad) if len(irrad) > 0 else None
340
            else:
341 257:75d3b5e9046c Maarten
                try:
342
                    # QA4ECV
343
                    irrad = self.ref.attrs['irradiance_file'].decode('ASCII')
344 295:7c32c0a48a80 Maarten
                    self.close()
345 257:75d3b5e9046c Maarten
                    return irrad
346
                except (AttributeError, TypeError, KeyError):
347 295:7c32c0a48a80 Maarten
                    self.close()
348 257:75d3b5e9046c Maarten
                    return None
349 326:90635f5b6aac Maarten
350 335:d887855826de Maarten
    ## Extract the full input pointer.
351
    #
352
    #  @return A dictionary with all file names referred to in the input pointer.
353
    #  The file names are stored in lists, as there may be more files per input type.
354
    #  The keys of the dictionary are the product keys, extracted from the file name.
355
    #
356
    #  @note Uses `os.path.basename()` to cut off any directory paths (there should not be any).
357
    #  @note Expects the correct file names, i.e. conforming to the file name convention.
358
    #  NISE is treated separately, and accepts either the original NSIDC file names or
359
    # names conforming to the S5P file name convention.
360 129:fe0bb2ff7372 Maarten
    def input_pointer(self):
361 232:46c8d10dffd7 Maarten
        try:
362
            grp_list = '/METADATA/ISO_METADATA/gmd:dataQualityInfo/gmd:lineage/gmd:processStep'
363
            grp = self.ref[grp_list]
364 326:90635f5b6aac Maarten
365 232:46c8d10dffd7 Maarten
            input_pointer = {}
366
            for src in [g for g in grp.keys() if g.startswith('gmd:source#') and g.__class__.__name__ != "Dataset"]:
367
                src_grp = grp[src]
368
                lst = []
369
                for src2 in [g for g in src_grp['gmd:sourceCitation'].keys() if g.startswith('gmd:alternateTitle#')]:
370
                    try:
371
                        lst.append(os.path.basename(src_grp['gmd:sourceCitation/' + src2].attrs['gmx:FileName'].decode("utf-8")))
372
                    except (AttributeError, TypeError, KeyError, IOError):
373
                        self.logger.warning("Filename not set in metadata for {0}".format(src))
374
                        continue
375
                if len(lst) == 0:
376 187:49b68341d863 maarten
                    continue
377 335:d887855826de Maarten
                if 'SSMIS' in lst[-1]:
378
                    key = "AUX_NISE__"
379
                else:
380
                    key = lst[-1][9:19]
381 232:46c8d10dffd7 Maarten
                input_pointer[key] = lst
382
        except (AttributeError, TypeError, KeyError):
383
            # perhaps an OMI file
384
            if self._omi_core_metadata is not None:
385
                input_pointer = {"{0}".format(i): v for i,v in enumerate(self._omi_core_metadata['INPUTPOINTER'])}
386
            else:
387
                input_pointer = {}
388 295:7c32c0a48a80 Maarten
        self.close()
389 129:fe0bb2ff7372 Maarten
        return input_pointer
390 536:72569842013a Maarten
391 476:9988c9258094 Maarten
    ## return the length of the scanline dimension
392
    @property
393
    def scanline(self):
394
        if self._scanline is None:
395
            try:
396 481:372ebc072230 Maarten
                self._scanline = len(self.find_variable('scanline'))
397 476:9988c9258094 Maarten
            except KeyError:
398
                self._scanline = -1
399
        return self._scanline
400 536:72569842013a Maarten
401 335:d887855826de Maarten
    ## Return the time for the whole file
402
    #
403
    #  @param which The which argument (string) specifies the specific time to represent the file:
404
    #   * **ref** Use the `reference_time` (UTC midnight before the start of the granule). This is the default.
405
    #   * **start** Use the `time_coverage_start` time (the time of the begin of the first measurement in the granule).
406
    #   * **end** or **stop** Use the `time_coverage_end` time (the time of the end of the last measurement in the granule).
407
    #   * Use the middle between start and end in all other cases.
408
    # @return A datetime.datetime object.
409 5:3bfdb29d0ed0 maarten
    def time(self, which='ref'):
410
        if which == 'ref':
411
            return self.reference_time
412
        elif which == 'start':
413
            return self.validity_start
414
        elif which in ('stop', 'end'):
415
            return self.validity_end
416
        else:
417
            return self.validity_mid
418 326:90635f5b6aac Maarten
419 335:d887855826de Maarten
    ## Check if the granule has a time that falls between start and stop.
420
    #
421
    #  @param start datetime.datetime object for the begin of the reference period.
422
    #  @param stop  datetime.datetime object for the end of the reference period.
423
    #  @param which reference time to use (see pycama.File.File.time()).
424
    #  @return A boolean.
425 5:3bfdb29d0ed0 maarten
    def check_time_range(self, start, stop, which='ref'):
426 8:f4067d056bb0 Maarten
        """
427
        Return True if the file (at the specific reference time) falls between start and stop.
428 326:90635f5b6aac Maarten

429 8:f4067d056bb0 Maarten
        start and stop are both datetime objects, which as in self.time()
430
        """
431 720:e2a5f252ad73 Maarten
        try:
432
            t = self.time(which)
433
        except CAMAException:
434
            self.logger.warning("File '{0}' does not contain a time, removing from analysis.".format(self.name))
435
            return False
436
437 94:8b9435871886 Maarten
        if start is None and stop is None:
438 143:07d7fb245706 Maarten
            r = True
439
            if not self.been_opened:
440
                self.logger.debug("Use file, no time limits were given")
441 94:8b9435871886 Maarten
        elif start is None and stop is not None:
442
            r = t < stop
443 841:49ca3a4a5dbe maarten
            if not self.been_opened and r:
444
                self.logger.debug("Use file, '%s' is before '%s'", t, stop)
445
            elif not self.time_warning_given and not r:
446
                self.logger.info("Skip file, '%s' is after '%s'", t, stop)
447 94:8b9435871886 Maarten
        elif start is not None and stop is None:
448
            r = start <= t
449 841:49ca3a4a5dbe maarten
            if not self.been_opened and r:
450
                self.logger.debug("Use file, '%s' is after '%s'", t, start)
451
            elif not self.time_warning_given and not r:
452
                self.logger.info("Skip file, '%s' is before '%s'", t, start)
453 5:3bfdb29d0ed0 maarten
        else:
454 94:8b9435871886 Maarten
            r = start <= t < stop
455 841:49ca3a4a5dbe maarten
            if not self.been_opened and r:
456
                self.logger.debug("Use file, '%s' is between '%s' and '%s'", t, start, stop)
457
            elif not self.time_warning_given and not r:
458
                self.logger.info("Skip file, '%s' is not between '%s' and '%s'", t, start, stop)
459
        if not r:
460
            self.time_warning_given = True
461
462 5:3bfdb29d0ed0 maarten
        return r
463 326:90635f5b6aac Maarten
464 335:d887855826de Maarten
    ## Is this an actual file?
465
    #
466
    #  Does the self.path instance variable resolve to an actual file?
467
    #
468
    #  This is a boolean property.
469 5:3bfdb29d0ed0 maarten
    @property
470
    def isfile(self):
471 740:9809f94372a2 Maarten
        return os.path.isfile(self.path) and self.size > 0
472 326:90635f5b6aac Maarten
473 335:d887855826de Maarten
    ## The orbit number for the granule
474
    #
475
    #  This is a int property.
476 5:3bfdb29d0ed0 maarten
    @property
477
    def orbit(self):
478
        if self._orbit is None:
479
            try:
480 600:109d30735633 Maarten
                self._orbit = int(self.ref.attrs['orbit'])
481 232:46c8d10dffd7 Maarten
            except (AttributeError, TypeError, KeyError):
482
                # perhaps this is an OMI file.
483
                if self._omi_core_metadata is not None:
484
                    self._orbit = self._omi_core_metadata['orbit']
485
                else:
486
                    raise CAMAException("Attribute 'orbit' not found.")
487 295:7c32c0a48a80 Maarten
            self.close()
488 5:3bfdb29d0ed0 maarten
        return self._orbit
489 326:90635f5b6aac Maarten
490 335:d887855826de Maarten
    ## Reference time of file (UTC midnight before start of orbit).
491
    #
492
    # This is a datetime.datetime property.
493 5:3bfdb29d0ed0 maarten
    @property
494
    def reference_time(self):
495
        if self._reference_time is None:
496
            try:
497 187:49b68341d863 maarten
                ref_time = int(self.ref.attrs["time_reference_seconds_since_1970"])
498
                self._reference_time = datetime.datetime.utcfromtimestamp(ref_time)
499 326:90635f5b6aac Maarten
            except OSError:
500
                self.close()
501
                with netCDF4.Dataset(self.path, 'r') as ref:
502
                    ref_time = int(ref.time_reference_seconds_since_1970)
503
                    self._reference_time = datetime.datetime.utcfromtimestamp(ref_time)
504 232:46c8d10dffd7 Maarten
            except (AttributeError, TypeError, KeyError):
505
                try:
506
                    ref_time_str = self.ref.attrs["time_reference"].decode("utf-8")
507
                    if ref_time_str[19] == '.':
508
                        if ref_time_str.ensdwith("Z"):
509
                            fmt = "%Y%m%dT%H%M%S.%fZ"
510
                        else:
511
                            fmt = "%Y%m%dT%H%M%S.%f"
512 267:66d125675305 Maarten
                    elif '-' in ref_time_str:
513
                        if ref_time_str.endswith("Z"):
514
                            fmt = "%Y-%m-%dT%H:%M:%SZ"
515
                        else:
516
                            fmt = "%Y-%m-%dT%H:%M:%S"
517 187:49b68341d863 maarten
                    else:
518 232:46c8d10dffd7 Maarten
                        if ref_time_str.endswith("Z"):
519
                            fmt = "%Y%m%dT%H%M%SZ"
520
                        else:
521
                            fmt = "%Y%m%dT%H%M%S"
522
                    self._reference_time = datetime.datetime.strptime(ref_time_str, fmt)
523
                except (AttributeError, TypeError, KeyError):
524 267:66d125675305 Maarten
                    try:
525
                        self.logger.info("First fallback failed for reference_time, attempting 'time' variable")
526
                        t = self.find_variable('time')
527
                        t0 = datetime.datetime.strptime("2010-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
528
                        self._reference_time = t0 + datetime.timedelta(seconds=int(t[0]))
529
                    except (AttributeError, TypeError, KeyError):
530
                        # perhaps this is an OMI file.
531
                        if self._omi_core_metadata is not None:
532
                            self.logger.info("Second fallback failed for reference_time, attempting OMI metadata")
533
                            self._reference_time = self._omi_core_metadata['reference_time']
534
                        else:
535
                            raise CAMAException("Attribute 'time_reference' not found.")
536 295:7c32c0a48a80 Maarten
            self.close()
537 5:3bfdb29d0ed0 maarten
        return self._reference_time
538 326:90635f5b6aac Maarten
539 335:d887855826de Maarten
    ## The time of the first observation in the file
    #
    #  This is a datetime.datetime property.
    @property
    def validity_start(self):
        """
        Return the start of the validity period as a datetime.datetime.

        The value is cached after the first successful lookup. Sources are tried
        in this order: the global 'time_coverage_start' attribute (re-read through
        netCDF4 when the first access raises OSError), the parsed OMI core
        metadata, and finally the 'delta_time' variable added to reference_time.
        The 'delta_time' route fills in the cached validity end as well.

        Raises CAMAException when 'delta_time' is unavailable or unusable.
        """
        if self._validity_start is not None:
            return self._validity_start

        iso_format = '%Y-%m-%dT%H:%M:%S'

        def offset_from_ms(value):
            # The value is split as milliseconds: whole seconds plus the
            # remainder converted to microseconds.
            whole = int(value//1000)
            remainder = int((value%1000)*1000)
            return datetime.timedelta(seconds=whole, microseconds=remainder)

        try:
            stamp = self.ref.attrs['time_coverage_start'].decode("utf-8")
            # Only the leading 'YYYY-MM-DDTHH:MM:SS' part is parsed.
            self._validity_start = datetime.datetime.strptime(stamp[0:19], iso_format)
        except OSError:
            # Access through self.ref failed; release it and retry with netCDF4.
            self.close()
            with netCDF4.Dataset(self.path, 'r') as ref:
                stamp = ref.time_coverage_start
                self._validity_start = datetime.datetime.strptime(stamp[0:19], iso_format)
        except (AttributeError, TypeError, KeyError):
            # perhaps this is an OMI file.
            if self._omi_core_metadata is not None:
                self._validity_start = self._omi_core_metadata['validity_start']
            else:
                try:
                    self.logger.info("First fallback failed for time_coverage_start, attempting 'delta_time' variable")
                    dt = self.find_variable('delta_time')
                    if dt is None:
                        raise CAMAException("Variable 'delta_time' not found.")
                    # Each offset is computed before reference_time is touched,
                    # keeping the original order of file accesses.
                    first_offset = offset_from_ms(dt[0, 0])
                    self._validity_start = self.reference_time + first_offset
                    last_offset = offset_from_ms(dt[0, -1])
                    self._validity_end = self.reference_time + last_offset
                except (AttributeError, TypeError, KeyError):
                    raise CAMAException("Attribute 'time_coverage_start' not found.")
        self.close()
        return self._validity_start
577 326:90635f5b6aac Maarten
578 335:d887855826de Maarten
    ## The time of the end of the last observation in the file
    #
    #  This is a datetime.datetime property.
    @property
    def validity_end(self):
        """
        Return the end of the validity period as a datetime.datetime.

        The value is cached after the first successful lookup. Sources are tried
        in this order: the global 'time_coverage_end' attribute (re-read through
        netCDF4 when the first access raises OSError), the parsed OMI core
        metadata, and finally the 'delta_time' variable added to reference_time.
        The 'delta_time' route fills in the cached validity start as well.

        Raises CAMAException when 'delta_time' is unavailable or unusable.
        """
        if self._validity_end is None:
            try:
                val_end_str = self.ref.attrs['time_coverage_end'].decode("utf-8")
                # Only the leading 'YYYY-MM-DDTHH:MM:SS' part is parsed.
                self._validity_end = datetime.datetime.strptime(val_end_str[0:19], '%Y-%m-%dT%H:%M:%S')
            except OSError:
                # Access through self.ref failed; release it and retry with netCDF4.
                self.close()
                with netCDF4.Dataset(self.path, 'r') as ref:
                    val_end_str = ref.time_coverage_end
                    self._validity_end = datetime.datetime.strptime(val_end_str[0:19], '%Y-%m-%dT%H:%M:%S')
            except (AttributeError, TypeError, KeyError):
                # perhaps this is an OMI file.
                if self._omi_core_metadata is not None:
                    self._validity_end = self._omi_core_metadata['validity_end']
                else:
                    try:
                        self.logger.info("First fallback failed for time_coverage_end, attempting 'delta_time' variable")
                        dt = self.find_variable('delta_time')
                        if dt is None:
                            raise CAMAException("Variable 'delta_time' not found.")
                        # delta_time is split as milliseconds: whole seconds plus
                        # the remainder converted to microseconds.
                        dt_start = dt[0, 0]
                        seconds = int(dt_start//1000)
                        microseconds = int((dt_start%1000)*1000)
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
                        self._validity_start = self.reference_time + delta_t
                        dt_end = dt[0, -1]
                        seconds = int(dt_end//1000)
                        microseconds = int((dt_end%1000)*1000)
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
                        self._validity_end = self.reference_time + delta_t
                    except (AttributeError, TypeError, KeyError):
                        raise CAMAException("Attribute 'time_coverage_end' not found.")
            self.close()
        return self._validity_end
616 326:90635f5b6aac Maarten
617 740:9809f94372a2 Maarten
    ## Fraction of successfully processed pixels
    #
    # This is a float property.
    @property
    def fraction_of_successful_pixels(self):
        """
        Return the fraction of successfully processed pixels.

        The value is cached after the first lookup. Sources are tried in order:

        1. 'number_of_successfully_processed_pixels' / 'number_of_groundpixels'
           from the '/METADATA/QA_STATISTICS' attributes.
        2. The NP-BDx counters ('number_of_S5P_groundpixels_with_VCM' or
           '..._with_ECM' over 'number_of_S5P_groundpixels'); a counter that is
           absent is replaced by 1.
        3. The OMI core metadata item 'QAPERCENTMISSINGDATA'.

        A total of zero pixels yields 0.0, whichever route produced it.

        Raises CAMAException when none of the sources is available.
        """
        if self._f_fraction_succesful is None:
            # The outer try covers every route, so a zero total in the NP-BDx
            # fallback is handled the same way as in the primary route.
            try:
                try:
                    n = self.ref['/METADATA/QA_STATISTICS'].attrs['number_of_successfully_processed_pixels']
                    total = self.ref['/METADATA/QA_STATISTICS'].attrs['number_of_groundpixels']
                    self._f_fraction_succesful = int(n)/int(total)
                except (AttributeError, TypeError, KeyError):
                    try:
                        # NP-BDx
                        qa_attrs = self.ref['/METADATA/QA_STATISTICS'].attrs
                        if 'number_of_S5P_groundpixels_with_VCM' in qa_attrs:
                            n = qa_attrs['number_of_S5P_groundpixels_with_VCM']
                        elif 'number_of_S5P_groundpixels_with_ECM' in qa_attrs:
                            n = qa_attrs['number_of_S5P_groundpixels_with_ECM']
                        else:
                            n = 1

                        if 'number_of_S5P_groundpixels' in qa_attrs:
                            total = qa_attrs['number_of_S5P_groundpixels']
                        else:
                            total = 1

                        self._f_fraction_succesful = int(n)/int(total)
                    except (AttributeError, TypeError, KeyError):
                        if self._omi_core_metadata is not None:
                            # parse_coreMetadataOMI already stores this item divided
                            # by 100, so it must not be scaled a second time here.
                            self._f_fraction_succesful = 1.0 - self._omi_core_metadata["QAPERCENTMISSINGDATA"]
                        else:
                            raise CAMAException("Succesfull pixel count not found.")
            except ZeroDivisionError:
                self._f_fraction_succesful = 0.0
        return self._f_fraction_succesful
654
655 335:d887855826de Maarten
    ## The product identifier, the product short name.
    #
    # This is a string property.
    @property
    def product(self):
        """
        Return the product short name.

        Taken from 'ProductShortName' in '/METADATA/GRANULE_DESCRIPTION' with
        every "L2__" removed, or from 'SHORTNAME' in the OMI core metadata when
        that attribute cannot be read. The value is cached.

        Raises CAMAException when neither source is available.
        """
        if self._product is not None:
            return self._product

        try:
            granule = self.ref['/METADATA/GRANULE_DESCRIPTION']
            short_name = granule.attrs['ProductShortName']
            self._product = short_name.decode("utf-8").replace("L2__", "")
        except (AttributeError, TypeError, KeyError):
            if self._omi_core_metadata is None:
                raise CAMAException("Product short name not found.")
            self._product = self._omi_core_metadata['SHORTNAME']
        self.close()
        return self._product
674 326:90635f5b6aac Maarten
675 335:d887855826de Maarten
    ## Extract the CoreMetadata from an OMI file and parse the contents.
    #
    #  The CoreMetadata string variables in OMI files have a somewhat arcane syntax,
    #  but some of the metadata we use here require that we extract some of what is in there.
    #
    #  This is not a generic parser for CoreMetadata objects in HDF-EOS files.
    #
    #  This method sets the `_omi_core_metadata` instance variable, which is used in other methods.
    def parse_coreMetadataOMI(self):
        """
        Read and parse the '/HDFEOS INFORMATION/CoreMetadata.N' variables.

        All variables with consecutive N (starting at 0) are concatenated and
        scanned line by line. For every OBJECT the text of its VALUE line is
        stored under the object name with double quotes removed. A few items are
        converted afterwards: 'ORBITNUMBER' to int, 'EQUATORCROSSINGLONGITUDE' to
        float, date/time pairs to datetime.datetime, 'INPUTPOINTER' to a list, and
        'QAPERCENTMISSINGDATA' to a fraction (the percentage divided by 100).
        The aliases 'orbit', 'validity_start', 'validity_end' and
        'reference_time' are added when their source items are present.

        The result is stored in self._omi_core_metadata; nothing is returned.

        Raises RuntimeError when no CoreMetadata variable is found; in that case
        self._omi_core_metadata is set to None.
        """
        core_meta_lines = []
        core_meta = {}
        index = 0
        while True:
            try:
                v = self.ref['/HDFEOS INFORMATION/CoreMetadata.{0}'.format(index)]
                index += 1
            except (AttributeError, TypeError, KeyError):
                break
            # Dataset.value was removed in h5py 3.0; indexing with an empty
            # tuple reads the scalar in old and new versions alike.
            core_meta_lines.extend(str(v[()], encoding='ascii').split('\n'))
        self.close()

        if index == 0:
            self._omi_core_metadata = None
            raise RuntimeError("Not an OMI file")

        pattern = re.compile(r""" *(GROUP|END_GROUP|OBJECT|END_OBJECT|VALUE) *= (.+)""")

        # No object is open until the first OBJECT line; a VALUE line seen
        # before that is ignored instead of failing on an unbound name.
        current_object = None
        multi_line = False
        for line in core_meta_lines:
            match = pattern.search(line)
            if match:
                key = match.group(1)
                value = match.group(2)
                if key == "OBJECT":
                    current_object = value
                    # Only INPUTPOINTER values are continued over several lines.
                    multi_line = (current_object in ("INPUTPOINTER",))
                elif key == "VALUE" and current_object is not None:
                    core_meta[current_object] = value.replace('"', '')
            elif multi_line and current_object in core_meta:
                # Continuation line of a multi-line value.
                core_meta[current_object] = core_meta[current_object] + line.replace('"', '').strip()

        stamp_format = "%Y-%m-%dT%H:%M:%S.%f"

        if "INPUTPOINTER" in core_meta:
            core_meta["INPUTPOINTER"] = core_meta["INPUTPOINTER"].replace('(', '').replace(')', '').split(', ')
        if "ORBITNUMBER" in core_meta:
            core_meta["ORBITNUMBER"] = int(core_meta["ORBITNUMBER"])
            core_meta['orbit'] = core_meta["ORBITNUMBER"]
        if "RANGEBEGINNINGDATE" in core_meta and "RANGEBEGINNINGTIME" in core_meta:
            core_meta["RANGEBEGIN"] = datetime.datetime.strptime(core_meta["RANGEBEGINNINGDATE"] + "T" + core_meta["RANGEBEGINNINGTIME"], stamp_format)
            core_meta["validity_start"] = core_meta["RANGEBEGIN"]
        if "RANGEENDINGDATE" in core_meta and "RANGEENDINGTIME" in core_meta:
            core_meta["RANGEEND"] = datetime.datetime.strptime(core_meta["RANGEENDINGDATE"] + "T" + core_meta["RANGEENDINGTIME"], stamp_format)
            core_meta["validity_end"] = core_meta["RANGEEND"]
        if "EQUATORCROSSINGDATE" in core_meta and "EQUATORCROSSINGTIME" in core_meta:
            core_meta["EQUATORCROSSING"] = datetime.datetime.strptime(core_meta["EQUATORCROSSINGDATE"] + "T" + core_meta["EQUATORCROSSINGTIME"], stamp_format)
            # The reference time is midnight of the equator crossing date.
            core_meta["reference_time"] = datetime.datetime.strptime(core_meta["EQUATORCROSSINGDATE"], "%Y-%m-%d")
        if "EQUATORCROSSINGLONGITUDE" in core_meta:
            core_meta["EQUATORCROSSINGLONGITUDE"] = float(core_meta["EQUATORCROSSINGLONGITUDE"])
        if "PRODUCTIONDATETIME" in core_meta:
            core_meta["PRODUCTIONDATETIME"] = datetime.datetime.strptime(core_meta["PRODUCTIONDATETIME"], "%Y-%m-%dT%H:%M:%S.000Z")
        if "QAPERCENTMISSINGDATA" in core_meta:
            # Stored as a fraction, not as a percentage.
            core_meta["QAPERCENTMISSINGDATA"] = float(core_meta["QAPERCENTMISSINGDATA"])/100.0
        self._omi_core_metadata = core_meta
737 326:90635f5b6aac Maarten
738 335:d887855826de Maarten
    ## Return the band id of the L2 product (geolocation grid handling).
    #
    # This is an int property.
    @property
    def band(self):
        """
        Return the band identifier; the value is cached.

        Read from the global 'geolocation_grid_from_band' attribute. When that
        cannot be read and OMI core metadata is present, the result is 2 for the
        sensor short name "CCD Visible" and 1 otherwise. Without OMI metadata a
        warning is logged and -1 is used.
        """
        if self._band is not None:
            return self._band

        try:
            self._band = self.ref.attrs['geolocation_grid_from_band']
        except (AttributeError, TypeError, KeyError):
            omi_meta = self._omi_core_metadata
            if omi_meta is None:
                self.logger.warning("Attribute 'geolocation_grid_from_band' not found.")
                self._band = -1
            elif omi_meta['ASSOCIATEDSENSORSHORTNAME'] == "CCD Visible":
                self._band = 2
            else:
                self._band = 1
        self.close()
        return self._band
755 326:90635f5b6aac Maarten
756 335:d887855826de Maarten
    ## Bounding box from metadata.
    #
    #  Returns tuple: (lon_min, lon_max, lat_min, lat_max)
    #
    #  Reads from data when metadata is unavailable.
    @property
    def bbox(self):
        """
        Return the bounding box as (lon_min, lon_max, lat_min, lat_max).

        The four 'geospatial_*' global attributes are used when they can be read
        and are not all equal to zero. Otherwise the extremes are computed from
        the latitude and longitude arrays: the first swath under
        '/HDFEOS/SWATHS/' when OMI core metadata is present, else the variables
        'latitude' and 'longitude'. The value is cached.
        """
        if self._bbox is not None:
            return self._bbox

        attribute_names = ('geospatial_lon_min', 'geospatial_lon_max',
                           'geospatial_lat_min', 'geospatial_lat_max')
        try:
            self._bbox = tuple(self.ref.attrs[name] for name in attribute_names)
            # Four zeros mean the attributes were never filled in.
            if all(edge == 0.0 for edge in self._bbox):
                raise ValueError("Empty bounding box")
        except (AttributeError, TypeError, KeyError, ValueError):
            if self._omi_core_metadata is not None:
                swath = list(self.ref['/HDFEOS/SWATHS/'].keys())[0]
                lat = self.ref['/HDFEOS/SWATHS/{0}/Geolocation Fields/Latitude'.format(swath)][:]
                lon = self.ref['/HDFEOS/SWATHS/{0}/Geolocation Fields/Longitude'.format(swath)][:]
            else:
                lat = self.find_variable('latitude')[:]
                lon = self.find_variable('longitude')[:]
            self._bbox = (np.min(lon), np.max(lon), np.min(lat), np.max(lat))
        self.close()
        return self._bbox
786 326:90635f5b6aac Maarten
787 335:d887855826de Maarten
    ## Tracking ID
    #
    #  Produce a fixed and unique ID for the input granule.
    #  Read from file if possible, create "something" if not.
    @property
    def ID(self):
        """
        Return an identifier for the granule; the value is cached.

        Preference order: the global 'tracking_id' attribute unless it equals
        b' ', the global 'id' attribute, 'LOCALGRANULEID' from the OMI core
        metadata, and finally a newly generated UUID4 as ASCII bytes.
        """
        if self._id is not None:
            return self._id

        recoverable = (AttributeError, TypeError, KeyError, OSError)
        try:
            tracking = self.ref.attrs['tracking_id']
            # A single blank counts as "not set"; use 'id' instead.
            self._id = tracking if tracking != b' ' else self.ref.attrs['id']
        except recoverable:
            try:
                self._id = self.ref.attrs['id']
            except recoverable:
                if self._omi_core_metadata is not None:
                    self._id = self._omi_core_metadata["LOCALGRANULEID"]
                else:
                    try:
                        self._id = str(uuid.uuid4()).encode('ASCII')
                    except recoverable:
                        raise CAMAException("Tracking ID not found.")
        self.close()
        return self._id
812 326:90635f5b6aac Maarten
813 335:d887855826de Maarten
    ## The middle of the validity period.
    #
    #  This is a datetime.datetime property.
    @property
    def validity_mid(self):
        """
        Return the instant halfway between validity_start and validity_end.
        """
        begin = self.validity_start
        half_span = (self.validity_end - begin)//2
        return begin + half_span