Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

pycama / src / pycama / File.py @ 841:49ca3a4a5dbe

History | View | Annotate | Download (37.1 KB)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3

    
4
# Copyright 2016-2017 Maarten Sneep, KNMI
5
#
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are met:
8
#
9
# 1. Redistributions of source code must retain the above copyright notice,
10
#    this list of conditions and the following disclaimer.
11
#
12
# 2. Redistributions in binary form must reproduce the above copyright notice,
13
#    this list of conditions and the following disclaimer in the documentation
14
#    and/or other materials provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
20
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26

    
27
## \file File.py
28
#  The file handling class.
29
#  Represents a single file, but tracks which files have been opened before to avoid duplication.
30
# @author Maarten Sneep
31

    
32
import sys
33
import os
34
import re
35
import logging
36
import datetime
37
import hashlib
38
import uuid
39
import warnings
40
warnings.filterwarnings("ignore", category=FutureWarning)
41

    
42
import h5py
43
import netCDF4
44
import numpy as np
45

    
46
from .utilities import CAMAException
47

    
48
## Create a callable object to find variables within a HDF5 file
#
#  This is a companion class for the call to the visititems() method
#  in h5py group objects. This method is used to select the desired Dataset
#  in the file.
class NameMatcher(object):
    ## The constructor
    #
    # @param name The name of the element to search for
    # @param cls The name of the class of the element to search for.
    #        Default is 'Dataset', i.e. a netCDF4 variable.
    def __init__(self, name, cls="Dataset"):
        self.name = name
        self.cls = cls

    ## Split off the name of the element from the full path.
    # @param name The full name including the path to the element.
    # @return the last element in the path (i.e. the name of the element).
    def basename(self, name):
        return name.split('/')[-1]

    ## The callable interface
    #
    # The entry point when iterating over all elements in the file.
    # @param name The full name of the element, including the full hdf-5 path.
    #        The `pycama.File.NameMatcher.basename()` routine is used to obtain the name of the element itself.
    # @param obj The object to compare. The name of the class of this object is compared to the cls instance variable.
    # @return obj if the element shall be used, None otherwise.
    def __call__(self, name, obj):
        # Both the element name and the class name of the object have to match.
        is_match = (self.basename(name) == self.name
                    and obj.__class__.__name__ == self.cls)
        return obj if is_match else None
84

    
85
## A file wrapper.
86
#
87
# Maintains (and closes!) h5py file references, and returns basic metadata (time, orbit, ...)
88
#
89
# The class object maintains a list of files that have been opened already, to avoid duplicate file references.
90
class File(object):
91
    ## Class variable to keep track of the files that have been opened.
92
    reported_files = {}
93

    
94
    ## The constructor
95
    #
96
    #  @param f Full path of hte file to open.
97
    #  @param mode The mode for the file. Defaults to read-only.
98
    #  @param product The type of the product as requested.
99
    def __init__(self, f, mode='r', product=None):
100
        ## The name of the file
101
        self.name = os.path.basename(f)
102
        ## The full path of the file.
103
        self.path = f
104
        self.reported_files[f] = self
105
        ## The reading mode
106
        self.mode = mode
107
        ## Has this file been opened before? Needed for progress messages.
108
        self.been_opened = False
109
        self.time_warning_given = False
110
        ## The name of the product as requested. Needed for product mapping (offline use).
111
        self.request_product = product
112
        ## the file reference.
113
        self._ref = None
114
        ## the orbit number
115
        self._orbit = None
116
        ## number of scanlines
117
        self._scanline = None
118
        ## The reference time. A datetime.datetime object.
119
        self._reference_time = None
120
        ## The validity start time. A datetime.datetime object.
121
        self._validity_start = None
122
        ## The validity end time. A datetime.datetime object.
123
        self._validity_end = None
124
        ## The product name as read from the input file.
125
        self._product = None
126
        ## The band on which the geolocation is based.
127
        self._band = None
128
        ## The bounding box attributes
129
        self._bbox = None
130
        ## The (unique) file id.
131
        self._id = None
132
        ## Dictionary with fall-back OMI metadata. Used only for OMI files.
133
        self._omi_core_metadata = None
134
        ## Number of succesfull pixels in granule.
135
        self._f_fraction_succesful = None
136
        ## Ensure access to the logger.
137
        self.logger = logging.getLogger('PyCAMA')
138

    
139
        try:
140
            if self.isfile:
141
                self.parse_coreMetadataOMI()
142
        except RuntimeError:
143
            pass
144

    
145
        if self.request_product is None:
146
            self.request_product = self.product
147

    
148
    ## Close an open file reference
149
    #
150
    # Note that this does not invalidate the object, it just closes the file
151
    # reference to avoid exceeding the operating system user limit for the
152
    # number of open files.
153
    def close(self):
154
        """Close an open file"""
155
        if self._ref is not None:
156
            self._ref.close()
157
            self._ref = None
158

    
159
    ## Symmetry method for pycama.File.File.close()
160
    def open(self):
161
        return self.ref
162

    
163
    @property
164
    def size(self):
165
        return os.path.getsize(self.path)
166

    
167
    ## Read-only property referencing a hdf5 file.
168
    #
169
    # Opens the input file after checking it has a non-zero length.
170
    # If this is the first time this file is opened an info-level message is
171
    # written to the log file.
172
    #
173
    # @throws CAMAException in case of an error (zero file size or an `h5py` error.)
174
    @property
175
    def ref(self):
176
        """The file reference"""
177
        if self._ref is None:
178
            if os.path.getsize(self.path) == 0:
179
                raise CAMAException("File '{0}' has length zero".format(self.name))
180
            if not self.been_opened:
181
                self.logger.info("Opening file '%s'", self.name)
182
                self.been_opened = True
183
            try:
184
                self._ref = h5py.File(self.path, self.mode)
185
            except (RuntimeError, OSError) as err:
186
                raise CAMAException("Error on {0}: '{1}'".format(self.path, str(err)))
187
        return self._ref
188

    
189
    ## Find a reference to a variable in a given file (recursively in groups)
    #
    #  Returns None if the variable isn't found.
    # @param varname Name of the variable.
    # @param group Group to start searching from. Default is to use
    #        <tt>/PRODUCT</tt> for S5P L2 files, <tt>/HDFEOS/SWATHS/<i>swath</i></tt>
    #        for OMI files and simply <tt>/</tt> for other files.
    # @return Reference (`h5py.Dataset`) to the requested variable or None.
    #
    # Uses a pycama.File.NameMatcher object with
    # [`h5py.Group.visititems()`](http://docs.h5py.org/en/latest/high/group.html?highlight=visititems#Group.visititems).
    def find_variable(self, varname, group=None):
        if self._ref is None:
            self.open()

        if group is None:
            try:
                group = self.ref['PRODUCT']
            except (AttributeError, TypeError, KeyError):
                try:
                    # perhaps an OMI file? Use the first swath that is listed.
                    swath = list(self.ref['/HDFEOS/SWATHS/'].keys())[0]
                    group = self.ref['/HDFEOS/SWATHS/{0}'.format(swath)]
                except (AttributeError, TypeError, KeyError, IndexError):
                    # IndexError: the swaths group exists, but is empty.
                    group = self.ref['/']

        matcher = NameMatcher(varname, "Dataset")
        try:
            return group.visititems(matcher)
        except Exception:
            # Any failure while walking the file is reported as 'not found'.
            # KeyboardInterrupt and SystemExit are deliberately not swallowed.
            return None
222

    
223
    ## Find the names of the dimensions of a variable.
224
    #
225
    #  @param varname Name of the variable.
226
    #  @param group Group to start searching from, same as in pycama.File.File.find_variable() method.
227
    #
228
    #  Extract the names of the dimensions of a variable, either directly using the h5py interface,
229
    #  or falling back to the netCDF4 interface for dimensions that do not have a variable with the
230
    #  same name in the same location (unlinked dimensions).
231
    #
232
    # @note This method may close and reopen the file, invalidating all variable references.
233
    def dimension_names(self, varname, group=None):
234
        var = self.find_variable(varname, group)
235
        unlinked_dims = False
236
        dim_names = []
237
        for i,n in enumerate(var.dims):
238
            dim_name = list(n.keys())[0]
239
            unlinked_dims = unlinked_dims or dim_name.startswith('This is a netCDF dimension but not a netCDF variable.')
240
            dim_names.append(dim_name)
241

    
242
        if unlinked_dims:
243
            self.logger.debug("Unlinked netCDF dimensions in '%s'", varname)
244
            self.logger.debug("Opening '%s' as netCDF4 file", self.path)
245
            variable_location = var.name
246
            self.close()
247
            with netCDF4.Dataset(self.path, 'r') as ncref:
248
                dim_names = list(ncref[variable_location].dimensions)
249
            self.open()
250
        return dim_names
251

    
252
    ## Get the outline of the geolocations.
253
    #
254
    # The outline of all data in the granule. This is copied from the `eop:multiExtentOf`
255
    # metadata (originating from L1B eventually).
256
    # The data is ordered in a counter clockwise direction.
257
    #
258
    # In case this attribute is missing (UPAS, QA4ECV), we use the latitude and
259
    # longitude centers of the pixels to approximate this attribute.
260
    # @return Dictionary with latitude and longitude arrays describing the outline of the granule.
261
    #
262
    def outline(self):
263
        try:
264
            grp_list = '/METADATA/EOP_METADATA/om:featureOfInterest/eop:multiExtentOf/gml:surfaceMembers/gml:exterior'
265
            grp = self.ref[grp_list]
266
            posList = grp.attrs['gml:posList'].decode('ASCII')
267
            p = np.asarray([float(v) for v in posList.split()])
268
            self.close()
269
            return {'latitude': p[0::2], 'longitude': p[1::2]}
270
        except (AttributeError, TypeError, KeyError, IOError, ValueError):
271
            self.logger.warning("Attribute 'gml:posList' not found or empty, using fallback")
272
            if self._omi_core_metadata is not None:
273
                lat = self.find_variable('Latitude')[:]
274
                lon = self.find_variable('Longitude')[:]
275
            else:
276
                # QA4ECV
277
                lat = self.find_variable('latitude')[0, ...]
278
                lon = self.find_variable('longitude')[0, ...]
279
            if len(lat) == 0 or len(lon) != len(lat):
280
                return {'latitude': np.asarray([], dtype=np.float32),
281
                        'longitude': np.asarray([], dtype=np.float32)}
282
            lat_outline = []
283
            lon_outline = []
284
            for i in range(0,lon.shape[0],10):
285
                lat_outline.append(lat[i, 0])
286
                lon_outline.append(lon[i, 0])
287
            for i in range(0,lon.shape[1],10):
288
                lat_outline.append(lat[lat.shape[0]-1, i])
289
                lon_outline.append(lon[lon.shape[0]-1, i])
290
            for i in range(lon.shape[0]-1, 0, -10):
291
                lat_outline.append(lat[i, lat.shape[1]-1])
292
                lon_outline.append(lon[i, lon.shape[1]-1])
293
            for i in range(lon.shape[1]-1, 0, -10):
294
                lat_outline.append(lat[0, i])
295
                lon_outline.append(lon[0, i])
296
            self.close()
297
            return {'latitude': np.asarray(lat_outline, dtype=np.float32),
298
                    'longitude': np.asarray(lon_outline, dtype=np.float32)}
299

    
300
    ## Inspect the input pointer to find which irradiance file was used to produce the data.
301
    #
302
    #  Search the elements in the `gmd:lineage` metadata to find the irradiance input.
303
    #  @return matching attribute(s) as a string, or None if the correct attribute could not be found.
304
    #
305
    #  This method requires that the `gmd:description` contains the string 'irradiance',
306
    #  and that the `gmi:processedLevel` attribute is equal to 'L1B'.
307
    #
308
    # @note UPAS does not set this attribute (or any of the input pointer really) at this moment.
309
    def irradiance(self):
310
        try:
311
            grp_list = '/METADATA/ISO_METADATA/gmd:dataQualityInfo/gmd:lineage/gmd:processStep'
312
            grp = self.ref[grp_list]
313

    
314
            irrad = []
315
            for src in [g for g in grp.keys() if g.startswith('gmd:source#') and g.__class__.__name__ != "Dataset"]:
316
                src_grp = grp[src]
317
                try:
318
                    description = src_grp.attrs['gmd:description'].decode("utf-8")
319
                except IOError:
320
                    continue
321

    
322
                try:
323
                    plevel = src_grp['gmi:processedLevel'].attrs['gmd:code'].decode("utf-8")
324
                except IOError:
325
                    continue
326

    
327
                if ('irradiance' in description and plevel == 'L1B'):
328
                    try:
329
                        irrad.append(os.path.basename(src_grp['gmd:sourceCitation/gmd:alternateTitle#1'].attrs['gmx:FileName'].decode("utf-8")))
330
                    except IOError:
331
                        continue
332
            self.close()
333
            return ", ".join(irrad) if len(irrad) > 0 else None
334
        except (AttributeError, TypeError, KeyError):
335
            # perhaps an OMI file
336
            if self._omi_core_metadata is not None:
337
                irrad = [v for v in self._omi_core_metadata['INPUTPOINTER'] if 'IRR' in v]
338
                self.close()
339
                return ", ".join(irrad) if len(irrad) > 0 else None
340
            else:
341
                try:
342
                    # QA4ECV
343
                    irrad = self.ref.attrs['irradiance_file'].decode('ASCII')
344
                    self.close()
345
                    return irrad
346
                except (AttributeError, TypeError, KeyError):
347
                    self.close()
348
                    return None
349

    
350
    ## Extract the full input pointer.
351
    #
352
    #  @return A dictionary with all file names referred to in the input pointer.
353
    #  The file names are stored in lists, as there may be more files per input type.
354
    #  The keys of the dictionary are the product keys, extracted from the file name.
355
    #
356
    #  @note Uses `os.path.basename()` to cut off any directory paths (there sould not be any).
357
    #  @note Expects the correct file names, i.e. conforming to the file name convention.
358
    #  NISE is treated separately, and accepts either the original NSIDC file names or
359
    # names conforming to the S5P file name convention.
360
    def input_pointer(self):
361
        try:
362
            grp_list = '/METADATA/ISO_METADATA/gmd:dataQualityInfo/gmd:lineage/gmd:processStep'
363
            grp = self.ref[grp_list]
364

    
365
            input_pointer = {}
366
            for src in [g for g in grp.keys() if g.startswith('gmd:source#') and g.__class__.__name__ != "Dataset"]:
367
                src_grp = grp[src]
368
                lst = []
369
                for src2 in [g for g in src_grp['gmd:sourceCitation'].keys() if g.startswith('gmd:alternateTitle#')]:
370
                    try:
371
                        lst.append(os.path.basename(src_grp['gmd:sourceCitation/' + src2].attrs['gmx:FileName'].decode("utf-8")))
372
                    except (AttributeError, TypeError, KeyError, IOError):
373
                        self.logger.warning("Filename not set in metadata for {0}".format(src))
374
                        continue
375
                if len(lst) == 0:
376
                    continue
377
                if 'SSMIS' in lst[-1]:
378
                    key = "AUX_NISE__"
379
                else:
380
                    key = lst[-1][9:19]
381
                input_pointer[key] = lst
382
        except (AttributeError, TypeError, KeyError):
383
            # perhaps an OMI file
384
            if self._omi_core_metadata is not None:
385
                input_pointer = {"{0}".format(i): v for i,v in enumerate(self._omi_core_metadata['INPUTPOINTER'])}
386
            else:
387
                input_pointer = {}
388
        self.close()
389
        return input_pointer
390

    
391
    ## return the length of the scanline dimension
392
    @property
393
    def scanline(self):
394
        if self._scanline is None:
395
            try:
396
                self._scanline = len(self.find_variable('scanline'))
397
            except KeyError:
398
                self._scanline = -1
399
        return self._scanline
400

    
401
    ## Return the time for the whole file
402
    #
403
    #  @param which The which argument (string) specifies the specific time to represent the file:
404
    #   * **ref** Use the `reference_time` (UTC midnight before the start of the granule). This is the default.
405
    #   * **start** Use the `time_coverage_start` time (the time of the begin of the first measurement in the granule).
406
    #   * **end** or **stop** Use the `time_coverage_end` time (the time of the end of the last measurement in the granule).
407
    #   * Use the middle between start and end in all other cases.
408
    # @return A datetime.datetime object.
409
    def time(self, which='ref'):
410
        if which == 'ref':
411
            return self.reference_time
412
        elif which == 'start':
413
            return self.validity_start
414
        elif which in ('stop', 'end'):
415
            return self.validity_end
416
        else:
417
            return self.validity_mid
418

    
419
    ## Check if the granule has a time that falls between start and stop.
420
    #
421
    #  @param start datetime.datetime object for the begin of the reference period.
422
    #  @param stop  datetime.datetime object for the end of the reference period.
423
    #  @param which reference time to use (see pycama.File.File.time()).
424
    #  @return A boolean.
425
    def check_time_range(self, start, stop, which='ref'):
426
        """
427
        Return True if the file (at the specific reference time) falls between start and stop.
428

429
        start and stop are both datetime objects, which as in self.time()
430
        """
431
        try:
432
            t = self.time(which)
433
        except CAMAException:
434
            self.logger.warning("File '{0}' does not contain a time, removing from analysis.".format(self.name))
435
            return False
436

    
437
        if start is None and stop is None:
438
            r = True
439
            if not self.been_opened:
440
                self.logger.debug("Use file, no time limits were given")
441
        elif start is None and stop is not None:
442
            r = t < stop
443
            if not self.been_opened and r:
444
                self.logger.debug("Use file, '%s' is before '%s'", t, stop)
445
            elif not self.time_warning_given and not r:
446
                self.logger.info("Skip file, '%s' is after '%s'", t, stop)
447
        elif start is not None and stop is None:
448
            r = start <= t
449
            if not self.been_opened and r:
450
                self.logger.debug("Use file, '%s' is after '%s'", t, start)
451
            elif not self.time_warning_given and not r:
452
                self.logger.info("Skip file, '%s' is before '%s'", t, start)
453
        else:
454
            r = start <= t < stop
455
            if not self.been_opened and r:
456
                self.logger.debug("Use file, '%s' is between '%s' and '%s'", t, start, stop)
457
            elif not self.time_warning_given and not r:
458
                self.logger.info("Skip file, '%s' is not between '%s' and '%s'", t, start, stop)
459
        if not r:
460
            self.time_warning_given = True
461
            
462
        return r
463

    
464
    ## Is this an actual file?
465
    #
466
    #  Does the self.path instance variable resolve to an actual file?
467
    #
468
    #  This is a boolean property.
469
    @property
470
    def isfile(self):
471
        return os.path.isfile(self.path) and self.size > 0
472

    
473
    ## The orbit number for the granule
474
    #
475
    #  This is a int property.
476
    @property
477
    def orbit(self):
478
        if self._orbit is None:
479
            try:
480
                self._orbit = int(self.ref.attrs['orbit'])
481
            except (AttributeError, TypeError, KeyError):
482
                # perhaps this is an OMI file.
483
                if self._omi_core_metadata is not None:
484
                    self._orbit = self._omi_core_metadata['orbit']
485
                else:
486
                    raise CAMAException("Attribute 'orbit' not found.")
487
            self.close()
488
        return self._orbit
489

    
490
    ## Reference time of file (UTC midnight before start of orbit).
491
    #
492
    # This is a datetime.datetime property.
493
    @property
494
    def reference_time(self):
495
        if self._reference_time is None:
496
            try:
497
                ref_time = int(self.ref.attrs["time_reference_seconds_since_1970"])
498
                self._reference_time = datetime.datetime.utcfromtimestamp(ref_time)
499
            except OSError:
500
                self.close()
501
                with netCDF4.Dataset(self.path, 'r') as ref:
502
                    ref_time = int(ref.time_reference_seconds_since_1970)
503
                    self._reference_time = datetime.datetime.utcfromtimestamp(ref_time)
504
            except (AttributeError, TypeError, KeyError):
505
                try:
506
                    ref_time_str = self.ref.attrs["time_reference"].decode("utf-8")
507
                    if ref_time_str[19] == '.':
508
                        if ref_time_str.ensdwith("Z"):
509
                            fmt = "%Y%m%dT%H%M%S.%fZ"
510
                        else:
511
                            fmt = "%Y%m%dT%H%M%S.%f"
512
                    elif '-' in ref_time_str:
513
                        if ref_time_str.endswith("Z"):
514
                            fmt = "%Y-%m-%dT%H:%M:%SZ"
515
                        else:
516
                            fmt = "%Y-%m-%dT%H:%M:%S"
517
                    else:
518
                        if ref_time_str.endswith("Z"):
519
                            fmt = "%Y%m%dT%H%M%SZ"
520
                        else:
521
                            fmt = "%Y%m%dT%H%M%S"
522
                    self._reference_time = datetime.datetime.strptime(ref_time_str, fmt)
523
                except (AttributeError, TypeError, KeyError):
524
                    try:
525
                        self.logger.info("First fallback failed for reference_time, attempting 'time' variable")
526
                        t = self.find_variable('time')
527
                        t0 = datetime.datetime.strptime("2010-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
528
                        self._reference_time = t0 + datetime.timedelta(seconds=int(t[0]))
529
                    except (AttributeError, TypeError, KeyError):
530
                        # perhaps this is an OMI file.
531
                        if self._omi_core_metadata is not None:
532
                            self.logger.info("Second fallback failed for reference_time, attempting OMI metadata")
533
                            self._reference_time = self._omi_core_metadata['reference_time']
534
                        else:
535
                            raise CAMAException("Attribute 'time_reference' not found.")
536
            self.close()
537
        return self._reference_time
538

    
539
    ## The time of the first observation in the file
540
    #
541
    #  This is a datetime.datetime property.
542
    @property
543
    def validity_start(self):
544
        if self._validity_start is None:
545
            try:
546
                val_start_str = self.ref.attrs['time_coverage_start'].decode("utf-8")
547
                self._validity_start = datetime.datetime.strptime(val_start_str[0:19], '%Y-%m-%dT%H:%M:%S')
548
            except OSError:
549
                self.close()
550
                with netCDF4.Dataset(self.path, 'r') as ref:
551
                    val_start_str = ref.time_coverage_start
552
                    self._validity_start = datetime.datetime.strptime(val_start_str[0:19], '%Y-%m-%dT%H:%M:%S')
553
            except (AttributeError, TypeError, KeyError):
554
                # perhaps this is an OMI file.
555
                if self._omi_core_metadata is not None:
556
                    self._validity_start = self._omi_core_metadata['validity_start']
557
                else:
558
                    try:
559
                        self.logger.info("First fallback failed for time_coverage_start, attempting 'delta_time' variable")
560
                        dt = self.find_variable('delta_time')
561
                        if dt is None:
562
                            raise CAMAException("Variable 'delta_time' not found.")
563
                        dt_start = dt[0, 0]
564
                        seconds=int(dt_start//1000)
565
                        microseconds=int((dt_start%1000)*1000)
566
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
567
                        self._validity_start = self.reference_time + delta_t
568
                        dt_end = dt[0, -1]
569
                        seconds=int(dt_end//1000)
570
                        microseconds=int((dt_end%1000)*1000)
571
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
572
                        self._validity_end = self.reference_time + delta_t
573
                    except (AttributeError, TypeError, KeyError):
574
                        raise CAMAException("Attribute 'time_coverage_start' not found.")
575
            self.close()
576
        return self._validity_start
577

    
578
    ## The time of the end of the last observation in the file
579
    #
580
    #  This is a datetime.datetime property.
581
    @property
582
    def validity_end(self):
583
        if self._validity_end is None:
584
            try:
585
                val_end_str = self.ref.attrs['time_coverage_end'].decode("utf-8")
586
                self._validity_end = datetime.datetime.strptime(val_end_str[0:19], '%Y-%m-%dT%H:%M:%S')
587
            except OSError:
588
                self.close()
589
                with netCDF4.Dataset(self.path, 'r') as ref:
590
                    val_end_str = ref.time_coverage_end
591
                    self._validity_end = datetime.datetime.strptime(val_end_str[0:19], '%Y-%m-%dT%H:%M:%S')
592
            except (AttributeError, TypeError, KeyError):
593
                # perhaps this is an OMI file.
594
                if self._omi_core_metadata is not None:
595
                    self._validity_end = self._omi_core_metadata['validity_end']
596
                else:
597
                    try:
598
                        self.logger.info("First fallback failed for time_coverage_start, attempting 'delta_time' variable")
599
                        dt = self.find_variable('delta_time')
600
                        if dt is None:
601
                            raise CAMAException("Variable 'delta_time' not found.")
602
                        dt_start = dt[0, 0]
603
                        seconds=int(dt_start//1000)
604
                        microseconds=int((dt_start%1000)*1000)
605
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
606
                        self._validity_start = self.reference_time + delta_t
607
                        dt_end = dt[0, -1]
608
                        seconds=int(dt_end//1000)
609
                        microseconds=int((dt_end%1000)*1000)
610
                        delta_t = datetime.timedelta(seconds=seconds, microseconds=microseconds)
611
                        self._validity_end = self.reference_time + delta_t
612
                    except (AttributeError, TypeError, KeyError):
613
                        raise CAMAException("Attribute 'time_coverage_start' not found.")
614
            self.close()
615
        return self._validity_end
616

    
617
    ## Number of succesfully processed pixels
618
    #
619
    # This is an float property
620
    @property
    def fraction_of_successful_pixels(self):
        """
        Return the fraction of successfully processed pixels.

        The value is determined once and cached in `_f_fraction_succesful`.
        The following sources are tried in order:

        1. `number_of_successfully_processed_pixels` divided by
           `number_of_groundpixels`, both attributes of
           `/METADATA/QA_STATISTICS`.
        2. NP-BDx style attributes of the same group:
           `number_of_S5P_groundpixels_with_VCM` (or `..._with_ECM`) divided
           by `number_of_S5P_groundpixels`. A missing attribute is replaced
           by 1.
        3. `QAPERCENTMISSINGDATA` from the parsed OMI core metadata.

        A total pixel count of zero gives a fraction of 0.0, whichever of the
        sources supplied the counts.

        Raises a CAMAException when none of the sources is available.
        """
        if self._f_fraction_succesful is None:
            # The outer try covers *all* fallbacks, so a zero total in the
            # NP-BDx branch is handled in the same way as in the first branch.
            try:
                try:
                    qa = self.ref['/METADATA/QA_STATISTICS'].attrs
                    n = qa['number_of_successfully_processed_pixels']
                    total = qa['number_of_groundpixels']
                    fraction = int(n)/int(total)
                except (AttributeError, TypeError, KeyError):
                    try:
                        # NP-BDx
                        qa = self.ref['/METADATA/QA_STATISTICS'].attrs
                        if 'number_of_S5P_groundpixels_with_VCM' in qa:
                            n = qa['number_of_S5P_groundpixels_with_VCM']
                        elif 'number_of_S5P_groundpixels_with_ECM' in qa:
                            n = qa['number_of_S5P_groundpixels_with_ECM']
                        else:
                            n = 1

                        if 'number_of_S5P_groundpixels' in qa:
                            total = qa['number_of_S5P_groundpixels']
                        else:
                            total = 1

                        fraction = int(n)/int(total)
                    except (AttributeError, TypeError, KeyError):
                        if self._omi_core_metadata is not None:
                            # parse_coreMetadataOMI() already divides the
                            # percentage by 100, the stored value is a fraction.
                            fraction = 1.0 - self._omi_core_metadata["QAPERCENTMISSINGDATA"]
                        else:
                            raise CAMAException("Succesfull pixel count not found.")
            except ZeroDivisionError:
                fraction = 0.0
            self._f_fraction_succesful = fraction
        return self._f_fraction_succesful
654

    
655
    ## The product identifier, the product short name.
656
    #
657
    # This is a string property.
658
    @property
    def product(self):
        """
        Return product identifier.

        The value is read from the `ProductShortName` attribute of
        `/METADATA/GRANULE_DESCRIPTION`, with the `L2__` part removed. When
        that attribute can not be read the `SHORTNAME` entry of the parsed OMI
        core metadata is used instead. The result is cached in `_product`.

        Raises a CAMAException when neither source is available.
        """
        if self._product is None:
            try:
                p = self.ref['/METADATA/GRANULE_DESCRIPTION'].attrs['ProductShortName']
                # The attribute may be read as bytes or as str (this depends
                # on the h5py version and on how the attribute was stored);
                # only bytes need decoding.
                if isinstance(p, bytes):
                    p = p.decode("utf-8")
                self._product = p.replace("L2__", "")
            except (AttributeError, TypeError, KeyError):
                if self._omi_core_metadata is not None:
                    self._product = self._omi_core_metadata['SHORTNAME']
                else:
                    raise CAMAException("Product short name not found.")
            self.close()
        return self._product
674

    
675
    ## Extract the CoreMetadata from an OMI file and parse the contents.
676
    #
677
    #  The CoreMetadata string variables in OMI files have a somewhat arcane syntax,
678
    #  but some of the metadata we use here require that we extract some of what is in there.
679
    #
680
    #  This is not a generic parser for CoreMetadata objects in HDF-EOS files.
681
    #
682
    #  This method sets the `_omi_core_metadata` instance variable, which is used in other methods.
683
    def parse_coreMetadataOMI(self):
        """
        Extract the CoreMetadata from an OMI file and parse the contents.

        The datasets `/HDFEOS INFORMATION/CoreMetadata.0`, `.1`, ... are read
        until the first one that does not exist. Every `OBJECT` with a `VALUE`
        becomes an entry in a dictionary, which is stored in the
        `_omi_core_metadata` instance variable. A few entries are converted:

        - `INPUTPOINTER` becomes a list of strings.
        - `ORBITNUMBER` becomes an int (also available as `orbit`).
        - The range begin/end and equator crossing date/time pairs are combined
          into datetime objects (`RANGEBEGIN`/`validity_start`,
          `RANGEEND`/`validity_end`, `EQUATORCROSSING`); `reference_time` is
          the equator crossing date at midnight.
        - `EQUATORCROSSINGLONGITUDE` becomes a float.
        - `PRODUCTIONDATETIME` becomes a datetime object.
        - `QAPERCENTMISSINGDATA` is stored as a fraction (percentage / 100).

        This is not a generic parser for CoreMetadata objects in HDF-EOS files.

        Raises a RuntimeError when the file contains no CoreMetadata dataset;
        `_omi_core_metadata` is set to None in that case.
        """
        core_meta_lines = []
        core_meta = {}
        index = 0
        while True:
            try:
                v = self.ref['/HDFEOS INFORMATION/CoreMetadata.{0}'.format(index)]
                index += 1
            except (AttributeError, TypeError, KeyError):
                break
            # `Dataset.value` is no longer available in h5py 3, `[()]` reads
            # the full dataset in all versions.
            raw = v[()]
            if isinstance(raw, bytes):
                text = raw.decode('ascii')
            else:
                text = str(raw)
            core_meta_lines.extend(text.split('\n'))
        self.close()

        if index == 0:
            self._omi_core_metadata = None
            raise RuntimeError("Not an OMI file")

        pattern = re.compile(r""" *(GROUP|END_GROUP|OBJECT|END_OBJECT|VALUE) *= (.+)""")

        # Name of the object we are in; None until the first OBJECT line, so
        # that a stray VALUE line can not trigger an UnboundLocalError.
        current_object = None
        multi_line = False
        for line in core_meta_lines:
            match = pattern.search(line)
            if match:
                key = match.group(1)
                value = match.group(2).strip()
                if key == "OBJECT":
                    current_object = value
                    multi_line = (current_object in ("INPUTPOINTER",))
                elif key == "END_OBJECT":
                    # Lines following the end of the object are not a
                    # continuation of its value.
                    multi_line = False
                elif key == "VALUE" and current_object is not None:
                    core_meta[current_object] = value.replace('"', '')
            elif multi_line and current_object in core_meta:
                # Continuation line of a value that spans several lines.
                core_meta[current_object] = core_meta[current_object] + line.replace('"', '').strip()

        if "INPUTPOINTER" in core_meta:
            # Continuation lines are joined without white space, so split on
            # the comma and accept any amount of white space after it.
            pointers = core_meta["INPUTPOINTER"].replace('(', '').replace(')', '')
            core_meta["INPUTPOINTER"] = re.split(r',\s*', pointers)
        if "ORBITNUMBER" in core_meta:
            core_meta["ORBITNUMBER"] = int(core_meta["ORBITNUMBER"])
            core_meta['orbit'] = core_meta["ORBITNUMBER"]
        if "RANGEBEGINNINGDATE" in core_meta and "RANGEBEGINNINGTIME" in core_meta:
            core_meta["RANGEBEGIN"] = datetime.datetime.strptime(core_meta["RANGEBEGINNINGDATE"] + "T" + core_meta["RANGEBEGINNINGTIME"], "%Y-%m-%dT%H:%M:%S.%f")
            core_meta["validity_start"] = core_meta["RANGEBEGIN"]
        if "RANGEENDINGDATE" in core_meta and "RANGEENDINGTIME" in core_meta:
            core_meta["RANGEEND"] = datetime.datetime.strptime(core_meta["RANGEENDINGDATE"] + "T" + core_meta["RANGEENDINGTIME"], "%Y-%m-%dT%H:%M:%S.%f")
            core_meta["validity_end"] = core_meta["RANGEEND"]
        if "EQUATORCROSSINGDATE" in core_meta and "EQUATORCROSSINGTIME" in core_meta:
            core_meta["EQUATORCROSSING"] = datetime.datetime.strptime(core_meta["EQUATORCROSSINGDATE"] + "T" + core_meta["EQUATORCROSSINGTIME"], "%Y-%m-%dT%H:%M:%S.%f")
            core_meta["reference_time"] = datetime.datetime.strptime(core_meta["EQUATORCROSSINGDATE"], "%Y-%m-%d")
        if "EQUATORCROSSINGLONGITUDE" in core_meta:
            core_meta["EQUATORCROSSINGLONGITUDE"] = float(core_meta["EQUATORCROSSINGLONGITUDE"])
        if "PRODUCTIONDATETIME" in core_meta:
            core_meta["PRODUCTIONDATETIME"] = datetime.datetime.strptime(core_meta["PRODUCTIONDATETIME"], "%Y-%m-%dT%H:%M:%S.000Z")
        if "QAPERCENTMISSINGDATA" in core_meta:
            # Stored as a fraction, not as a percentage.
            core_meta["QAPERCENTMISSINGDATA"] = float(core_meta["QAPERCENTMISSINGDATA"])/100.0
        self._omi_core_metadata = core_meta
737

    
738
    ## Return the band id of the L2 product (geolocation grid handling).
739
    #
740
    # This is an int property.
741
    @property
    def band(self):
        """
        Return the band id of the L2 product.

        The value is taken from the global attribute
        `geolocation_grid_from_band`. When that attribute can not be read and
        OMI core metadata is available, the band follows from
        `ASSOCIATEDSENSORSHORTNAME`: 2 for "CCD Visible", 1 otherwise. Without
        either source a warning is logged and -1 is used.
        The result is cached in `_band`.
        """
        if self._band is not None:
            return self._band

        try:
            self._band = self.ref.attrs['geolocation_grid_from_band']
        except (AttributeError, TypeError, KeyError):
            omi_meta = self._omi_core_metadata
            if omi_meta is None:
                self.logger.warning("Attribute 'geolocation_grid_from_band' not found.")
                self._band = -1
            elif omi_meta['ASSOCIATEDSENSORSHORTNAME'] == "CCD Visible":
                self._band = 2
            else:
                self._band = 1
        self.close()
        return self._band
755

    
756
    ## Bounding box from metadata.
757
    #
758
    #  Returns tuple: (lon_min, lon_max, lat_min, lat_max)
759
    #
760
    #  Reads from data when metadata is unavailable.
761
    @property
    def bbox(self):
        """
        Return the bounding box as (lon_min, lon_max, lat_min, lat_max).

        The box is taken from the global `geospatial_*` attributes. A box in
        which all four values are 0.0 is treated as empty. When the attributes
        can not be read or the box is empty, the extremes are computed from the
        latitude and longitude data instead (from the first swath for OMI
        files, via find_variable() otherwise). The result is cached in `_bbox`.
        """
        if self._bbox is not None:
            return self._bbox

        try:
            attributes = self.ref.attrs
            self._bbox = (attributes['geospatial_lon_min'],
                          attributes['geospatial_lon_max'],
                          attributes['geospatial_lat_min'],
                          attributes['geospatial_lat_max'])
            lon_lo, lon_hi, lat_lo, lat_hi = self._bbox
            is_empty = (lon_lo == lon_hi and
                        lon_lo == lat_lo and
                        lon_lo == lat_hi and
                        lon_lo == 0.0)
            if is_empty:
                raise ValueError("Empty bounding box")
        except (AttributeError, TypeError, KeyError, ValueError):
            # No usable metadata: fall back to the geolocation data.
            if self._omi_core_metadata is not None:
                swath = list(self.ref['/HDFEOS/SWATHS/'].keys())[0]
                lat = self.ref['/HDFEOS/SWATHS/{0}/Geolocation Fields/Latitude'.format(swath)][:]
                lon = self.ref['/HDFEOS/SWATHS/{0}/Geolocation Fields/Longitude'.format(swath)][:]
            else:
                lat = self.find_variable('latitude')[:]
                lon = self.find_variable('longitude')[:]
            self._bbox = (np.min(lon), np.max(lon), np.min(lat), np.max(lat))
        self.close()
        return self._bbox
786

    
787
    ## Tracking ID
788
    #
789
    #  Produce a fixed and unique ID for the input granule.
790
    #  Read from file if possible, create "something" if not.
791
    @property
    def ID(self):
        """
        Return the tracking ID of the input granule.

        The following sources are tried in order:

        1. The global attribute `tracking_id`, unless it is blank (empty or
           white space only, as bytes or as str).
        2. The global attribute `id`.
        3. `LOCALGRANULEID` from the parsed OMI core metadata.
        4. A newly generated random UUID (as ASCII bytes).

        The result is cached in `_id`, so a generated ID is stable for the
        lifetime of this object only.
        """
        if self._id is None:
            try:
                tracking_id = self.ref.attrs['tracking_id']
                # The attribute may be read as bytes or as str; a comparison
                # with b' ' alone would miss a blank str value.
                is_blank = isinstance(tracking_id, (bytes, str)) and not tracking_id.strip()
                if is_blank:
                    self._id = self.ref.attrs['id']
                else:
                    self._id = tracking_id
            except (AttributeError, TypeError, KeyError, OSError):
                try:
                    self._id = self.ref.attrs['id']
                except (AttributeError, TypeError, KeyError, OSError):
                    if self._omi_core_metadata is not None:
                        self._id = self._omi_core_metadata["LOCALGRANULEID"]
                    else:
                        # Nothing in the file: create "something" unique.
                        self._id = bytes(str(uuid.uuid4()), encoding='ASCII')
            self.close()
        return self._id
812

    
813
    ## The middle of the validity period.
814
    #
815
    #  This is a datetime.datetime property.
816
    @property
    def validity_mid(self):
        """
        Return the midpoint between `validity_start` and `validity_end`.
        """
        begin = self.validity_start
        half_span = (self.validity_end - begin)//2
        return begin + half_span