Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

pycama / src / pycama / ZonalPlot.py @ 835:d623d705abd3

History | View | Annotate | Download (23 KB)

1 703:adfe2d8b896b Maarten
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
4 810:24bcdf3e6692 maarten
# Copyright 2019-2020 Maarten Sneep, KNMI
5 703:adfe2d8b896b Maarten
#
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are met:
8
#
9
# 1. Redistributions of source code must retain the above copyright notice,
10
#    this list of conditions and the following disclaimer.
11
#
12
# 2. Redistributions in binary form must reproduce the above copyright notice,
13
#    this list of conditions and the following disclaimer in the documentation
14
#    and/or other materials provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
20
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
## @file ZonalPlot.py
28
#  @author Maarten Sneep
29
#
30
#  This file defines a subclass of the pycama.AnalysisAndPlot.AnalysisAndPlot class.
31
#  This subclass implements the zonal average for PyCAMA.
32
33
import logging
import math
import os

import h5py
import netCDF4
import numpy as np

from .AnalysisAndPlot import AnalysisAndPlot

from .utilities import *
43
44
## Collect data for a zonal average
45
#
46
#  This is a subclass of the pycama.AnalysisAndPlot.AnalysisAndPlot class intended
47
#  for collecting data as a zonal mean and plotting the result. As for all
48
#  pycama.AnalysisAndPlot.AnalysisAndPlot subclasses the extracted data can be
49
#  stored into/read from a netCDF4 file.
50
#
51
#  The __init__ method is inherited.
52
class ZonalPlot(AnalysisAndPlot):
53
    ## Class specific preparation.
54
    #
55
    #  @param resolution Keyword parameter, float. Set the spatial resolution \f$\Delta\delta\f$ in degrees. Defaults to 0.5.
56
    #
57
    def setup(self, **kwargs):
58
        if 'zonal_resolution' in kwargs:
59
            self.resolution = math.ceil(360.0*kwargs['zonal_resolution'])/360.0
60
        elif 'resolution' in kwargs:
61
            self.resolution = math.ceil(360.0*kwargs['resolution'])/360.0
62
        else:
63
            self.resolution = 1.0
64
65
        ## The latitude grid is linear (equidistant) from south to north, with pixel centers at
66
        #  \f$\delta_c = -90.0 + \Delta\delta/2 + i\Delta\delta\f$, with \f$i=0, 1, \ldots, (180/\Delta\delta)-1\f$.
67
        #  This instance variable is an array with just the pixel centers.
68
        self.latitude_centers = None
69
        ## The number of latitudes in the grid, a simple integer, with value the
70
        #  lenght of the `latitude_centers` array.
71
        self.n_latitude = -1
72
        ## latitude boundaries, an [n_latitude, 2] array.
73
        self.latitude_bounds = None
74
75
        self.define_grid()
76
77
    ## Define the zonal average grid
78
    #
79
    #  This creates the grid following the definitions for each of the instance variables.
80
    def define_grid(self):
81
        lat_c = np.arange(-90.0+self.resolution/2, 90.0, self.resolution)
82
        self.latitude_centers = lat_c
83
        self.n_latitude = len(lat_c)
84
        lat_b = np.arange(-90.0, 90.0+self.resolution/2, self.resolution)
85
        self.latitude_bounds = np.transpose(np.asarray([lat_b[0:-1], lat_b[1:]]))
86
87
    ## Add an input variable.
    #
    #  @param var a pycama.Variable.Variable instance.
    #
    #  Reserves memory for the zonal average data: for each surface selection
    #  (all pixels, land-only, sea-only) a per-latitude sum/mean array and an
    #  observation counter of length n_latitude.
    def add_variable(self, var):
        for modifier in ("", "_land", "_sea"):
            name = var.name + modifier
            self.variables[name] = np.zeros((self.n_latitude,), dtype=np.float32)
            self.variables[name + '_count'] = np.zeros((self.n_latitude,), dtype=np.int32)

        # super(self.__class__, self) recurses infinitely if this class is ever
        # subclassed further; the zero-argument form is the correct idiom.
        super().add_variable(var)
        self.variables_meta[var.name]['noscanline'] = var.noscanline
        if hasattr(var, 'map_range'):
            self.variables_meta[var.name]['data_range'] = var.map_range
102
103
    ## Extract the required information from the input data.
104
    #
105
    #  Using the latitude bounds we find for each observation into which latitude band it should be sorted.
106
    #  In a loop over the latitude bands we select the observations for each band. If there are none we skip to the next latitude band.
107
    #  The longitudes are brought into the [-180, 180) range, and longitude bin boundaries are extracted for the latitude band.
108
    #
109
    #  The inner loop is over the variables. For each variable the valid data is selected (taking unsynchronized variable into account - I think),
110
    #  and the `np.histogram()` function is used to calculate the value for each longitude bin.
111
    #  The longitudes are used as primary histogram variable, with the data we're actually interested
112
    #  in the weights parameter. A separate call to `np.histogram()` is used to count the number of
113
    #  observations in each grid cell.
114
    #
115
    #  After extraction of the data the mean is taken by dividing the sum of the observations
116
    #  by the number of observations (where the latter is not 0).
117
    def process(self):
118
        lat = self.input_variables.variables['latitude'].aggregate_data
119 711:5e4605c42d9a Maarten
        try:
120
            surface_flags = np.asarray(self.input_variables.variables['surface_classification'].aggregate_data, dtype=np.uint8)
121
        except KeyError:
122
            surface_flags = None
123 706:4acedeea9329 Maarten
        bins = np.concatenate([self.latitude_bounds[:, 0], [self.latitude_bounds[-1, 1]]])
124
125
        variables = [v for v in self.input_variables.variables.values() if v.show and not v.noscanline]
126
        n_variables = len(variables)
127
        n_interval = max([(n_variables//20), 4])
128
        for i, v in enumerate(variables):
129
            if i > 0 and i % n_interval == 0:
130
                self.progress(100*(i+1)/n_variables)
131
            for surface in ('all', 'land', 'sea'):
132
                if surface == 'land':
133 711:5e4605c42d9a Maarten
                    if surface_flags is None:
134
                        continue
135 706:4acedeea9329 Maarten
                    sflag = (surface_flags & 3) == 0
136
                    name = v.name + '_land'
137
                elif surface == 'sea':
138 711:5e4605c42d9a Maarten
                    if surface_flags is None:
139
                        continue
140 706:4acedeea9329 Maarten
                    sflag = (surface_flags & 3) == 1
141
                    name = v.name + '_sea'
142
                else:
143 711:5e4605c42d9a Maarten
                    sflag = np.ones(v.aggregate_data.shape, dtype=np.bool)
144 706:4acedeea9329 Maarten
                    name = v.name
145
146
                try:
147
                    vselected = np.logical_and(
148 703:adfe2d8b896b Maarten
                                    np.logical_and(
149
                                        np.logical_and(
150 706:4acedeea9329 Maarten
                                            np.isfinite(v.aggregate_data),
151 703:adfe2d8b896b Maarten
                                            np.logical_not(v.aggregate_data.mask)),
152
                                            v.aggregate_data < float.fromhex('1.ep+122')),
153
                                            sflag)
154 706:4acedeea9329 Maarten
                except (AttributeError, IndexError):
155
                    vselected = np.logical_and(
156
                                    np.logical_and(
157
                                        np.isfinite(v.aggregate_data),
158 703:adfe2d8b896b Maarten
                                        v.aggregate_data < float.fromhex('1.ep+122')),
159
                                        sflag)
160 706:4acedeea9329 Maarten
                if np.any(vselected):
161
                    h = np.histogram(lat[vselected], bins=bins, weights=v.aggregate_data[vselected])[0]
162
                    c = np.histogram(lat[vselected], bins=bins)[0]
163 703:adfe2d8b896b Maarten
164 706:4acedeea9329 Maarten
                    self.variables[name] += h
165
                    self.variables[name + '_count'] += c
166 703:adfe2d8b896b Maarten
167
        # divide by number of points for mean
168
        for name in [n for n in self.variables.keys() if not n.endswith('_count')]:
169
            cidx = (self.variables[name + '_count'] != 0)
170
            self.variables[name][cidx] /= self.variables[name + '_count'][cidx]
171
172
    ## Merge data into a combined dataset.
173
    #
174
    #  @param other The object to be added to self,
175
    #               also an instance of the pycama.WorldPlot.WorldPlot class.
176
    def __iadd__(self, other):
177
        for name in [n for n in self.variables.keys() if not n.endswith('_count')]:
178
            self.logger.debug("{0}.__iadd__(): processing '{1}'".format(self.__class__.__name__, name))
179
180
            S = self.variables[name]
181
            C = self.variables[name + '_count']
182
            oS = other.variables[name]
183
            oC = other.variables[name + '_count']
184
            idx = C+oC > 0
185
            self.variables[name][idx] = (S[idx]*C[idx] + oS[idx]*oC[idx])/(C[idx]+oC[idx])
186
            self.variables[name + '_count'] = C + oC
187
188
    ## Read processed data from the input file, for specified time index.
    #
    #  @param fname NetCDF file with input data.
    #  @param time_index Time slice to read.
    #  @param exclude Optional collection of variable names to skip.
    #  @return False when the zonal data group is missing from the file, True otherwise.
    def ingest(self, fname, time_index, exclude=None):
        self.logger.debug("{0}.ingest(): reading {1}".format(self.__class__.__name__, fname))
        self.time_index_in_output = time_index
        with h5py.File(fname, 'r') as ref:
            if self.storage_group_name not in ref.keys():
                self.logger.warning("Zonalplot data not in '%s'", os.path.basename(fname))
                return False
            grp = ref[self.storage_group_name]
            # Restore the latitude grid exactly as stored in the file.
            self.resolution = grp.attrs['resolution']
            self.n_latitude = len(grp['latitude'])
            self.latitude_centers = grp['latitude'][:]
            self.latitude_bounds = grp['latitude_bounds'][...]

            self.variables = {}
            self.variables_meta = {}
            # The file-level 'variable_names' dataset drives which variables are read;
            # names are stored as bytes and decoded here.
            for k in [name.decode("ASCII") for name in ref['variable_names'][:] if name.decode("ASCII") not in ('latitude', 'longitude')]:
                if k not in grp.keys():
                    self.logger.debug("Variable name {0} requested but not available".format(k))
                    continue
                if exclude is not None and k in exclude:
                    self.logger.debug("Variable name {0} is excluded".format(k))
                    continue
                # Read the 'all', land and sea variants plus their observation counters;
                # metadata is only collected once, from the base (unsuffixed) dataset.
                for kk in (k, k+'_land', k+'_sea'):
                    self.logger.debug("{0}.ingest(): processing {1}".format(self.__class__.__name__, kk))
                    var = grp[kk]
                    self.variables[kk] = var[self.time_index_in_output, :]
                    count_var = grp[kk+'_count']
                    self.variables[kk+'_count'] = count_var[self.time_index_in_output, :]

                    if k != kk:
                        continue

                    self.variables_meta[k] = {}
                    self.variables_meta[k]['noscanline'] = False
                    # Each attribute below falls back to a default when absent;
                    # bytes values (h5py can return either) are decoded to str.
                    try:
                        self.variables_meta[k]['units'] = var.attrs['units']
                        if isinstance(self.variables_meta[k]['units'], bytes):
                            self.variables_meta[k]['units'] = str(self.variables_meta[k]['units'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['units'] = "1"
                    try:
                        self.variables_meta[k]['title'] = var.attrs['long_name']
                        if isinstance(self.variables_meta[k]['title'], bytes):
                            self.variables_meta[k]['title'] = str(self.variables_meta[k]['title'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['title'] = k

                    # 'range' is either the string 'false' (no fixed range) or a pair of values.
                    try:
                        if isinstance(var.attrs['range'], (bytes, str)) and var.attrs['range'].lower() in (b'false', 'false'):
                            self.variables_meta[k]['data_range'] = False
                        else:
                            self.variables_meta[k]['data_range'] = var.attrs['range']
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['data_range'] = None

                    try:
                        self.variables_meta[k]['color_scale'] = var.attrs['color_scale']
                        if isinstance(self.variables_meta[k]['color_scale'], bytes):
                            self.variables_meta[k]['color_scale'] = str(self.variables_meta[k]['color_scale'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['color_scale'] = 'nipy_spectral'
                    # 'log_range' is stored as the string 'True'/'False' (bytes or str).
                    try:
                        self.variables_meta[k]['log_range'] = (var.attrs['log_range'].lower() == b'true'
                                                               if isinstance(var.attrs['log_range'], bytes) else
                                                               var.attrs['log_range'].lower() == 'true')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['log_range'] = False
        return True
260
261
    ## Write processed data to output netcdf file.
    #
    #  @param fname File to write to
    #  @param mode  Writing mode, defaults to append.
    #               NOTE(review): this parameter is currently ignored; the file is
    #               always opened in append ('a') mode below — confirm intent.
    #
    #  Write data (including extraction specific dimensions) to the group with
    #  the name given in the `storage_group_name` property.
    def dump(self, fname, mode='a'):
        # HDF5 creation keywords shared by every dataset written here.
        compress={'compression':'gzip', 'compression_opts':3, 'shuffle':True, 'fletcher32':True}
        with h5py.File(fname, 'a') as ref:
            # group: create on the first write, reuse on subsequent time steps
            # (bare except: h5py raises when the group already exists).
            try:
                grp = ref.create_group(self.storage_group_name)
                grp.attrs['comment'] = 'Zonal mean data'
            except:
                grp = ref[self.storage_group_name]

            # dimensions: the latitude grid, created once and rewritten each call.
            try:
                lat_var = grp.create_dataset('latitude', (self.n_latitude,), dtype=np.float32, **compress)
            except:
                lat_var = grp['latitude']
            lat_var[:] = self.latitude_centers
            lat_var.attrs['units'] = 'degrees_north'
            lat_var.attrs['standard_name'] = 'latitude'

            # Record the resolution on first write; on later writes verify it matches,
            # otherwise the latitude grids in file and run are incompatible.
            try:
                res = grp.attrs['resolution']
            except (AttributeError, KeyError):
                res = None
                grp.attrs['resolution'] = self.resolution

            if res is not None and res != self.resolution:
                self.logger.error("Spatial resolution in run and spatial resolution in file do not match.")
                return

            # NOTE(review): Dataset.dims.create_scale() was deprecated in h5py 2.10 and
            # removed in 3.0 (replaced by make_scale) — confirm the h5py version pinned
            # for this project before upgrading.
            try:
                lat_bounds_var = grp.create_dataset('latitude_bounds', (self.n_latitude,2), dtype=np.float32, **compress)
                lat_bounds_var.dims.create_scale(grp['latitude'], 'latitude')
                lat_bounds_var.dims[0].attach_scale(grp['latitude'])
                lat_bounds_var.dims.create_scale(ref['nv'], 'nv')
                lat_bounds_var.dims[1].attach_scale(ref['nv'])
            except:
                lat_bounds_var = grp['latitude_bounds']
            lat_bounds_var[...] = self.latitude_bounds

            time_step = self.time_index_in_output

            # Write the mean and the observation count for every variable; the
            # '_land'/'_sea' variants appear as separate entries in variable_names.
            for k in [name for name in self.variable_names if not name.endswith('_count')]:
                # kk is the base variable name used for the metadata lookup.
                if k.endswith('_land'):
                    kk = k.replace('_land', '')
                elif k.endswith('_sea'):
                    kk = k.replace('_sea', '')
                else:
                    kk = k

                if self.variables_meta[kk]['noscanline']:
                    continue


                # Extend an existing dataset along the (unlimited) time axis, or
                # create it when writing the first time step.
                try:
                    var = grp[k]
                    var.resize(self.time_index_in_output+1, axis=0)
                except:
                    if self.time_index_in_output > 0:
                        continue
                    var = grp.create_dataset(k, (1, self.n_latitude), dtype=np.float32, maxshape=(None, self.n_latitude), **compress)
                    var.dims.create_scale(ref['time'], 'time')
                    var.dims[0].attach_scale(ref['time'])
                    var.dims.create_scale(grp['latitude'], 'latitude')
                    var.dims[1].attach_scale(grp['latitude'])

                # Attach presentation metadata, with defaults when missing/invalid.
                try:
                    var.attrs['units'] = self.variables_meta[kk]['units']
                except (AttributeError, TypeError):
                    var.attrs['units'] = "1"

                try:
                    var.attrs['long_name'] = self.variables_meta[kk]['title']
                except (AttributeError, TypeError):
                    var.attrs['long_name'] = k
                try:
                    var.attrs['range'] = [float(v) for v in self.variables_meta[kk]['data_range']]
                except TypeError:
                    # data_range is not iterable: truthy means "derive from the data",
                    # False/None is recorded as the string 'false'.
                    var.attrs['range'] = [np.min(self.variables[k]), np.max(self.variables[k])] if self.variables_meta[kk]['data_range'] else 'false'
                except AttributeError:
                    var.attrs['range'] = [np.min(self.variables[k]), np.max(self.variables[k])]
                try:
                    var.attrs['color_scale'] = self.variables_meta[kk]['color_scale']
                except (AttributeError, TypeError):
                    var.attrs['color_scale'] = "nipy_spectral"
                try:
                    var.attrs['log_range'] = str(self.variables_meta[kk]['log_range'])
                except:
                    var.attrs['log_range'] = "false"
                var.attrs['comment'] = 'Mean of variable {0}'.format(k)

                var[time_step, :] = self.variables[k]
                # Same create-or-resize pattern for the observation counter.
                try:
                    cvar = grp[k+'_count']
                    cvar.resize(self.time_index_in_output+1, axis=0)
                except:
                    if self.time_index_in_output > 0:
                        continue
                    cvar = grp.create_dataset(k+'_count', (1, self.n_latitude), dtype=np.int32, maxshape=(None, self.n_latitude), **compress)
                    cvar.dims.create_scale(ref['time'], 'time')
                    cvar.dims[0].attach_scale(ref['time'])
                    cvar.dims.create_scale(grp['latitude'], 'latitude')
                    cvar.dims[1].attach_scale(grp['latitude'])
                cvar[time_step, :] = self.variables[k+'_count']
384
385
    ## Create a zonal mean plot of a single variable.
    #
    #  @param varname   Base name of the variable; the '_land'/'_sea' variants are
    #                   drawn alongside the all-pixel curve automatically.
    #  @param figure    Matplotlib figure to draw into.
    #  @param ax        Axes to use; a new subplot is added to the figure when None.
    #  @param autorange If True the x-range is left to matplotlib instead of the
    #                   stored data range.
    #  @return True when something was plotted, False otherwise.
    def plot(self, varname, figure, ax=None, autorange=False, **kwargs):
        import matplotlib as mpl
        mpl.use('svg') # to allow running without X11
        mpl.rc('font', family='DejaVu Sans')
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        from matplotlib.colors import Normalize, LogNorm
        import matplotlib.ticker as ticker
        import temis_color_tables

        # The suffixed variants are plotted together with the base name below.
        if varname.endswith('_land') or varname.endswith('_sea'):
            return False

        self.logger.info("Plotting zonal average of %s", varname)

        if ax is None:
            ax = figure.add_subplot(1, 1, 1)

        has_negative = False
        n_points = 0
        for surface, trace in zip(('all', '_land', '_sea'), ('k-', 'r-', 'b-')):
            if surface == 'all':
                name = varname
            else:
                name = varname + surface
            # Latitude bins without observations are masked out of the curve.
            cidx = (self.variables[name + '_count'] != 0)
            n_points += np.sum(cidx)

            self.logger.debug("{0}.plot(): found {1} valid points for {2}".format(self.__class__.__name__, np.sum(cidx), name))
            data = np.ma.asarray(self.variables[name])
            data.mask = np.logical_not(cidx)
            ax.plot(data, self.latitude_centers, trace, label=surface.strip('_'))

            # Remember non-positive values: they forbid a log x-axis later on.
            has_negative = has_negative or np.any(data <= 0)

        if n_points == 0:
            self.logger.debug("{0}.plot(): found no valid points for {1}".format(self.__class__.__name__, varname))
            return False

        ax.set_xlabel(self.variables_meta[varname]['title'] if self.variables_meta[varname]['units'] == "1"
                      else "{0} [{1}]".format(self.variables_meta[varname]['title'],
                                              self.variables_meta[varname]['units']))
        ax.set_ylabel("Latitude [degrees]")
        if self.variables_meta[varname]['log_range']:
            if has_negative:
                self.logger.warning("Log scale requested on negative data in zonal mean plot of '%s' (ignoring request)", varname)
            else:
                ax.set_xscale('log')
        if not autorange:
            ax.set_xlim(self.variables_meta[varname]['data_range'])

        ax.set_ylim(-90.0, 90.0)
        ax.legend()
        return True
442
443
    ## Alias for plot(), kept for interface compatibility.
    #
    #  @param varname   Name of the variable to plot.
    #  @param figure    Matplotlib figure to draw into.
    #  @param ax        Optional axes, forwarded to plot().
    #  @param autorange Forwarded to plot().
    #  @return The return value of plot().
    def plot_zonal_mean(self, varname, figure, ax=None, autorange=False, **kwargs):
        # Bug fix: previously 'ax=None' was passed on, silently discarding the
        # caller's axes; forward the actual argument instead.
        return self.plot(varname, figure, ax=ax, autorange=autorange, **kwargs)
445
446
    def zonal_mean(self, varname, surface='all'):
447
        if varname not in self.variables:
448
            self.logger.debug("Worldplot data for '%s' not found.", varname)
449
            return None
450
451
        if surface == 'land':
452
            zonal = np.ma.asarray(self.variables[varname+'_land'])
453
            zonal.mask = (self.variables[varname + '_land_count'] == 0)
454
        elif surface == 'sea':
455
            zonal = np.ma.asarray(self.variables[varname+'_sea'])
456
            zonal.mask = (self.variables[varname + '_sea_count'] == 0)
457
        else:
458
            zonal = np.ma.asarray(self.variables[varname])
459
            zonal.mask = (self.variables[varname + '_count'] == 0)
460
461
        if np.all(zonal.mask):
462
            return None
463
464
        rval = {'zonal_mean':zonal,
465
                'lat':self.latitude_centers,
466
                'lat_bounds':self.latitude_bounds}
467
        try:
468
            if surface == 'land':
469
                rval['title'] = self.variables_meta[varname]['title'] + ' (land)'
470
            elif surface == 'sea':
471
                rval['title'] = self.variables_meta[varname]['title'] + ' (sea)'
472
            else:
473
                rval['title'] = self.variables_meta[varname]['title']
474
        except KeyError:
475
            rval['title'] = varname
476
477
        try:
478
            rval['units'] = self.variables_meta[varname]['units']
479
        except KeyError:
480
            rval['units'] = "1"
481
482
        try:
483
            rval['range'] = self.variables_meta[varname]['data_range']
484
        except KeyError:
485
            rval['range'] = False
486
487
        try:
488
            rval['log_range'] = self.variables_meta[varname]['log_range']
489
        except KeyError:
490
            rval['log_range'] = False
491
492
        try:
493
            rval['colorscale'] = self.variables_meta[varname]['color_scale']
494
        except KeyError:
495
            rval['colorscale'] = "nipy_spectral"
496
497
        return rval