Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

pycama / src / pycama / ZonalPlot.py @ 810:24bcdf3e6692

History | View | Annotate | Download (22.8 KB)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3

    
4
# Copyright 2019-2020 Maarten Sneep, KNMI
5
#
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are met:
8
#
9
# 1. Redistributions of source code must retain the above copyright notice,
10
#    this list of conditions and the following disclaimer.
11
#
12
# 2. Redistributions in binary form must reproduce the above copyright notice,
13
#    this list of conditions and the following disclaimer in the documentation
14
#    and/or other materials provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
20
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26

    
27
## @file ZonalPlot.py
28
#  @author Maarten Sneep
29
#
30
#  This file defines a subclass of the pycama.AnalysisAndPlot.AnalysisAndPlot class.
31
#  This subclass implements the zonal average for PyCAMA.
32

    
33
import math
34
import logging
35

    
36
import numpy as np
37
import netCDF4
38
import h5py
39

    
40
from .AnalysisAndPlot import AnalysisAndPlot
41

    
42
from .utilities import *
43

    
44
## Collect data for a zonal average
45
#
46
#  This is a subclass of the pycama.AnalysisAndPlot.AnalysisAndPlot class intended
47
#  for collecting data as a zonal mean and plotting the result. As for all
48
#  pycama.AnalysisAndPlot.AnalysisAndPlot subclasses the extracted data can be
49
#  stored into/read from a netCDF4 file.
50
#
51
#  The __init__ method is inherited.
52
class ZonalPlot(AnalysisAndPlot):
53
    ## Class specific preparation.
54
    #
55
    #  @param resolution Keyword parameter, float. Set the spatial resolution \f$\Delta\delta\f$ in degrees (accepted under the keys 'zonal_resolution' or 'resolution'). Defaults to 1.0.
56
    #
57
    def setup(self, **kwargs):
        # The resolution is snapped upward to a whole number of 1/360-degree
        # steps. The key 'zonal_resolution' takes precedence over the generic
        # 'resolution'; without either the grid defaults to 1 degree.
        for key in ('zonal_resolution', 'resolution'):
            if key in kwargs:
                self.resolution = math.ceil(360.0*kwargs[key])/360.0
                break
        else:
            self.resolution = 1.0

        ## The latitude grid is linear (equidistant) from south to north, with pixel centers at
        #  \f$\delta_c = -90.0 + \Delta\delta/2 + i\Delta\delta\f$, with \f$i=0, 1, \ldots, (180/\Delta\delta)-1\f$.
        #  This instance variable is an array with just the pixel centers.
        self.latitude_centers = None
        ## The number of latitudes in the grid, a simple integer, with value the
        #  length of the `latitude_centers` array.
        self.n_latitude = -1
        ## latitude boundaries, an [n_latitude, 2] array.
        self.latitude_bounds = None

        self.define_grid()
76

    
77
    ## Define the zonal average grid
78
    #
79
    #  This creates the grid following the definitions for each of the instance variables.
80
    def define_grid(self):
        # Band edges run from -90 to +90 in steps of the configured
        # resolution; the upper limit carries a half-step margin so the final
        # edge at +90 survives floating-point round-off in np.arange.
        edges = np.arange(-90.0, 90.0+self.resolution/2, self.resolution)
        self.latitude_bounds = np.column_stack((edges[0:-1], edges[1:]))
        # Band centers sit half a step above each southern edge.
        centers = np.arange(-90.0+self.resolution/2, 90.0, self.resolution)
        self.latitude_centers = centers
        self.n_latitude = len(centers)
86

    
87
    ## Add an input variable.
88
    #
89
    #  @param var a pycama.Variable.Variable instance.
90
    #
91
    #  This method reserves memory for the zonal average data, both the averaged data and the observation counter, for all pixels, and land/sea separately.
92
    def add_variable(self, var):
        # Reserve zero-initialized accumulators for the zonal average: a
        # running sum (turned into a mean at the end of process()) and an
        # observation counter, for all pixels and for land/sea separately.
        for modifier in ("", "_land", "_sea"):
            name = var.name + modifier
            self.variables[name] = np.zeros((self.n_latitude,), dtype=np.float32)
            self.variables[name + '_count'] = np.zeros((self.n_latitude,), dtype=np.int32)

        # Zero-argument super() instead of super(self.__class__, self): the
        # latter recurses forever when this method is reached from a subclass
        # instance, because self.__class__ is then the subclass itself.
        super().add_variable(var)
        self.variables_meta[var.name]['noscanline'] = var.noscanline
        if hasattr(var, 'map_range'):
            self.variables_meta[var.name]['data_range'] = var.map_range
102

    
103
    ## Extract the required information from the input data.
104
    #
105
    #  Using the latitude bounds we find for each observation into which latitude band it should be sorted.
106
    #  In a loop over the latitude bands we select the observations for each band. If there are none we skip to the next latitude band.
107
    #  The longitudes are brought into the [-180, 180) range, and longitude bin boundaries are extracted for the latitude band.
108
    #
109
    #  The inner loop is over the variables. For each variable the valid data is selected (taking unsynchronized variable into account - I think),
110
    #  and the `np.histogram()` function is used to calculate the value for each longitude bin.
111
    #  The longitudes are used as primary histogram variable, with the data we're actually interested
112
    #  in the weights parameter. A separate call to `np.histogram()` is used to count the number of
113
    #  observations in each grid cell.
114
    #
115
    #  After extraction of the data the mean is taken by dividing the sum of the observations
116
    #  by the number of observations (where the latter is not 0).
117
    def process(self):
        # Accumulate the zonal sums and counts for every displayed variable,
        # then convert the sums into means.
        lat = self.input_variables.variables['latitude'].aggregate_data
        try:
            surface_flags = np.asarray(self.input_variables.variables['surface_classification'].aggregate_data, dtype=np.uint8)
        except KeyError:
            # No surface classification available: the land/sea splits below
            # are silently skipped.
            surface_flags = None
        # Histogram bin edges: all lower band bounds plus the final upper bound.
        bins = np.concatenate([self.latitude_bounds[:, 0], [self.latitude_bounds[-1, 1]]])

        variables = [v for v in self.input_variables.variables.values() if v.show and not v.noscanline]
        n_variables = len(variables)
        # Report progress roughly 20 times, but not more often than every 4 variables.
        n_interval = max([(n_variables//20), 4])
        for i, v in enumerate(variables):
            if i > 0 and i % n_interval == 0:
                self.progress(100*(i+1)/n_variables)
            for surface in ('all', 'land', 'sea'):
                if surface == 'land':
                    if surface_flags is None:
                        continue
                    sflag = (surface_flags & 3) == 0
                    name = v.name + '_land'
                elif surface == 'sea':
                    if surface_flags is None:
                        continue
                    sflag = (surface_flags & 3) == 1
                    name = v.name + '_sea'
                else:
                    # 'all': select every observation. Builtin bool is used
                    # here; the original np.bool alias was deprecated in
                    # NumPy 1.20 and removed in NumPy 1.24.
                    sflag = np.ones(v.aggregate_data.shape, dtype=bool)
                    name = v.name

                # Valid data: finite, unmasked (when a masked array), below
                # the fill-value threshold 0x1.ep+122 (~9.97e36), and on the
                # requested surface type.
                try:
                    vselected = np.logical_and(
                                    np.logical_and(
                                        np.logical_and(
                                            np.isfinite(v.aggregate_data),
                                            np.logical_not(v.aggregate_data.mask)),
                                            v.aggregate_data < float.fromhex('1.ep+122')),
                                            sflag)
                except (AttributeError, IndexError):
                    # Plain ndarray without a usable .mask attribute.
                    vselected = np.logical_and(
                                    np.logical_and(
                                        np.isfinite(v.aggregate_data),
                                        v.aggregate_data < float.fromhex('1.ep+122')),
                                        sflag)
                if np.any(vselected):
                    # Sum of the data per latitude band (via the weights
                    # parameter) and the number of contributing observations.
                    h = np.histogram(lat[vselected], bins=bins, weights=v.aggregate_data[vselected])[0]
                    c = np.histogram(lat[vselected], bins=bins)[0]

                    self.variables[name] += h
                    self.variables[name + '_count'] += c

        # divide by number of points for mean; bands without observations are
        # left at 0 (their count stays 0, which marks them as empty).
        for name in [n for n in self.variables.keys() if not n.endswith('_count')]:
            cidx = (self.variables[name + '_count'] != 0)
            self.variables[name][cidx] /= self.variables[name + '_count'][cidx]
171

    
172
    ## Merge data into a combined dataset.
173
    #
174
    #  @param other The object to be added to self,
175
    #               also an instance of the pycama.ZonalPlot.ZonalPlot class.
176
    def __iadd__(self, other):
        # Merge another ZonalPlot accumulation into this one. Both operands
        # hold per-band means (process() already divided by the counts), so
        # combine them as a count-weighted average.
        for name in [n for n in self.variables.keys() if not n.endswith('_count')]:
            self.logger.debug("{0}.__iadd__(): processing '{1}'".format(self.__class__.__name__, name))

            S = self.variables[name]
            C = self.variables[name + '_count']
            oS = other.variables[name]
            oC = other.variables[name + '_count']
            # Only update bands that have observations in either operand.
            idx = C+oC > 0
            self.variables[name][idx] = (S[idx]*C[idx] + oS[idx]*oC[idx])/(C[idx]+oC[idx])
            self.variables[name + '_count'] = C + oC
        # __iadd__ must return the updated object; without this `a += b`
        # rebinds a to None.
        return self
187

    
188
    ## Read processed data from the input file, for specified time index.
189
    #
190
    #  @param fname NetCDF file with input data.
191
    #  @param time_index Time slice to read.
192
    def ingest(self, fname, time_index):
        # Restore one time slice of previously processed zonal-mean data from
        # the netCDF/HDF5 file `fname` into self.variables/self.variables_meta.
        # Returns True on success, False when the zonal-plot group is absent.
        self.logger.debug("{0}.ingest(): reading {1}".format(self.__class__.__name__, fname))
        self.time_index_in_output = time_index
        with h5py.File(fname, 'r') as ref:
            if self.storage_group_name not in ref.keys():
                self.logger.warning("Zonalplot data not in '%s'", os.path.basename(fname))
                return False
            grp = ref[self.storage_group_name]
            # Rebuild the latitude grid from the stored dimensions rather
            # than recomputing it, so the grid matches the file exactly.
            self.resolution = grp.attrs['resolution']
            self.n_latitude = len(grp['latitude'])
            self.latitude_centers = grp['latitude'][:]
            self.latitude_bounds = grp['latitude_bounds'][...]

            self.variables = {}
            self.variables_meta = {}
            # Variable names are stored as bytes in the file; decode before
            # comparing. The grid axes themselves are not data variables.
            for k in [name.decode("ASCII") for name in ref['variable_names'][:] if name.decode("ASCII") not in ('latitude', 'longitude')]:
                if k not in grp.keys():
                    self.logger.debug("Variable name {0} requested but not available".format(k))
                    continue
                # Read the 'all', land and sea variants plus their counters.
                for kk in (k, k+'_land', k+'_sea'):
                    self.logger.debug("{0}.ingest(): processing {1}".format(self.__class__.__name__, kk))
                    var = grp[kk]
                    self.variables[kk] = var[self.time_index_in_output, :]
                    count_var = grp[kk+'_count']
                    self.variables[kk+'_count'] = count_var[self.time_index_in_output, :]

                    # Metadata is attached to the base variable only; skip
                    # the _land/_sea variants below.
                    if k != kk:
                        continue

                    self.variables_meta[k] = {}
                    self.variables_meta[k]['noscanline'] = False
                    # Each attribute read falls back to a default when the
                    # attribute is missing; bytes values (h5py may return
                    # attributes as bytes) are decoded to str.
                    try:
                        self.variables_meta[k]['units'] = var.attrs['units']
                        if isinstance(self.variables_meta[k]['units'], bytes):
                            self.variables_meta[k]['units'] = str(self.variables_meta[k]['units'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['units'] = "1"
                    try:
                        self.variables_meta[k]['title'] = var.attrs['long_name']
                        if isinstance(self.variables_meta[k]['title'], bytes):
                            self.variables_meta[k]['title'] = str(self.variables_meta[k]['title'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['title'] = k

                    # The 'range' attribute is either a numeric pair or the
                    # literal string 'false' (meaning: no fixed range).
                    try:
                        if isinstance(var.attrs['range'], (bytes, str)) and var.attrs['range'].lower() in (b'false', 'false'):
                            self.variables_meta[k]['data_range'] = False
                        else:
                            self.variables_meta[k]['data_range'] = var.attrs['range']
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['data_range'] = None

                    try:
                        self.variables_meta[k]['color_scale'] = var.attrs['color_scale']
                        if isinstance(self.variables_meta[k]['color_scale'], bytes):
                            self.variables_meta[k]['color_scale'] = str(self.variables_meta[k]['color_scale'], 'utf-8')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['color_scale'] = 'nipy_spectral'
                    # 'log_range' is stored as the string 'True'/'False'
                    # (possibly as bytes); convert back to a bool.
                    try:
                        self.variables_meta[k]['log_range'] = (var.attrs['log_range'].lower() == b'true'
                                                               if isinstance(var.attrs['log_range'], bytes) else
                                                               var.attrs['log_range'].lower() == 'true')
                    except (AttributeError, KeyError):
                        self.variables_meta[k]['log_range'] = False
        return True
257

    
258
    ## Write processed data to output netcdf file.
259
    #
260
    #  @param fname File to write to
261
    #  @param mode  Writing mode, defaults to append.
262
    #
263
    #  Write data (including extraction specific dimensions) to the group with
264
    #  the name given in the `storage_group_name` property ("`worldplot_data`" for this class).
265
    def dump(self, fname, mode='a'):
        # Write the zonal-mean data (grid, per-variable means and counts) to
        # the group named by self.storage_group_name in `fname`, at time
        # slice self.time_index_in_output. The `mode` argument is now
        # honoured (the original hard-coded 'a' and ignored the parameter).
        # The try/create-except/reopen pattern throughout handles both the
        # first dump into a file and appends to an existing one; the bare
        # `except:` clauses were narrowed to `except Exception:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        compress = {'compression':'gzip', 'compression_opts':3, 'shuffle':True, 'fletcher32':True}
        with h5py.File(fname, mode) as ref:
            # group
            try:
                grp = ref.create_group(self.storage_group_name)
                grp.attrs['comment'] = 'Zonal mean data'
            except Exception:
                grp = ref[self.storage_group_name]

            # dimensions
            try:
                lat_var = grp.create_dataset('latitude', (self.n_latitude,), dtype=np.float32, **compress)
            except Exception:
                lat_var = grp['latitude']
            lat_var[:] = self.latitude_centers
            lat_var.attrs['units'] = 'degrees_north'
            lat_var.attrs['standard_name'] = 'latitude'

            try:
                res = grp.attrs['resolution']
            except (AttributeError, KeyError):
                res = None
                grp.attrs['resolution'] = self.resolution

            # Refuse to mix grids: an existing file must use the same
            # resolution as the current run.
            if res is not None and res != self.resolution:
                self.logger.error("Spatial resolution in run and spatial resolution in file do not match.")
                return

            try:
                lat_bounds_var = grp.create_dataset('latitude_bounds', (self.n_latitude, 2), dtype=np.float32, **compress)
                # NOTE(review): dims.create_scale is deprecated in h5py>=2.10
                # (make_scale replaces it); kept for compatibility with the
                # h5py version this project targets — confirm before upgrading.
                lat_bounds_var.dims.create_scale(grp['latitude'], 'latitude')
                lat_bounds_var.dims[0].attach_scale(grp['latitude'])
                lat_bounds_var.dims.create_scale(ref['nv'], 'nv')
                lat_bounds_var.dims[1].attach_scale(ref['nv'])
            except Exception:
                lat_bounds_var = grp['latitude_bounds']
            lat_bounds_var[...] = self.latitude_bounds

            time_step = self.time_index_in_output

            for k in [name for name in self.variable_names if not name.endswith('_count')]:
                # The _land/_sea variants share the metadata of the base name.
                if k.endswith('_land'):
                    kk = k.replace('_land', '')
                elif k.endswith('_sea'):
                    kk = k.replace('_sea', '')
                else:
                    kk = k

                if self.variables_meta[kk]['noscanline']:
                    continue

                try:
                    var = grp[k]
                    var.resize(self.time_index_in_output+1, axis=0)
                except Exception:
                    if self.time_index_in_output > 0:
                        # Dataset missing mid-run: skip rather than create one
                        # with missing leading time slices.
                        continue
                    var = grp.create_dataset(k, (1, self.n_latitude), dtype=np.float32, maxshape=(None, self.n_latitude), **compress)
                    var.dims.create_scale(ref['time'], 'time')
                    var.dims[0].attach_scale(ref['time'])
                    var.dims.create_scale(grp['latitude'], 'latitude')
                    var.dims[1].attach_scale(grp['latitude'])

                try:
                    var.attrs['units'] = self.variables_meta[kk]['units']
                except (AttributeError, TypeError):
                    var.attrs['units'] = "1"

                try:
                    var.attrs['long_name'] = self.variables_meta[kk]['title']
                except (AttributeError, TypeError):
                    var.attrs['long_name'] = k
                # 'range': a numeric pair when configured; the data min/max
                # when requested (truthy but not iterable-of-floats); the
                # literal 'false' when explicitly disabled.
                try:
                    var.attrs['range'] = [float(v) for v in self.variables_meta[kk]['data_range']]
                except TypeError:
                    var.attrs['range'] = [np.min(self.variables[k]), np.max(self.variables[k])] if self.variables_meta[kk]['data_range'] else 'false'
                except AttributeError:
                    var.attrs['range'] = [np.min(self.variables[k]), np.max(self.variables[k])]
                try:
                    var.attrs['color_scale'] = self.variables_meta[kk]['color_scale']
                except (AttributeError, TypeError):
                    var.attrs['color_scale'] = "nipy_spectral"
                try:
                    var.attrs['log_range'] = str(self.variables_meta[kk]['log_range'])
                except Exception:
                    var.attrs['log_range'] = "false"
                var.attrs['comment'] = 'Mean of variable {0}'.format(k)

                var[time_step, :] = self.variables[k]
                try:
                    cvar = grp[k+'_count']
                    cvar.resize(self.time_index_in_output+1, axis=0)
                except Exception:
                    if self.time_index_in_output > 0:
                        continue
                    cvar = grp.create_dataset(k+'_count', (1, self.n_latitude), dtype=np.int32, maxshape=(None, self.n_latitude), **compress)
                    cvar.dims.create_scale(ref['time'], 'time')
                    cvar.dims[0].attach_scale(ref['time'])
                    cvar.dims.create_scale(grp['latitude'], 'latitude')
                    cvar.dims[1].attach_scale(grp['latitude'])
                cvar[time_step, :] = self.variables[k+'_count']
381

    
382
    def plot(self, varname, figure, ax=None, autorange=False, **kwargs):
        # Draw the zonal mean of `varname` (all/land/sea traces) against
        # latitude into `figure` (or the supplied axes). Returns True when
        # something was plotted, False otherwise.
        #
        # matplotlib is imported lazily so the extraction code can run on
        # hosts without it; 'svg' allows running without X11.
        import matplotlib as mpl
        mpl.use('svg') # to allow running without X11
        mpl.rc('font', family='DejaVu Sans')
        # Unused imports from the original (pyplot, cm, Normalize/LogNorm,
        # ticker, temis_color_tables) were dropped: this routine only draws
        # line plots and never touches colormaps.

        # The land/sea variants are drawn together with the base variable.
        if varname.endswith('_land') or varname.endswith('_sea'):
            return False

        self.logger.info("Plotting zonal average of %s", varname)

        if ax is None:
            ax = figure.add_subplot(1, 1, 1)

        has_negative = False
        n_points = 0
        for surface, trace in zip(('all', '_land', '_sea'), ('k-', 'r-', 'b-')):
            if surface == 'all':
                name = varname
            else:
                name = varname + surface
            # Bands with a zero count hold no data and are masked out.
            cidx = (self.variables[name + '_count'] != 0)
            n_points += np.sum(cidx)

            self.logger.debug("{0}.plot(): found {1} valid points for {2}".format(self.__class__.__name__, np.sum(cidx), name))
            data = np.ma.asarray(self.variables[name])
            data.mask = np.logical_not(cidx)
            ax.plot(data, self.latitude_centers, trace, label=surface.strip('_'))

            # Track negative/zero values: they preclude a log axis below.
            has_negative = has_negative or np.any(data <= 0)

        if n_points == 0:
            self.logger.debug("{0}.plot(): found no valid points for {1}".format(self.__class__.__name__, varname))
            return False

        ax.set_xlabel(self.variables_meta[varname]['title'] if self.variables_meta[varname]['units'] == "1"
                      else "{0} [{1}]".format(self.variables_meta[varname]['title'],
                                              self.variables_meta[varname]['units']))
        ax.set_ylabel("Latitude [degrees]")
        if self.variables_meta[varname]['log_range']:
            if has_negative:
                self.logger.warning("Log scale requested on negative data in zonal mean plot of '%s' (ignoring request)", varname)
            else:
                ax.set_xscale('log')
        if not autorange:
            ax.set_xlim(self.variables_meta[varname]['data_range'])

        ax.set_ylim(-90.0, 90.0)
        ax.legend()
        return True
439

    
440
    def plot_zonal_mean(self, varname, figure, ax=None, autorange=False, **kwargs):
        # Thin alias for plot(). Forward the caller's axes object: the
        # original passed ax=None here, silently discarding a supplied axes
        # and always drawing into a fresh subplot.
        return self.plot(varname, figure, ax=ax, autorange=autorange, **kwargs)
442

    
443
    def zonal_mean(self, varname, surface='all'):
        # Return the zonal mean of `varname` for the requested surface type
        # ('all', 'land' or 'sea') as a dict with keys 'zonal_mean', 'lat',
        # 'lat_bounds', 'title', 'units', 'range', 'log_range' and
        # 'colorscale', or None when the variable is absent or fully masked.
        if varname not in self.variables:
            # Fixed message: the original said "Worldplot data", copied from
            # the WorldPlot class; this is zonal-mean data.
            self.logger.debug("Zonal mean data for '%s' not found.", varname)
            return None

        # Bands with a zero observation count carry no information; mask them.
        if surface == 'land':
            zonal = np.ma.asarray(self.variables[varname+'_land'])
            zonal.mask = (self.variables[varname + '_land_count'] == 0)
        elif surface == 'sea':
            zonal = np.ma.asarray(self.variables[varname+'_sea'])
            zonal.mask = (self.variables[varname + '_sea_count'] == 0)
        else:
            zonal = np.ma.asarray(self.variables[varname])
            zonal.mask = (self.variables[varname + '_count'] == 0)

        if np.all(zonal.mask):
            return None

        rval = {'zonal_mean':zonal,
                'lat':self.latitude_centers,
                'lat_bounds':self.latitude_bounds}
        # Metadata lookups fall back to neutral defaults when missing.
        try:
            if surface == 'land':
                rval['title'] = self.variables_meta[varname]['title'] + ' (land)'
            elif surface == 'sea':
                rval['title'] = self.variables_meta[varname]['title'] + ' (sea)'
            else:
                rval['title'] = self.variables_meta[varname]['title']
        except KeyError:
            rval['title'] = varname

        try:
            rval['units'] = self.variables_meta[varname]['units']
        except KeyError:
            rval['units'] = "1"

        try:
            rval['range'] = self.variables_meta[varname]['data_range']
        except KeyError:
            rval['range'] = False

        try:
            rval['log_range'] = self.variables_meta[varname]['log_range']
        except KeyError:
            rval['log_range'] = False

        try:
            rval['colorscale'] = self.variables_meta[varname]['color_scale']
        except KeyError:
            rval['colorscale'] = "nipy_spectral"

        return rval