"""
General utilities.
"""
import functools
import numpy as np
import pandas as pd
def parse_engine(engine):
    """
    Choose the best engine available and check if it's valid.

    Parameters
    ----------
    engine : str
        The name of the engine. One of ``"auto"``, ``"numba"`` or ``"numpy"``.
        If ``"auto"``, will favor numba if it can be imported.

    Returns
    -------
    engine : str
        The name of the engine that should be used (``"numba"`` or
        ``"numpy"``).

    Raises
    ------
    ValueError
        If *engine* is not one of the valid engine names.

    """
    # A tuple (rather than a set) keeps the membership test based on equality
    # only, so unhashable inputs such as lists are reported as an invalid
    # engine with ValueError instead of leaking a TypeError.
    engines = ("auto", "numba", "numpy")
    if engine not in engines:
        # Build the listing by hand so the message is the same on every run
        # (the repr of a set of strings depends on hash randomisation).
        valid = "{" + ", ".join(repr(name) for name in engines) + "}"
        raise ValueError("Invalid engine '{}'. Must be in {}.".format(engine, valid))
    if engine != "auto":
        return engine
    # Only the import itself is guarded: it is a probe for availability.
    try:
        import numba  # pylint: disable=unused-variable,unused-import,import-outside-toplevel
    except ImportError:
        return "numpy"
    return "numba"
def dummy_jit(**kwargs):  # pylint: disable=unused-argument
    """
    Stand-in for :func:`numba.jit` to be used when numba is not installed.

    Use as a decorator, called with parentheses (e.g.
    ``@dummy_jit(nopython=True)``). The keyword arguments are accepted only
    so that the call looks like a call to :func:`numba.jit`; they are ignored.

    Parameters
    ----------
    **kwargs
        Any keyword arguments. None of them are used.

    Returns
    -------
    decorator : function
        A decorator that replaces the decorated function with one that keeps
        its name and docstring but raises :class:`RuntimeError` when called,
        warning that numba could not be found.

    """

    def dummy_decorator(function):
        "Swap *function* for a placeholder that always fails."

        def fail_on_call(*_args, **_kwargs):
            "Just raise an exception."
            raise RuntimeError("Could not find numba.")

        # Copy the metadata (name, docstring, ...) of the decorated function
        # onto the placeholder so that it still looks like the original.
        return functools.wraps(function)(fail_on_call)

    return dummy_decorator
def n_1d_arrays(arrays, n):
    """
    Take the first *n* items of a sequence and return them as flat arrays.

    Each selected item is converted to a numpy array with at least one
    dimension and then raveled to 1D.

    Parameters
    ----------
    arrays : tuple of arrays
        The arrays. Can be lists or anything that can be converted to a numpy
        array (including numpy arrays).
    n : int
        How many arrays to return.

    Returns
    -------
    1darrays : tuple of arrays
        The converted 1D numpy arrays.

    Examples
    --------

    >>> import numpy as np
    >>> arrays = [np.arange(4).reshape(2, 2)]*3
    >>> n_1d_arrays(arrays, n=2)
    (array([0, 1, 2, 3]), array([0, 1, 2, 3]))

    """
    flattened = []
    for item in arrays[:n]:
        as_array = np.atleast_1d(item)
        flattened.append(as_array.ravel())
    return tuple(flattened)
def check_data(data):
    """
    Make sure the *data* argument is a tuple.

    A tuple is returned unchanged. Anything else (for example, a single
    array or a list) is wrapped in a tuple with a single element. This is the
    default format accepted and used by all gridders and processing
    functions.

    Examples
    --------

    >>> check_data([1, 2, 3])
    ([1, 2, 3],)
    >>> check_data(([1, 2], [3, 4]))
    ([1, 2], [3, 4])

    """
    return data if isinstance(data, tuple) else (data,)
def variance_to_weights(variance, tol=1e-15, dtype="float64"):
    """
    Converts data variances to weights for gridding.

    Weights are defined as the inverse of the variance, scaled to the range
    [0, 1], i.e. ``variance.min()/variance``.

    Any variance that is smaller than *tol* will automatically receive a weight
    of 1 to avoid zero division or blown up weights.

    The input arrays are never modified.

    Parameters
    ----------
    variance : array or tuple of arrays
        An array with the variance of each point. If there are multiple arrays
        in a tuple, will calculate weights for each of them separately. Can
        have NaNs but they will be treated as zeros and therefore receive a
        weight of 1.
    tol : float
        The tolerance, or cutoff threshold, for small variances.
    dtype : str or numpy dtype
        The type of the output weights array.

    Returns
    -------
    weights : array or tuple of arrays
        Data weights in the range [0, 1] with the same shape as *variance*. If
        more than one variance array was provided, then this will be a tuple
        with the weights corresponding to each variance array.

    Examples
    --------

    >>> print(variance_to_weights([0, 2, 0.2, 1e-16]))
    [1.  0.1 1.  1. ]
    >>> print(variance_to_weights([0, 0, 0, 0]))
    [1. 1. 1. 1.]
    >>> for w in variance_to_weights(([0, 1, 10], [2, 4.0, 8])):
    ...     print(w)
    [1.  1.  0.1]
    [1.   0.5  0.25]

    """
    variance = check_data(variance)
    weights = []
    for var in variance:
        # Work on a copy (the default of nan_to_num). Replacing NaNs in place
        # would silently overwrite values in the array passed by the caller.
        var = np.nan_to_num(np.atleast_1d(var))
        # Start from a weight of 1 everywhere; only variances above the
        # tolerance get a scaled-down weight below.
        w = np.ones_like(var, dtype=dtype)
        nonzero = var > tol
        if np.any(nonzero):
            nonzero_var = var[nonzero]
            # The smallest significant variance gets weight 1, larger ones
            # get proportionally smaller weights.
            w[nonzero] = nonzero_var.min() / nonzero_var
        weights.append(w)
    if len(weights) == 1:
        return weights[0]
    return tuple(weights)
def maxabs(*args):
    """
    Calculate the maximum absolute value of the given array(s).

    Use this to set the limits of your colorbars and center them on zero.

    Parameters
    ----------
    args
        One or more arrays. If more than one are given, a single maximum will
        be calculated across all arrays.

    Returns
    -------
    maxabs : float
        The maximum absolute value across all arrays.

    Examples
    --------

    >>> maxabs((1, -10, 25, 2, 3))
    25
    >>> maxabs((1, -10.5, 25, 2), (0.1, 100, -500), (-200, -300, -0.1, -499))
    500.0

    """
    peaks = []
    for values in args:
        values = np.atleast_1d(values)
        # The largest absolute value has to be at one of the two extremes, so
        # there is no need to take the absolute value of the whole array.
        extremes = [values.min(), values.max()]
        peaks.append(np.abs(extremes).max())
    return np.max(peaks)
def grid_to_table(grid):
    """
    Convert a grid to a table with the values and coordinates of each point.

    Takes a grid as input, extracts the coordinates of its dimensions and runs
    them through :func:`numpy.meshgrid` to create a table with one row per
    grid point. Works for any number of data variables. Use cases include
    passing gridded data to functions that expect data in XYZ format, such as
    :class:`verde.BlockReduce`

    The coordinate columns are named after the dimensions of the data (in the
    order of the data axes), so the result does not depend on the order in
    which the coordinates were attached to the grid.

    Parameters
    ----------
    grid : :class:`xarray.Dataset`
        A 2D grid with one or more data variables.

    Returns
    -------
    table : :class:`pandas.DataFrame`
        Table with coordinates and variable values for each point in the grid.

    Examples
    --------

    >>> import xarray as xr
    >>> import numpy as np
    >>> # Create a sample grid with a single data variable
    >>> temperature = xr.DataArray(
    ...     np.arange(20).reshape((4, 5)),
    ...     coords=(np.arange(4), np.arange(5, 10)),
    ...     dims=['northing', 'easting']
    ... )
    >>> grid = xr.Dataset({"temperature": temperature})
    >>> table = grid_to_table(grid)
    >>> list(sorted(table.columns))
    ['easting', 'northing', 'temperature']
    >>> print(table.northing.values)
    [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3]
    >>> print(table.easting.values)
    [5 6 7 8 9 5 6 7 8 9 5 6 7 8 9 5 6 7 8 9]
    >>> print(table.temperature.values)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
    >>> # Grids with multiple data variables will have more columns.
    >>> wind_speed = xr.DataArray(
    ...     np.arange(20, 40).reshape((4, 5)),
    ...     coords=(np.arange(4), np.arange(5, 10)),
    ...     dims=['northing', 'easting']
    ... )
    >>> grid['wind_speed'] = wind_speed
    >>> table = grid_to_table(grid)
    >>> list(sorted(table.columns))
    ['easting', 'northing', 'temperature', 'wind_speed']
    >>> print(table.northing.values)
    [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3]
    >>> print(table.easting.values)
    [5 6 7 8 9 5 6 7 8 9 5 6 7 8 9 5 6 7 8 9]
    >>> print(table.temperature.values)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
    >>> print(table.wind_speed.values)
    [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39]

    """
    variable_names = list(grid.data_vars.keys())
    # Stack all data variables into a single array. The new axis that indexes
    # the variables comes first; the remaining axes are the grid dimensions.
    stacked = grid.to_array()
    dimension_names = stacked.dims[1:]
    # Take the coordinate names from the dimensions of the stacked data
    # instead of from the ordering of ``grid.coords``: the latter is not tied
    # to the axis order and may include coordinates that are not dimensions.
    coordinate_values = [grid.coords[name].values for name in dimension_names]
    # "ij" indexing makes every meshed coordinate array have the same shape
    # and axis order as a single data variable, so raveling both in the same
    # way keeps coordinates and values aligned.
    meshed = np.meshgrid(*coordinate_values, indexing="ij")
    table = {
        name: values.ravel() for name, values in zip(dimension_names, meshed)
    }
    # One flat row of values per data variable.
    flat_variables = stacked.values.reshape(len(variable_names), -1)
    table.update(zip(variable_names, flat_variables))
    return pd.DataFrame(table)