correldata
Dataframe-like tables of data with correlated uncertainties
These data are stored in a dictionary, whose values are numpy arrays with elements which may be strings, floats, or floats with associated uncertainties as defined in the uncertainties library.
1""" 2Dataframe-like tables of data with correlated uncertainties 3 4These data are stored in a dictionary, whose values are numpy arrays 5with elements which may be strings, floats, or floats with associated uncertainties 6as defined in the [uncertainties](https://pypi.org/project/uncertainties) library. 7""" 8 9import os as _os 10import numpy as _np 11import warnings as _wrn 12import uncertainties as _uc 13 14from typing import Callable, Hashable, Any 15from uncertainties.unumpy import nominal_values as nv 16 17from ._metadata import * 18 19 20class MissingNominalValue(Exception): 21 "Exception raised in case of missing nominal value(s)" 22 pass 23 24class MissingStandardError(Exception): 25 "Exception raised in case of missing standard error(s)" 26 pass 27 28class RedundantUncertainty(Exception): 29 "Exception raised in case of redundant/ambiguous specification of uncertainties" 30 pass 31 32 33nv = nv 34"Alias for [`uncertainties.unumpy.nominal_values()`](https://pythonhosted.org/uncertainties/numpy_guide.html#uncertainties-and-nominal-values)" 35 36 37def smart_type(s: str) -> (int | float | str): 38 ''' 39 Tries to convert string `s` to an `int`, or to an `float` if that fails. 40 If both fail, return the original string unchanged. 41 ''' 42 if s.isdigit(): return int(s) 43 try: return float(s) 44 except: pass 45 return s 46 47 48def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool: 49 "Test whether 2-D array `M` is symmetric and positive semidefinite." 
50 ev = _np.linalg.eigvals(M) 51 return ( 52 _np.allclose(M, M.T) # M is symmetric 53 and _np.all( 54 (ev > 0) | _np.isclose(ev, 0) 55 ) # all eignevalues are either real and strictly positive or close to zero 56 ) 57 58 59def f2s( 60 x: Any, 61 f: (str | Callable | dict), 62 k: Hashable = None, 63 fb: (str | Callable) = 'z.6g', 64) -> str: 65 ''' 66 Format `x` according to format `f` 67 68 * If `f` is a string, return `f'{x:{f}}'` 69 * If `f` is a callable, return `f(x)` 70 * If `f` is a dict and optional argument `k` is a hashable, 71 return f2s(x, f[k]), otherwise return f2s(x, fb) 72 ''' 73 if isinstance (x, str): 74 return x 75 if isinstance (f, str): 76 return f'{x:{f}}' 77 if isinstance (f, Callable): 78 return f(x) 79 if isinstance (f, dict): 80 if k in f: 81 return f2s(x, f[k]) 82 if isinstance (fb, str): 83 return f'{x:{fb}}' 84 if isinstance (fb, Callable): 85 return fb(x) 86 raise TypeError(f'f2s() formatting argument f = {repr(f)} is neither a string nor a callable nor a dict.') 87 88 89def read_list( 90 data: list, 91): 92 """ 93 Read data from a list of dicts and return a `CorrelData` instance. 94 95 Valid arguments are lists of dicts where each dict share a non-empty set of keys, 96 i.e. there must be one of more keys that all dicts have in common. 97 98 > [!NOTE] 99 > Primarily intended for data where uncertainties are already specified as 100 > [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values. 101 > In other words, this function offers no built-in way to specify uncertainties 102 > (no keywords such as `SE`, `correl`, or `covar`). 
103 104 > [!TIP] 105 > **Example** 106 > 107 > ```py 108 > import correldata 109 > 110 > foo = correldata.CorrelData( 111 > X = [1., 2., 3.], 112 > SE_X = [1., 1., 1.], 113 > Y = [4., 5., 6.], 114 > SE_Y = [1., 1., 1.], 115 > ) 116 > 117 > U = foo['X'] + foo['Y'] 118 > V = foo['X'] - foo['Y'] 119 > 120 > bar = correldata.read_list([ 121 > dict(Name = 'abc', U = U[0], V = V[0]), 122 > dict(Name = 'def', U = U[1], V = V[1]), 123 > dict(Name = 'ghi', U = U[2], V = V[2]), 124 > ]) 125 > 126 > print(bar.str()) 127 > ``` 128 > yields: 129 > ```text 130 > Name, U, SE_U, V, SE_V 131 > abc, 5, 1.41421, -3, 1.41421 132 > def, 7, 1.41421, -3, 1.41421 133 > ghi, 9, 1.41421, -3, 1.41421 134 > ``` 135 """ 136 if len(data) == 0: 137 raise _wrn.warn("Input list is empty; returning None.") 138 return None 139 140 shared_keys = [k for k in data[0]] 141 for row in data[1:]: 142 shared_keys = [k for k in shared_keys if k in row] 143 144 if len(shared_keys) == 0: 145 raise _wrn.warn("No common subset of keys; returning None.") 146 return None 147 148 data_dict = {} 149 for k in shared_keys: 150 data_dict[k] = [row[k] for row in data] 151 152 return CorrelData(data_dict) 153 154 155def read_str( 156 data: str, 157 sep: str = ',', 158 validate_covar: bool = True, 159): 160 """ 161 Read data from a CSV-like string and return a `CorrelData` instance. 162 163 Column names are interpreted in the following way: 164 * In most cases, each columns is converted to a dict value, with the corresponding 165 dict key being the column's label. 166 * Columns whose label starts with `SE` are interpreted as specifying the standard 167 error for the latest preceding data column. 168 * Columns whose label starts with `correl` are interpreted as specifying the 169 correlation matrix for the latest preceding data column. In that case, column labels 170 are ignored for the rest of the columns belonging to this matrix. 
171 * Columns whose label starts with `covar` are interpreted as specifying the 172 covariance matrix for the latest preceding data column. In that case, column labels 173 are ignored for the rest of the columns belonging to this matrix. 174 * `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than 175 the latest preceding data column, by adding an underscore followed by the variable's 176 label (ex: `SE_foo`, `correl_bar`, `covar_baz`). 177 * `correl`, and `covar` may also be specified for any pair of variable, by adding an 178 underscore followed by the two variable labels, joined by a second underscore 179 (ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables 180 correspond, respectively, to the lines and columns of this matrix. 181 * Exceptions will be raised, for any given variable: 182 - when specifying both `covar` and any combination of (`SE`, `correl`) 183 - when specifying `correl` without `SE` 184 185 **Arguments** 186 - `data`: a CSV-like string 187 - `sep`: the CSV separator 188 - `validate_covar`: whether to check that the overall covariance matrix 189 is symmetric and positive semidefinite. Specifying `validate_covar = False` 190 bypasses this computationally expensive step. 
191 192 **Example** 193 ```py 194 import correldata 195 196 data = ''' 197 Sample, Tacid, D47, SE, correl,,, D48, covar,,, correl_D47_D48 198 FOO, 90., .245, .005, 1, 0.5, 0.5, .145, 4e-4, 1e-4, 1e-4, 0.5, 0, 0 199 BAR, 90., .246, .005, 0.5, 1, 0.5, .146, 1e-4, 4e-4, 1e-4, 0, 0.5, 0 200 BAZ, 90., .247, .005, 0.5, 0.5, 1, .147, 1e-4, 1e-4, 4e-4, 0, 0, 0.5 201 '''[1:-1] 202 203 print(correldata.read_str(data)) 204 ``` 205 yields: 206 ``` 207 { 208 'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'), 209 'Tacid': array([90., 90., 90.]), 210 'D47': uarray([0.245+/-0.005, 0.246+/-0.005, 0.247+/-0.005], dtype=object), 211 'D48': uarray([0.145+/-0.02, 0.146+/-0.02, 0.147+/-0.02], dtype=object) 212 } 213 ``` 214 """ 215 216 data = [[smart_type(e.strip()) for e in l.split(sep)] for l in data.split('\n')] 217 N = len(data) - 1 218 219 values, se, correl, covar = {}, {}, {}, {} 220 j = 0 221 while j < len(data[0]): 222 field = data[0][j] 223 if not ( 224 field.startswith('SE_') 225 or field.startswith('correl_') 226 or field.startswith('covar_') 227 or field == 'SE' 228 or field == 'correl' 229 or field == 'covar' 230 or len(field) == 0 231 ): 232 values[field] = _np.array([l[j] for l in data[1:]]) 233 j += 1 234 oldfield = field 235 elif field.startswith('SE_'): 236 se[field[3:]] = _np.array([l[j] for l in data[1:]]) 237 j += 1 238 elif field == 'SE': 239 se[oldfield] = _np.array([l[j] for l in data[1:]]) 240 j += 1 241 elif field.startswith('correl_'): 242 correl[field[7:]] = _np.array([l[j:j+N] for l in data[1:]]) 243 j += N 244 elif field == 'correl': 245 correl[oldfield] = _np.array([l[j:j+N] for l in data[1:]]) 246 j += N 247 elif field.startswith('covar_'): 248 covar[field[6:]] = _np.array([l[j:j+N] for l in data[1:]]) 249 j += N 250 elif field == 'covar': 251 covar[oldfield] = _np.array([l[j:j+N] for l in data[1:]]) 252 j += N 253 254 nakedvalues = {} 255 for k in [_ for _ in values]: 256 if ( 257 k not in se 258 and k not in correl 259 and k not in covar 260 ): 261 
nakedvalues[k] = values.pop(k) 262 263 for x in values: 264 if x in covar: 265 if x in se: 266 raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".') 267 if x in correl: 268 raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".') 269 if x in correl: 270 if x not in se: 271 raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".') 272 273 for x in correl: 274 if x in values: 275 covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x]) 276 else: 277 for x1 in values: 278 for x2 in values: 279 if x == f'{x1}_{x2}': 280 if x1 in se: 281 se1 = se[x1] 282 else: 283 if x1 in covar: 284 se1 = _np.diag(covar[x1])**0.5 285 else: 286 raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".') 287 if x2 in se: 288 se2 = se[x2] 289 else: 290 if x2 in covar: 291 se2 = _np.diag(covar[x2])**0.5 292 else: 293 raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".') 294 295 covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2) 296 297 for x in se: 298 if x in values and x not in correl: 299 covar[x] = _np.diag(se[x]**2) 300 301 for k in [_ for _ in covar]: 302 if k not in values: 303 for j1 in values: 304 for j2 in values: 305 if k == f'{j1}_{j2}': 306 covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T 307 308 X = _np.array([_ for k in values for _ in values[k]]) 309 CM = _np.zeros((X.size, X.size)) 310 for i, vi in enumerate(values): 311 for j, vj in enumerate(values): 312 if vi == vj: 313 if vi in covar: 314 CM[N*i:N*i+N,N*j:N*j+N] = covar[vi] 315 else: 316 if f'{vi}_{vj}' in covar: 317 CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}'] 318 319 s = _np.diag(CM)**.5 320 s[s==0] = 1. 
321 invs = _np.diag(s**-1) 322 323 if ( 324 validate_covar 325 and not ( 326 is_symmetric_positive_semidefinite(CM) 327 or is_symmetric_positive_semidefinite(invs @ CM @ invs) 328 ) 329 ): 330 raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.') 331 332 corvalues = uarray(_uc.correlated_values(X, CM)) 333 334 allvalues = nakedvalues 335 336 for i, x in enumerate(values): 337 allvalues[x] = corvalues[i*N:i*N+N] 338 339 return CorrelData(allvalues) 340 341 342def read_csv( 343 filename: str | _os.PathLike, 344 **kwargs, 345): 346 """ 347 Read correlated data from a CSV file. 348 349 **Arguments** 350 - `filename`: `str` or path to the file to read from 351 - `kwargs`: passed to correldata.read_str() 352 """ 353 with open(filename) as fid: 354 return read_str(fid.read(), **kwargs) 355 356 357class uarray(_np.ndarray): 358 """ 359 1-D [ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html) 360 of [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values 361 """ 362 363 def __new__(cls, a): 364 obj = _np.asarray(a).view(cls) 365 return obj 366 367 @property 368 def nv(self): 369 "Return the array of nominal values (read-only)." 
370 return _uc.unumpy.nominal_values(_np.array(self)) 371 372 @property 373 def se(self): 374 "Return the array of standard errors (read-only)" 375 return _uc.unumpy.std_devs(_np.array(self)) 376 377 @property 378 def correl(self): 379 "Return the correlation matrix of the array elements (read-only)" 380 return _np.array(_uc.correlation_matrix(self)) 381 382 @property 383 def covar(self): 384 """Return the covariance matrix of the array elements (read-only)""" 385 return _np.array(_uc.covariance_matrix(self)) 386 387 @property 388 def mahalanobis(self): 389 "Return the squared Mahalanobis distance from zero of the array (read-only)" 390 flatself = self.n.flatten().reshape((1, self.size)) 391 return (flatself @ _np.linalg.inv(self.covar) @ flatself.T)[0,0] 392 393 n = nv 394 "Alias for `uarray.nv`" 395 396 s = se 397 "Alias for `uarray.se`" 398 399 cor = correl 400 "Alias for `uarray.correl`" 401 402 cov = covar 403 "Alias for `uarray.covar`" 404 405 m = mahalanobis 406 "Alias for `uarray.mahalanobis`" 407 408 409class CorrelData(dict): 410 """ 411 Dataframe-like tables of data with correlated uncertainties 412 """ 413 414 def __init__(self, *args, **kwargs): 415 """ 416 **Arguments:** same as for a `dict()` 417 """ 418 super().__init__(*args, **kwargs) 419 for k in self: 420 # cast as array 421 self[k] = _np.asarray(self[k]) 422 # cast as uarray if ufloats are present 423 if any([ 424 isinstance(_, _uc.UFloat) 425 for _ in self[k] 426 ]): 427 self[k] = uarray(self[k]) 428 429 # check that lengths are consistent 430 firstk = next(iter(self)) 431 n = len(self[firstk]) 432 for k in self: 433 assert self[k].shape in [(n,), (n, n)], f'{k}.shape is {self[k].shape} and not ({n}, {n}) as expected' 434 435 # sort keys for uncertainty assignment 436 keys, skeys, corkeys, covkeys = [], [], [], [] 437 for k in self: 438 if k.startswith('SE_'): 439 skeys.append(k[3:]) 440 elif k.startswith('correl_'): 441 corkeys.append(k[7:]) 442 elif k.startswith('covar_'): 443 
covkeys.append(k[6:]) 444 else: 445 keys.append(k) 446 447 for k in covkeys: 448 # check for missing nominal values 449 if k not in keys: 450 raise MissingNominalValue(f'covar_{k} is missing a corresponding nominal value {k}') 451 # check for redundant specification of uncertainty 452 if k in corkeys: 453 raise RedundantUncertainty(f'Both covar_{k} and correl_{k} are specified') 454 if k in skeys: 455 raise RedundantUncertainty(f'Both covar_{k} and SE_{k} are specified') 456 457 for k in corkeys: 458 # check for correl without SE 459 if k not in skeys: 460 raise MissingStandardError(f'correl_{k} is missing a corresponding standard error SE_{k}') 461 462 for k in skeys: 463 # check for missing nominal values 464 if k not in keys: 465 raise MissingNominalValue(f'SE_{k} is missing a corresponding nominal value {k}') 466 467 for k in covkeys: 468 self[k] = uarray(_uc.correlated_values(self[k], self[f'covar_{k}'])) 469 self.pop(f'covar_{k}') 470 471 for k in skeys: 472 se = _np.array(self[f'SE_{k}']) 473 if k in corkeys: 474 correl = _np.array(self[f'correl_{k}']) 475 self.pop(f'correl_{k}') 476 else: 477 correl = _np.eye(len(self[k])) 478 covar = se[None,:] * correl * se[:, None] 479 self[k] = uarray(_uc.correlated_values(self[k], covar)) 480 self.pop(f'SE_{k}') 481 482 @property 483 def size(self): 484 "Returns the number of data rows" 485 k = next(iter(self)) 486 return len(self[k]) 487 488 @property 489 def rows(self): 490 """ 491 Iterator over rows of data 492 493 **Usage:** 494 495 ```py 496 import correldata, numpy 497 498 data = correldata.CorrelData( 499 X = numpy.array([1, 2, 3]), 500 Y = numpy.array([4, 5, 6]), 501 ) 502 503 for r in data.rows: 504 print(r) 505 ``` 506 yields: 507 ``` 508 >>> {'X': np.int64(1), 'Y': np.int64(4)} 509 >>> {'X': np.int64(2), 'Y': np.int64(5)} 510 >>> {'X': np.int64(3), 'Y': np.int64(6)} 511 ``` 512 """ 513 return self._row_iterator() 514 515 def _row_iterator(self): 516 n = next(iter(self.values())).shape[0] 517 for i in 
range(n): 518 yield {k: v[i] for k, v in self.items()} 519 520 def str( 521 self, 522 sep: str = ',', 523 include_fields: list = None, 524 exclude_fields: list = [], 525 float_format: (str | dict | Callable) = 'z.6g', 526 correl_format: (str | dict | Callable) = 'z.6f', 527 default_float_format: (str | Callable) = 'z.6g', 528 default_correl_format: (str | Callable) = 'z.6f', 529 show_nv: bool = True, 530 show_se: bool = True, 531 show_correl: bool = True, 532 show_mixed_correl: bool = True, 533 align: str = '>', 534 atol: float = 1e-12, 535 rtol: float = 1e-12, 536 ): 537 ''' 538 Return CSV-like string 539 540 **Arguments** 541 - `sep`: the CSV separator 542 - `include_fields`: subset of fields to write; if `None`, write all fields 543 - `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`); 544 to exclude only the SE for field `foo`, include `SE_foo`; same goes for `correl_foo` 545 - `float_format`: formatting for float values. May be a string (ex: `'z.3f'`), a callable 546 (ex: `lambda x: '.2f' if x else '0'`), or a dictionary of strings and/or callables, with dict keys 547 corresponding to different fields (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`). 548 - `correl_format`: same as `float_format`, but applies to correlation matrix elements 549 - `default_float_format`: only used when `float_format` is a dict; in that case, fields 550 missing from `float_format.keys()` will use `default_float_format` instead. 551 corresponding to different fields (ex: `{'foo': '.2e', 'bar': `lambda x: str(x)`}`). 
552 - `default_correl_format`: same as `default_float_format`, but applies to `correl_format` 553 - `show_nv`: show nominal values 554 - `show_se`: show standard errors 555 - `show_correl`: show correlations for any given field (ex: `correl_X`) 556 - `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`) 557 - `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values 558 - `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html) 559 when deciding whether a matrix is equal to the identity matrix or to the zero matrix 560 - `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html) 561 when deciding whether a matrix is equal to the identity matrix or to the zero matrix 562 563 564 **Example** 565 566 ```py 567 from correldata import uarray, CorrelData 568 569 X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09)) 570 Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16)) 571 572 data = CorrelData( 573 X = X, 574 Y = Y, 575 Z = X+Y, 576 ) 577 578 print( 579 data.str( 580 float_format = 'z.1f', 581 correl_format = 'z.1f', 582 ) 583 ) 584 ``` 585 yields: 586 ``` 587 X, SE_X, Y, SE_Y, Z, SE_Z, correl_X_Z, , , correl_Y_Z, , 588 1.0, 0.3, 4.0, 0.4, 5.0, 0.5, 0.6, 0.0, 0.0, 0.8, 0.0, 0.0 589 2.0, 0.3, 5.0, 0.4, 7.0, 0.5, 0.0, 0.6, 0.0, 0.0, 0.8, 0.0 590 3.0, 0.3, 6.0, 0.4, 9.0, 0.5, 0.0, 0.0, 0.6, 0.0, 0.0, 0.8 591 ``` 592 ''' 593 if include_fields is None: 594 include_fields = [_ for _ in self] 595 cols, ufields = [], [] 596 for f in include_fields: 597 if f in exclude_fields: 598 continue 599 if isinstance(self[f], uarray): 600 ufields.append(f) 601 N = self[f].size 602 if show_nv: 603 cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f].n]) 604 if show_se and (f'SE_{f}' not in exclude_fields): 605 cols.append([f'SE_{f}'] + [f2s(_, float_format, f, default_float_format) 
for _ in self[f].s]) 606 if show_correl and (f'correl_{f}' not in exclude_fields): 607 CM = _uc.correlation_matrix(self[f]) 608 if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol): 609 for i in range(N): 610 cols.append( 611 ['' if i else f'correl_{f}'] 612 + [ 613 f2s( 614 CM[i,j], 615 correl_format, 616 f, 617 default_correl_format, 618 ) 619 for j in range(N) 620 ] 621 ) 622 elif show_nv: 623 cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f]]) 624 625 if show_mixed_correl: 626 for i in range(len(ufields)): 627 for j in range(i): 628 if f'correl_{ufields[i]}_{ufields[j]}' in exclude_fields or f'correl_{ufields[j]}_{ufields[i]}' in exclude_fields: 629 continue 630 CM = _uc.correlation_matrix((*self[ufields[i]], *self[ufields[j]]))[:N, -N:] 631 if not _np.allclose(CM, _np.zeros((N, N)), atol = atol, rtol = rtol): 632 for k in range(N): 633 cols.append( 634 ['' if k else f'correl_{ufields[j]}_{ufields[i]}'] 635 + [ 636 f2s( 637 CM[k,l], 638 correl_format, 639 f, 640 default_correl_format, 641 ) 642 for l in range(N) 643 ] 644 ) 645 646 lines = list(map(list, zip(*cols))) 647 648 if align: 649 lengths = [max([len(e) for e in l]) for l in cols] 650 for l in lines: 651 for k,ln in enumerate(lengths): 652 l[k] = f'{l[k]:{align}{ln}s}' 653 return '\n'.join([(sep+' ').join(l) for l in lines]) 654 655 return '\n'.join([sep.join(l) for l in lines]) 656 657 def to_csv(self, filename, **kwargs): 658 ''' 659 Write data to a CSV file. 660 661 **Arguments** 662 - `filename`: `str` or path to the CSV file 663 - `kwargs`: passed to `CorrelData.str()` 664 ''' 665 with open(filename, 'w') as fid: 666 return fid.write(self.str(**kwargs)) 667 668 669def as_uarray( 670 X: (uarray | _np.ndarray | _uc.UFloat | float), 671 Xse: (_np.ndarray | float | None) = None, 672 CM: (_np.ndarray | None) = None, 673) -> uarray: 674 """ 675 Convert the input to an uarray. 
If the input is a single float or 676 [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html), 677 yields an uarray of size 1. 678 679 **Arguments** 680 * `X`: nominal value(s) 681 * `CM`: covariance matrix of X; not needed if elements of X are of type 682 [`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html) 683 or if `Xse` is specified. 684 * `Xse`,: SE of X; not needed if elements of X are of type 685 [`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html) 686 or if `CM` is specified. 687 688 If neither `CM` nor `Xse` are specified, assume SE = 0. 689 """ 690 691 if isinstance(X, uarray): 692 return X 693 694 if isinstance(X, _np.ndarray): 695 if _np.all([isinstance(_, _uc.UFloat) for _ in X]): 696 return uarray(X) 697 else: 698 X = X.astype(float) 699 700 if CM is not None: 701 if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.') 702 703 if CM is None: 704 if Xse is None: 705 Xse = X * 0 706 707 CM = _np.diag((*Xse,))**2 708 709 return uarray(_uc.correlated_values(X, CM)) 710 711 if isinstance(X, _uc.UFloat): 712 return uarray([X]) 713 714 if isinstance(X, (float, int)): 715 716 if CM is not None: 717 if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.') 718 Xse = CM[0,0]**0.5 719 720 return uarray([_uc.ufloat(X, Xse)]) 721 722 723def as_pair_of_uarrays( 724 X: (uarray | _np.ndarray | _uc.UFloat | float), 725 Y: (uarray | _np.ndarray | _uc.UFloat | float), 726 Xse: (_np.ndarray | float | None) = None, 727 Yse: (_np.ndarray | float | None) = None, 728 CM: (_np.ndarray | None) = None, 729) -> uarray: 730 """ 731 Convert the input to a pair of uarrays. 
732 733 **Arguments** 734 * `X`: x values 735 * `Y`: y values 736 * `CM`: covariance matrix of `(*X, *Y)`; not needed if elements of X and Y are of type 737 [`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html) 738 or if (`Xse`, `Yse`) are specified. 739 * `Xse`, `Yse`: SE of X and Y; not needed if elements of X and Y are of type 740 [`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html) 741 or if `CM` is specified. 742 743 If neither `CM`, `Xse` nor `Yse` are specified, assume SE = 0. 744 """ 745 746 if type(X) is not type(Y): 747 raise TypeError(f'X ({type(X)}) and Y ({type(Y)}) must have the same type.') 748 749 if isinstance(X, uarray): 750 return (X, Y) 751 752 if isinstance(X, _np.ndarray): 753 if ( 754 _np.all([isinstance(_, _uc.UFloat) for _ in X]) 755 and 756 _np.all([isinstance(_, _uc.UFloat) for _ in Y]) 757 ): 758 return uarray(X), uarray(Y) 759 else: 760 X = X.astype(float) 761 Y = Y.astype(float) 762 763 if CM is not None: 764 if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.') 765 if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.') 766 767 if CM is None: 768 if Xse is None: 769 Xse = X * 0 770 if Yse is None: 771 Yse = Y * 0 772 773 CMx = _np.diag((*Xse,))**2 774 CMy = _np.diag((*Yse,))**2 775 return uarray(_uc.correlated_values(X, CMx)), uarray(_uc.correlated_values(Y, CMy)) 776 777 else: 778 XY = uarray(_uc.correlated_values([*X, *Y], CM)) 779 return XY[:X.size], XY[X.size:] 780 781 if isinstance(X, _uc.UFloat): 782 return uarray([X]), uarray([Y]) 783 784 if isinstance(X, (float, int)): 785 786 if CM is not None: 787 if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.') 788 if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.') 789 790 if CM is None: 791 if Xse is 
None: raise ValueError('Not enough information: specify either CM or Xse.') 792 if Yse is None: raise ValueError('Not enough information: specify either CM or Yse.') 793 794 CM = _np.diag([Xse, Yse])**2 795 796 XY = uarray(_uc.correlated_values([X, Y], CM)) 797 return XY[:1], XY[1:]
class MissingNominalValue(Exception):
    """Exception signalling that one or more nominal values are missing."""
Exception raised in case of missing nominal value(s)
class MissingStandardError(Exception):
    """Exception signalling that one or more standard errors are missing."""
Exception raised in case of missing standard error(s)
class RedundantUncertainty(Exception):
    """Exception signalling a redundant or ambiguous specification of uncertainties."""
Exception raised in case of redundant/ambiguous specification of uncertainties
def nominal_values(arr):
    """
    Extract the nominal values of the numbers held in NumPy array `arr`.

    Elements that are not numbers with uncertainties are passed through
    untouched, since a numpy.array may hold numbers with uncertainties and
    pure floats at the same time.

    When `arr` is a unumpy.matrix, the result is a numpy.matrix, because the
    resulting matrix no longer contains numbers with uncertainties.
    """
    # First strip the uncertainties, then convert the container type.
    stripped = to_nominal_values(arr)
    return unumpy_to_numpy_matrix(stripped)
Alias for uncertainties.unumpy.nominal_values()
38def smart_type(s: str) -> (int | float | str): 39 ''' 40 Tries to convert string `s` to an `int`, or to an `float` if that fails. 41 If both fail, return the original string unchanged. 42 ''' 43 if s.isdigit(): return int(s) 44 try: return float(s) 45 except: pass 46 return s
Tries to convert string s to an int, or to a float if that fails.
If both fail, return the original string unchanged.
def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool:
    """
    Test whether 2-D array `M` is symmetric and positive semidefinite.

    Returns `False` if `M` is not a square 2-D array, if it is not symmetric
    (within `numpy.allclose()` tolerances), or if any of its eigenvalues is
    neither strictly positive nor close to zero (within `numpy.isclose()`
    tolerances). Returns `True` otherwise.
    """
    M = _np.asarray(M)

    # A matrix which is not square cannot be symmetric.
    if M.ndim != 2 or M.shape[0] != M.shape[1]:
        return False

    # Check symmetry first: only symmetric matrices are guaranteed to have
    # real eigenvalues, so the sign test below is meaningless otherwise.
    if not _np.allclose(M, M.T):
        return False

    # eigvalsh() is the solver dedicated to symmetric matrices and always
    # returns real eigenvalues.
    ev = _np.linalg.eigvalsh(M)

    # All eigenvalues must be either strictly positive or close to zero.
    return bool(_np.all((ev > 0) | _np.isclose(ev, 0)))
Test whether 2-D array M is symmetric and positive semidefinite.
60def f2s( 61 x: Any, 62 f: (str | Callable | dict), 63 k: Hashable = None, 64 fb: (str | Callable) = 'z.6g', 65) -> str: 66 ''' 67 Format `x` according to format `f` 68 69 * If `f` is a string, return `f'{x:{f}}'` 70 * If `f` is a callable, return `f(x)` 71 * If `f` is a dict and optional argument `k` is a hashable, 72 return f2s(x, f[k]), otherwise return f2s(x, fb) 73 ''' 74 if isinstance (x, str): 75 return x 76 if isinstance (f, str): 77 return f'{x:{f}}' 78 if isinstance (f, Callable): 79 return f(x) 80 if isinstance (f, dict): 81 if k in f: 82 return f2s(x, f[k]) 83 if isinstance (fb, str): 84 return f'{x:{fb}}' 85 if isinstance (fb, Callable): 86 return fb(x) 87 raise TypeError(f'f2s() formatting argument f = {repr(f)} is neither a string nor a callable nor a dict.')
Format x according to format f
- If f is a string, return f'{x:{f}}'
- If f is a callable, return f(x)
- If f is a dict and optional argument k is a hashable, return f2s(x, f[k]), otherwise return f2s(x, fb)
def read_list(
    data: list,
):
    """
    Read data from a list of dicts and return a `CorrelData` instance.

    Valid arguments are lists of dicts where all dicts share a non-empty set of keys,
    i.e. there must be one or more keys that all dicts have in common. Keys which
    are missing from at least one dict are ignored.

    If `data` is empty, or if the dicts have no key in common, a warning is issued
    and `None` is returned.

    > [!NOTE]
    > Primarily intended for data where uncertainties are already specified as
    > [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values.
    > In other words, this function offers no built-in way to specify uncertainties
    > (no keywords such as `SE`, `correl`, or `covar`).

    > [!TIP]
    > **Example**
    >
    > ```py
    > import correldata
    >
    > foo = correldata.CorrelData(
    >     X = [1., 2., 3.],
    >     SE_X = [1., 1., 1.],
    >     Y = [4., 5., 6.],
    >     SE_Y = [1., 1., 1.],
    > )
    >
    > U = foo['X'] + foo['Y']
    > V = foo['X'] - foo['Y']
    >
    > bar = correldata.read_list([
    >     dict(Name = 'abc', U = U[0], V = V[0]),
    >     dict(Name = 'def', U = U[1], V = V[1]),
    >     dict(Name = 'ghi', U = U[2], V = V[2]),
    > ])
    >
    > print(bar.str())
    > ```
    > yields:
    > ```text
    > Name, U,    SE_U,  V,    SE_V
    >  abc, 5, 1.41421, -3, 1.41421
    >  def, 7, 1.41421, -3, 1.41421
    >  ghi, 9, 1.41421, -3, 1.41421
    > ```
    """
    if len(data) == 0:
        # warn() returns None, so it must be called, not raised
        _wrn.warn("Input list is empty; returning None.")
        return None

    # keep only the keys present in every row, in the order of the first row
    shared_keys = [k for k in data[0] if all(k in row for row in data[1:])]

    if len(shared_keys) == 0:
        _wrn.warn("No common subset of keys; returning None.")
        return None

    # transpose the list of rows into a dict of columns
    return CorrelData({k: [row[k] for row in data] for k in shared_keys})
Read data from a list of dicts and return a CorrelData instance.
Valid arguments are lists of dicts where all dicts share a non-empty set of keys, i.e. there must be one or more keys that all dicts have in common.
Primarily intended for data where uncertainties are already specified as
UFloat values.
In other words, this function offers no built-in way to specify uncertainties
(no keywords such as SE, correl, or covar).
Example
import correldata
foo = correldata.CorrelData(
X = [1., 2., 3.],
SE_X = [1., 1., 1.],
Y = [4., 5., 6.],
SE_Y = [1., 1., 1.],
)
U = foo['X'] + foo['Y']
V = foo['X'] - foo['Y']
bar = correldata.read_list([
dict(Name = 'abc', U = U[0], V = V[0]),
dict(Name = 'def', U = U[1], V = V[1]),
dict(Name = 'ghi', U = U[2], V = V[2]),
])
print(bar.str())
yields:
Name, U, SE_U, V, SE_V
abc, 5, 1.41421, -3, 1.41421
def, 7, 1.41421, -3, 1.41421
ghi, 9, 1.41421, -3, 1.41421
def read_str(
    data: str,
    sep: str = ',',
    validate_covar: bool = True,
):
    """
    Read data from a CSV-like string and return a `CorrelData` instance.

    Column names are interpreted in the following way:
    * In most cases, each column is converted to a dict value, with the corresponding
    dict key being the column's label.
    * Columns labelled `SE` are interpreted as specifying the standard
    error for the latest preceding data column.
    * Columns labelled `correl` are interpreted as specifying the
    correlation matrix for the latest preceding data column. In that case, column labels
    are ignored for the rest of the columns belonging to this matrix.
    * Columns labelled `covar` are interpreted as specifying the
    covariance matrix for the latest preceding data column. In that case, column labels
    are ignored for the rest of the columns belonging to this matrix.
    * `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than
    the latest preceding data column, by adding an underscore followed by the variable's
    label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
    * `correl` and `covar` may also be specified for any pair of variables, by adding an
    underscore followed by the two variable labels, joined by a second underscore
    (ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables
    correspond, respectively, to the lines and columns of this matrix.
    * Columns with an empty label which do not belong to a matrix are ignored.
    * Blank lines are ignored.
    * Exceptions will be raised, for any given variable:
        - when specifying both `covar` and any combination of (`SE`, `correl`)
        - when specifying `correl` without `SE`

    **Arguments**
    - `data`: a CSV-like string
    - `sep`: the CSV separator
    - `validate_covar`: whether to check that the overall covariance matrix
    is symmetric and positive semidefinite. Specifying `validate_covar = False`
    bypasses this computationally expensive step.

    **Raises**
    - `ValueError` if `data` contains no header line
    - `KeyError` if uncertainties are over- or under-specified, or if a bare
    `SE`/`correl`/`covar` column has no preceding data column
    - `numpy.linalg.LinAlgError` if `validate_covar` is true and the overall
    covariance matrix is not symmetric positive-semidefinite

    **Example**
    ```py
    import correldata

    data = '''
    Sample, Tacid,  D47,   SE,  correl,,,  D48, covar,,,          correl_D47_D48
       FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
       BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
       BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
    '''[1:-1]

    print(correldata.read_str(data))
    ```
    yields:
    ```
    {
        'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
        'Tacid': array([90., 90., 90.]),
        'D47': uarray([0.245+/-0.005, 0.246+/-0.005, 0.247+/-0.005], dtype=object),
        'D48': uarray([0.145+/-0.02, 0.146+/-0.02, 0.147+/-0.02], dtype=object)
    }
    ```
    """

    # Blank lines (typically a trailing newline) carry no data and would
    # otherwise be counted as a row, so drop them before parsing.
    lines = [line for line in data.split('\n') if line.strip()]
    if not lines:
        raise ValueError('No header line found in data.')

    # Labels stay plain strings: a numeric label must not be turned into a number.
    header = [label.strip() for label in lines[0].split(sep)]
    rows = [[smart_type(e.strip()) for e in line.split(sep)] for line in lines[1:]]
    N = len(rows)

    values, se, correl, covar = {}, {}, {}, {}
    stores = {'SE': se, 'correl': correl, 'covar': covar}
    oldfield = None
    j = 0
    while j < len(header):
        field = header[j]
        prefix, underscore, suffix = field.partition('_')
        if prefix in stores:
            if underscore:
                target = suffix
            elif oldfield is None:
                raise KeyError(f'Column "{field}" is not preceded by any data column.')
            else:
                target = oldfield
            if prefix == 'SE':
                se[target] = _np.array([row[j] for row in rows])
                j += 1
            else:
                # a matrix spans N columns; only its first column is labelled
                stores[prefix][target] = _np.array([row[j:j+N] for row in rows])
                # always advance, even when there are no data rows
                j += max(N, 1)
        elif field:
            values[field] = _np.array([row[j] for row in rows])
            oldfield = field
            j += 1
        else:
            # stray unlabelled column: skip it instead of looping forever
            j += 1

    # variables without any uncertainty information are passed through unchanged
    nakedvalues = {}
    for k in list(values):
        if k not in se and k not in correl and k not in covar:
            nakedvalues[k] = values.pop(k)

    for x in values:
        if x in covar:
            if x in se:
                raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".')
            if x in correl:
                raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".')
        if x in correl:
            if x not in se:
                raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".')

    def _standard_errors(name, mixed_label):
        # SE of variable `name`, taken from its SE column or from the diagonal of its covar
        if name in se:
            return se[name]
        if name in covar:
            return _np.diag(covar[name])**0.5
        raise KeyError(f'Not enough information: correl_{mixed_label} is specified without SE for variable "{name}".')

    for x in correl:
        if x in values:
            covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x])
        else:
            # correlation between two different variables, labelled "<x1>_<x2>"
            for x1 in values:
                for x2 in values:
                    if x == f'{x1}_{x2}':
                        se1 = _standard_errors(x1, x)
                        se2 = _standard_errors(x2, x)
                        covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2)

    for x in se:
        if x in values and x not in correl:
            covar[x] = _np.diag(se[x]**2)

    # mirror each cross-covariance block so that the full matrix is symmetric
    for k in list(covar):
        if k not in values:
            for j1 in values:
                for j2 in values:
                    if k == f'{j1}_{j2}':
                        covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T

    X = _np.array([_ for k in values for _ in values[k]])
    CM = _np.zeros((X.size, X.size))
    for i, vi in enumerate(values):
        for j, vj in enumerate(values):
            if vi == vj:
                if vi in covar:
                    CM[N*i:N*i+N,N*j:N*j+N] = covar[vi]
            else:
                if f'{vi}_{vj}' in covar:
                    CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}']

    if validate_covar:
        # also test the matrix rescaled by the standard errors (unit diagonal)
        s = _np.diag(CM)**.5
        s[s==0] = 1.
        invs = _np.diag(s**-1)
        if not (
            is_symmetric_positive_semidefinite(CM)
            or is_symmetric_positive_semidefinite(invs @ CM @ invs)
        ):
            raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.')

    corvalues = uarray(_uc.correlated_values(X, CM))

    allvalues = nakedvalues

    for i, x in enumerate(values):
        allvalues[x] = corvalues[i*N:i*N+N]

    return CorrelData(allvalues)
Read data from a CSV-like string and return a CorrelData instance.
Column names are interpreted in the following way:
- In most cases, each column is converted to a dict value, with the corresponding dict key being the column's label.
- Columns whose label starts with `SE` are interpreted as specifying the standard error for the latest preceding data column.
- Columns whose label starts with `correl` are interpreted as specifying the correlation matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
- Columns whose label starts with `covar` are interpreted as specifying the covariance matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
- `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than the latest preceding data column, by adding an underscore followed by the variable's label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
- `correl` and `covar` may also be specified for any pair of variables, by adding an underscore followed by the two variable labels, joined by a second underscore (ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables correspond, respectively, to the rows and columns of this matrix.
- Exceptions will be raised, for any given variable:
  - when specifying both `covar` and any combination of (`SE`, `correl`)
  - when specifying `correl` without `SE`
Arguments
- `data`: a CSV-like string
- `sep`: the CSV separator
- `validate_covar`: whether to check that the overall covariance matrix is symmetric and positive semidefinite. Specifying `validate_covar = False` bypasses this computationally expensive step.
Example
import correldata
data = '''
Sample, Tacid, D47, SE, correl,,, D48, covar,,, correl_D47_D48
FOO, 90., .245, .005, 1, 0.5, 0.5, .145, 4e-4, 1e-4, 1e-4, 0.5, 0, 0
BAR, 90., .246, .005, 0.5, 1, 0.5, .146, 1e-4, 4e-4, 1e-4, 0, 0.5, 0
BAZ, 90., .247, .005, 0.5, 0.5, 1, .147, 1e-4, 1e-4, 4e-4, 0, 0, 0.5
'''[1:-1]
print(correldata.read_str(data))
yields:
{
'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
'Tacid': array([90., 90., 90.]),
'D47': uarray([0.245+/-0.005, 0.246+/-0.005, 0.247+/-0.005], dtype=object),
'D48': uarray([0.145+/-0.02, 0.146+/-0.02, 0.147+/-0.02], dtype=object)
}
def read_csv(filename: str | _os.PathLike, **kwargs):
    """
    Load a CSV file and parse its contents with `read_str()`.

    **Arguments**
    - `filename`: `str` or path to the file to read from
    - `kwargs`: passed to correldata.read_str()
    """
    with open(filename) as stream:
        text = stream.read()
    return read_str(text, **kwargs)
Read correlated data from a CSV file.
Arguments
- `filename`: `str` or path to the file to read from
- `kwargs`: passed to `correldata.read_str()`
class uarray(_np.ndarray):
    """
    1-D [ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html)
    whose elements are [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values
    """

    def __new__(cls, a):
        # view the input as an instance of this subclass
        return _np.asarray(a).view(cls)

    @property
    def nv(self):
        "Return the array of nominal values (read-only)."
        plain = _np.array(self)
        return _uc.unumpy.nominal_values(plain)

    @property
    def se(self):
        "Return the array of standard errors (read-only)"
        plain = _np.array(self)
        return _uc.unumpy.std_devs(plain)

    @property
    def correl(self):
        "Return the correlation matrix of the array elements (read-only)"
        matrix = _uc.correlation_matrix(self)
        return _np.array(matrix)

    @property
    def covar(self):
        "Return the covariance matrix of the array elements (read-only)"
        matrix = _uc.covariance_matrix(self)
        return _np.array(matrix)

    @property
    def mahalanobis(self):
        "Return the squared Mahalanobis distance from zero of the array (read-only)"
        row = self.n.flatten().reshape((1, self.size))
        inverse_covar = _np.linalg.inv(self.covar)
        return (row @ inverse_covar @ row.T)[0, 0]

    n = nv
    "Alias for `uarray.nv`"

    s = se
    "Alias for `uarray.se`"

    cor = correl
    "Alias for `uarray.correl`"

    cov = covar
    "Alias for `uarray.covar`"

    m = mahalanobis
    "Alias for `uarray.mahalanobis`"
@property
def nv(self):
    "Nominal values of the elements, as a plain array (read-only)."
    plain = _np.array(self)
    return _uc.unumpy.nominal_values(plain)
Return the array of nominal values (read-only).
@property
def se(self):
    "Standard errors of the elements, as a plain array (read-only)"
    plain = _np.array(self)
    return _uc.unumpy.std_devs(plain)
Return the array of standard errors (read-only)
@property
def correl(self):
    "Correlation matrix of the array elements, as a plain array (read-only)"
    matrix = _uc.correlation_matrix(self)
    return _np.array(matrix)
Return the correlation matrix of the array elements (read-only)
@property
def covar(self):
    "Covariance matrix of the array elements, as a plain array (read-only)"
    matrix = _uc.covariance_matrix(self)
    return _np.array(matrix)
Return the covariance matrix of the array elements (read-only)
@property
def mahalanobis(self):
    "Squared Mahalanobis distance from zero of the array (read-only)"
    # nominal values as a single row vector
    row = self.n.flatten().reshape((1, self.size))
    inverse_covar = _np.linalg.inv(self.covar)
    return (row @ inverse_covar @ row.T)[0, 0]
Return the squared Mahalanobis distance from zero of the array (read-only)
@property
def nv(self):
    "Return the nominal values of the array elements (read-only)."
    extract = _uc.unumpy.nominal_values
    return extract(_np.array(self))
Alias for uarray.nv
@property
def se(self):
    "Return the standard errors of the array elements (read-only)"
    extract = _uc.unumpy.std_devs
    return extract(_np.array(self))
Alias for uarray.se
@property
def correl(self):
    "Return the correlation matrix between array elements (read-only)"
    as_array = _np.array
    return as_array(_uc.correlation_matrix(self))
Alias for uarray.correl
@property
def covar(self):
    "Return the covariance matrix between array elements (read-only)"
    as_array = _np.array
    return as_array(_uc.covariance_matrix(self))
Alias for uarray.covar
410class CorrelData(dict): 411 """ 412 Dataframe-like tables of data with correlated uncertainties 413 """ 414 415 def __init__(self, *args, **kwargs): 416 """ 417 **Arguments:** same as for a `dict()` 418 """ 419 super().__init__(*args, **kwargs) 420 for k in self: 421 # cast as array 422 self[k] = _np.asarray(self[k]) 423 # cast as uarray if ufloats are present 424 if any([ 425 isinstance(_, _uc.UFloat) 426 for _ in self[k] 427 ]): 428 self[k] = uarray(self[k]) 429 430 # check that lengths are consistent 431 firstk = next(iter(self)) 432 n = len(self[firstk]) 433 for k in self: 434 assert self[k].shape in [(n,), (n, n)], f'{k}.shape is {self[k].shape} and not ({n}, {n}) as expected' 435 436 # sort keys for uncertainty assignment 437 keys, skeys, corkeys, covkeys = [], [], [], [] 438 for k in self: 439 if k.startswith('SE_'): 440 skeys.append(k[3:]) 441 elif k.startswith('correl_'): 442 corkeys.append(k[7:]) 443 elif k.startswith('covar_'): 444 covkeys.append(k[6:]) 445 else: 446 keys.append(k) 447 448 for k in covkeys: 449 # check for missing nominal values 450 if k not in keys: 451 raise MissingNominalValue(f'covar_{k} is missing a corresponding nominal value {k}') 452 # check for redundant specification of uncertainty 453 if k in corkeys: 454 raise RedundantUncertainty(f'Both covar_{k} and correl_{k} are specified') 455 if k in skeys: 456 raise RedundantUncertainty(f'Both covar_{k} and SE_{k} are specified') 457 458 for k in corkeys: 459 # check for correl without SE 460 if k not in skeys: 461 raise MissingStandardError(f'correl_{k} is missing a corresponding standard error SE_{k}') 462 463 for k in skeys: 464 # check for missing nominal values 465 if k not in keys: 466 raise MissingNominalValue(f'SE_{k} is missing a corresponding nominal value {k}') 467 468 for k in covkeys: 469 self[k] = uarray(_uc.correlated_values(self[k], self[f'covar_{k}'])) 470 self.pop(f'covar_{k}') 471 472 for k in skeys: 473 se = _np.array(self[f'SE_{k}']) 474 if k in corkeys: 475 
correl = _np.array(self[f'correl_{k}']) 476 self.pop(f'correl_{k}') 477 else: 478 correl = _np.eye(len(self[k])) 479 covar = se[None,:] * correl * se[:, None] 480 self[k] = uarray(_uc.correlated_values(self[k], covar)) 481 self.pop(f'SE_{k}') 482 483 @property 484 def size(self): 485 "Returns the number of data rows" 486 k = next(iter(self)) 487 return len(self[k]) 488 489 @property 490 def rows(self): 491 """ 492 Iterator over rows of data 493 494 **Usage:** 495 496 ```py 497 import correldata, numpy 498 499 data = correldata.CorrelData( 500 X = numpy.array([1, 2, 3]), 501 Y = numpy.array([4, 5, 6]), 502 ) 503 504 for r in data.rows: 505 print(r) 506 ``` 507 yields: 508 ``` 509 >>> {'X': np.int64(1), 'Y': np.int64(4)} 510 >>> {'X': np.int64(2), 'Y': np.int64(5)} 511 >>> {'X': np.int64(3), 'Y': np.int64(6)} 512 ``` 513 """ 514 return self._row_iterator() 515 516 def _row_iterator(self): 517 n = next(iter(self.values())).shape[0] 518 for i in range(n): 519 yield {k: v[i] for k, v in self.items()} 520 521 def str( 522 self, 523 sep: str = ',', 524 include_fields: list = None, 525 exclude_fields: list = [], 526 float_format: (str | dict | Callable) = 'z.6g', 527 correl_format: (str | dict | Callable) = 'z.6f', 528 default_float_format: (str | Callable) = 'z.6g', 529 default_correl_format: (str | Callable) = 'z.6f', 530 show_nv: bool = True, 531 show_se: bool = True, 532 show_correl: bool = True, 533 show_mixed_correl: bool = True, 534 align: str = '>', 535 atol: float = 1e-12, 536 rtol: float = 1e-12, 537 ): 538 ''' 539 Return CSV-like string 540 541 **Arguments** 542 - `sep`: the CSV separator 543 - `include_fields`: subset of fields to write; if `None`, write all fields 544 - `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`); 545 to exclude only the SE for field `foo`, include `SE_foo`; same goes for `correl_foo` 546 - `float_format`: formatting for float values. 
May be a string (ex: `'z.3f'`), a callable 547 (ex: `lambda x: '.2f' if x else '0'`), or a dictionary of strings and/or callables, with dict keys 548 corresponding to different fields (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`). 549 - `correl_format`: same as `float_format`, but applies to correlation matrix elements 550 - `default_float_format`: only used when `float_format` is a dict; in that case, fields 551 missing from `float_format.keys()` will use `default_float_format` instead. 552 corresponding to different fields (ex: `{'foo': '.2e', 'bar': `lambda x: str(x)`}`). 553 - `default_correl_format`: same as `default_float_format`, but applies to `correl_format` 554 - `show_nv`: show nominal values 555 - `show_se`: show standard errors 556 - `show_correl`: show correlations for any given field (ex: `correl_X`) 557 - `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`) 558 - `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values 559 - `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html) 560 when deciding whether a matrix is equal to the identity matrix or to the zero matrix 561 - `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html) 562 when deciding whether a matrix is equal to the identity matrix or to the zero matrix 563 564 565 **Example** 566 567 ```py 568 from correldata import uarray, CorrelData 569 570 X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09)) 571 Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16)) 572 573 data = CorrelData( 574 X = X, 575 Y = Y, 576 Z = X+Y, 577 ) 578 579 print( 580 data.str( 581 float_format = 'z.1f', 582 correl_format = 'z.1f', 583 ) 584 ) 585 ``` 586 yields: 587 ``` 588 X, SE_X, Y, SE_Y, Z, SE_Z, correl_X_Z, , , correl_Y_Z, , 589 1.0, 0.3, 4.0, 0.4, 5.0, 0.5, 0.6, 0.0, 0.0, 0.8, 0.0, 0.0 590 2.0, 0.3, 5.0, 0.4, 7.0, 0.5, 0.0, 
0.6, 0.0, 0.0, 0.8, 0.0 591 3.0, 0.3, 6.0, 0.4, 9.0, 0.5, 0.0, 0.0, 0.6, 0.0, 0.0, 0.8 592 ``` 593 ''' 594 if include_fields is None: 595 include_fields = [_ for _ in self] 596 cols, ufields = [], [] 597 for f in include_fields: 598 if f in exclude_fields: 599 continue 600 if isinstance(self[f], uarray): 601 ufields.append(f) 602 N = self[f].size 603 if show_nv: 604 cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f].n]) 605 if show_se and (f'SE_{f}' not in exclude_fields): 606 cols.append([f'SE_{f}'] + [f2s(_, float_format, f, default_float_format) for _ in self[f].s]) 607 if show_correl and (f'correl_{f}' not in exclude_fields): 608 CM = _uc.correlation_matrix(self[f]) 609 if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol): 610 for i in range(N): 611 cols.append( 612 ['' if i else f'correl_{f}'] 613 + [ 614 f2s( 615 CM[i,j], 616 correl_format, 617 f, 618 default_correl_format, 619 ) 620 for j in range(N) 621 ] 622 ) 623 elif show_nv: 624 cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f]]) 625 626 if show_mixed_correl: 627 for i in range(len(ufields)): 628 for j in range(i): 629 if f'correl_{ufields[i]}_{ufields[j]}' in exclude_fields or f'correl_{ufields[j]}_{ufields[i]}' in exclude_fields: 630 continue 631 CM = _uc.correlation_matrix((*self[ufields[i]], *self[ufields[j]]))[:N, -N:] 632 if not _np.allclose(CM, _np.zeros((N, N)), atol = atol, rtol = rtol): 633 for k in range(N): 634 cols.append( 635 ['' if k else f'correl_{ufields[j]}_{ufields[i]}'] 636 + [ 637 f2s( 638 CM[k,l], 639 correl_format, 640 f, 641 default_correl_format, 642 ) 643 for l in range(N) 644 ] 645 ) 646 647 lines = list(map(list, zip(*cols))) 648 649 if align: 650 lengths = [max([len(e) for e in l]) for l in cols] 651 for l in lines: 652 for k,ln in enumerate(lengths): 653 l[k] = f'{l[k]:{align}{ln}s}' 654 return '\n'.join([(sep+' ').join(l) for l in lines]) 655 656 return '\n'.join([sep.join(l) for l in lines]) 657 658 
def to_csv(self, filename, **kwargs): 659 ''' 660 Write data to a CSV file. 661 662 **Arguments** 663 - `filename`: `str` or path to the CSV file 664 - `kwargs`: passed to `CorrelData.str()` 665 ''' 666 with open(filename, 'w') as fid: 667 return fid.write(self.str(**kwargs))
Dataframe-like tables of data with correlated uncertainties
def __init__(self, *args, **kwargs):
    """
    **Arguments:** same as for a `dict()`

    Every value is cast to a numpy array (to a `uarray` if it holds UFloat
    elements). Keys of the form `SE_foo`, `correl_foo`, and `covar_foo` are
    consumed to assign uncertainties to field `foo`, then removed.

    **Raises**
    - `MissingNominalValue` if `SE_foo` or `covar_foo` is given without `foo`
    - `MissingStandardError` if `correl_foo` is given without `SE_foo`
    - `RedundantUncertainty` if `covar_foo` is combined with `SE_foo` or `correl_foo`
    """
    super().__init__(*args, **kwargs)
    for k in self:
        # cast as array
        self[k] = _np.asarray(self[k])
        # cast as uarray if ufloats are present
        if any([
            isinstance(_, _uc.UFloat)
            for _ in self[k]
        ]):
            self[k] = uarray(self[k])

    # an empty table has nothing to validate or convert
    if not self:
        return

    # check that lengths are consistent
    firstk = next(iter(self))
    n = len(self[firstk])
    for k in self:
        assert self[k].shape in [(n,), (n, n)], f'{k}.shape is {self[k].shape} and not ({n},) or ({n}, {n}) as expected'

    # sort keys for uncertainty assignment
    keys, skeys, corkeys, covkeys = [], [], [], []
    for k in self:
        if k.startswith('SE_'):
            skeys.append(k[3:])
        elif k.startswith('correl_'):
            corkeys.append(k[7:])
        elif k.startswith('covar_'):
            covkeys.append(k[6:])
        else:
            keys.append(k)

    for k in covkeys:
        # check for missing nominal values
        if k not in keys:
            raise MissingNominalValue(f'covar_{k} is missing a corresponding nominal value {k}')
        # check for redundant specification of uncertainty
        if k in corkeys:
            raise RedundantUncertainty(f'Both covar_{k} and correl_{k} are specified')
        if k in skeys:
            raise RedundantUncertainty(f'Both covar_{k} and SE_{k} are specified')

    for k in corkeys:
        # check for correl without SE
        if k not in skeys:
            raise MissingStandardError(f'correl_{k} is missing a corresponding standard error SE_{k}')

    for k in skeys:
        # check for missing nominal values
        if k not in keys:
            raise MissingNominalValue(f'SE_{k} is missing a corresponding nominal value {k}')

    for k in covkeys:
        self[k] = uarray(_uc.correlated_values(self[k], self[f'covar_{k}']))
        self.pop(f'covar_{k}')

    for k in skeys:
        se = _np.array(self[f'SE_{k}'])
        if k in corkeys:
            correl = _np.array(self[f'correl_{k}'])
            self.pop(f'correl_{k}')
        else:
            # no correlation specified: treat elements as independent
            correl = _np.eye(len(self[k]))
        covar = se[None,:] * correl * se[:, None]
        self[k] = uarray(_uc.correlated_values(self[k], covar))
        self.pop(f'SE_{k}')
Arguments: same as for a dict()
@property
def size(self):
    "Returns the number of data rows (zero for an empty table)"
    # an empty table has no first column to measure
    if not self:
        return 0
    return len(next(iter(self.values())))
Returns the number of data rows
@property
def rows(self):
    """
    Iterate over the table one row at a time

    **Usage:**

    ```py
    import correldata, numpy

    data = correldata.CorrelData(
        X = numpy.array([1, 2, 3]),
        Y = numpy.array([4, 5, 6]),
    )

    for r in data.rows:
        print(r)
    ```
    yields:
    ```
    >>> {'X': np.int64(1), 'Y': np.int64(4)}
    >>> {'X': np.int64(2), 'Y': np.int64(5)}
    >>> {'X': np.int64(3), 'Y': np.int64(6)}
    ```
    """
    row_iterator = self._row_iterator()
    return row_iterator
Iterator over rows of data
Usage:
import correldata, numpy
data = correldata.CorrelData(
X = numpy.array([1, 2, 3]),
Y = numpy.array([4, 5, 6]),
)
for r in data.rows:
print(r)
yields:
>>> {'X': np.int64(1), 'Y': np.int64(4)}
>>> {'X': np.int64(2), 'Y': np.int64(5)}
>>> {'X': np.int64(3), 'Y': np.int64(6)}
def str(
    self,
    sep: str = ',',
    include_fields: list = None,
    exclude_fields: list = None,
    float_format: (str | dict | Callable) = 'z.6g',
    correl_format: (str | dict | Callable) = 'z.6f',
    default_float_format: (str | Callable) = 'z.6g',
    default_correl_format: (str | Callable) = 'z.6f',
    show_nv: bool = True,
    show_se: bool = True,
    show_correl: bool = True,
    show_mixed_correl: bool = True,
    align: str = '>',
    atol: float = 1e-12,
    rtol: float = 1e-12,
):
    '''
    Return CSV-like string

    **Arguments**
    - `sep`: the CSV separator
    - `include_fields`: subset of fields to write; if `None`, write all fields
    - `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`);
      to exclude only the SE for field `foo`, list `SE_foo` here; same goes for `correl_foo`
      and for `correl_foo_bar`; if `None`, exclude nothing
    - `float_format`: formatting for float values. May be a string (ex: `'z.3f'`), a callable
      returning the formatted text (ex: `lambda x: f'{x:.2f}' if x else '0'`), or a dictionary
      of strings and/or callables, with dict keys corresponding to different fields
      (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`).
    - `correl_format`: same as `float_format`, but applies to correlation matrix elements;
      when it is a dict, the correlations between fields `foo` and `bar` are looked up
      under key `'foo_bar'`, with the fields in the order used in the column label
    - `default_float_format`: only used when `float_format` is a dict; in that case, fields
      missing from `float_format.keys()` will use `default_float_format` instead.
    - `default_correl_format`: same as `default_float_format`, but applies to `correl_format`
    - `show_nv`: show nominal values
    - `show_se`: show standard errors
    - `show_correl`: show correlations for any given field (ex: `correl_X`)
    - `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`)
    - `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
    - `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
      when deciding whether a matrix is equal to the identity matrix or to the zero matrix
    - `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
      when deciding whether a matrix is equal to the identity matrix or to the zero matrix


    **Example**

    ```py
    import numpy, uncertainties
    from correldata import uarray, CorrelData

    X = uarray(uncertainties.correlated_values([1., 2., 3.], numpy.eye(3)*0.09))
    Y = uarray(uncertainties.correlated_values([4., 5., 6.], numpy.eye(3)*0.16))

    data = CorrelData(
        X = X,
        Y = Y,
        Z = X+Y,
    )

    print(
        data.str(
            float_format = 'z.1f',
            correl_format = 'z.1f',
        )
    )
    ```
    yields:
    ```
      X, SE_X,   Y, SE_Y,   Z, SE_Z, correl_X_Z,    ,    , correl_Y_Z,    ,
    1.0,  0.3, 4.0,  0.4, 5.0,  0.5,        0.6, 0.0, 0.0,        0.8, 0.0, 0.0
    2.0,  0.3, 5.0,  0.4, 7.0,  0.5,        0.0, 0.6, 0.0,        0.0, 0.8, 0.0
    3.0,  0.3, 6.0,  0.4, 9.0,  0.5,        0.0, 0.0, 0.6,        0.0, 0.0, 0.8
    ```
    '''
    if include_fields is None:
        include_fields = list(self)
    if exclude_fields is None:
        exclude_fields = []
    cols, ufields = [], []
    for f in include_fields:
        if f in exclude_fields:
            continue
        if isinstance(self[f], uarray):
            ufields.append(f)
            N = self[f].size
            if show_nv:
                cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f].n])
            if show_se and (f'SE_{f}' not in exclude_fields):
                cols.append([f'SE_{f}'] + [f2s(_, float_format, f, default_float_format) for _ in self[f].s])
            if show_correl and (f'correl_{f}' not in exclude_fields):
                CM = _uc.correlation_matrix(self[f])
                # an identity correlation matrix carries no information: omit it
                if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
                    for i in range(N):
                        cols.append(
                            ['' if i else f'correl_{f}']
                            + [
                                f2s(
                                    CM[i,j],
                                    correl_format,
                                    f,
                                    default_correl_format,
                                )
                                for j in range(N)
                            ]
                        )
        elif show_nv:
            cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in self[f]])

    if show_mixed_correl:
        for i in range(len(ufields)):
            for j in range(i):
                if f'correl_{ufields[i]}_{ufields[j]}' in exclude_fields or f'correl_{ufields[j]}_{ufields[i]}' in exclude_fields:
                    continue
                # format key for this pair, matching the column label
                # (not the leftover loop variable `f`)
                pair = f'{ufields[j]}_{ufields[i]}'
                CM = _uc.correlation_matrix((*self[ufields[i]], *self[ufields[j]]))[:N, -N:]
                # an all-zero cross-correlation block is omitted
                if not _np.allclose(CM, _np.zeros((N, N)), atol = atol, rtol = rtol):
                    for k in range(N):
                        cols.append(
                            ['' if k else f'correl_{pair}']
                            + [
                                f2s(
                                    CM[k,l],
                                    correl_format,
                                    pair,
                                    default_correl_format,
                                )
                                for l in range(N)
                            ]
                        )

    # transpose the list of columns into a list of lines
    lines = list(map(list, zip(*cols)))

    if align:
        lengths = [max([len(e) for e in l]) for l in cols]
        for l in lines:
            for k,ln in enumerate(lengths):
                l[k] = f'{l[k]:{align}{ln}s}'
        return '\n'.join([(sep+' ').join(l) for l in lines])

    return '\n'.join([sep.join(l) for l in lines])
Return CSV-like string
Arguments
- `sep`: the CSV separator
- `include_fields`: subset of fields to write; if `None`, write all fields
- `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`); to exclude only the SE for field `foo`, list `SE_foo` in `exclude_fields`; same goes for `correl_foo`
- `float_format`: formatting for float values. May be a string (ex: `'z.3f'`), a callable returning the formatted text (ex: `lambda x: f'{x:.2f}' if x else '0'`), or a dictionary of strings and/or callables, with dict keys corresponding to different fields (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`).
- `correl_format`: same as `float_format`, but applies to correlation matrix elements
- `default_float_format`: only used when `float_format` is a dict; in that case, fields missing from `float_format.keys()` will use `default_float_format` instead.
- `default_correl_format`: same as `default_float_format`, but applies to `correl_format`
- `show_nv`: show nominal values
- `show_se`: show standard errors
- `show_correl`: show correlations for any given field (ex: `correl_X`)
- `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`)
- `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
- `atol`: passed to numpy.allclose() when deciding whether a matrix is equal to the identity matrix or to the zero matrix
- `rtol`: passed to numpy.allclose() when deciding whether a matrix is equal to the identity matrix or to the zero matrix
Example
from correldata import uarray, CorrelData
X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09))
Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16))
data = CorrelData(
X = X,
Y = Y,
Z = X+Y,
)
print(
data.str(
float_format = 'z.1f',
correl_format = 'z.1f',
)
)
yields:
X, SE_X, Y, SE_Y, Z, SE_Z, correl_X_Z, , , correl_Y_Z, ,
1.0, 0.3, 4.0, 0.4, 5.0, 0.5, 0.6, 0.0, 0.0, 0.8, 0.0, 0.0
2.0, 0.3, 5.0, 0.4, 7.0, 0.5, 0.0, 0.6, 0.0, 0.0, 0.8, 0.0
3.0, 0.3, 6.0, 0.4, 9.0, 0.5, 0.0, 0.0, 0.6, 0.0, 0.0, 0.8
def to_csv(self, filename, **kwargs):
    '''
    Save the table as a CSV file.

    **Arguments**
    - `filename`: `str` or path to the CSV file
    - `kwargs`: passed to `CorrelData.str()`
    '''
    with open(filename, 'w') as stream:
        # the file is opened before the text is built, as in the original
        payload = self.str(**kwargs)
        written = stream.write(payload)
    return written
Write data to a CSV file.
Arguments
- `filename`: `str` or path to the CSV file
- `kwargs`: passed to `CorrelData.str()`
def as_uarray(
    X: (uarray | _np.ndarray | _uc.UFloat | float),
    Xse: (_np.ndarray | float | None) = None,
    CM: (_np.ndarray | None) = None,
) -> uarray:
    """
    Convert the input to an uarray. If the input is a single float or
    [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html),
    yields an uarray of size 1.

    **Arguments**
    * `X`: nominal value(s)
    * `CM`: covariance matrix of X; not needed if elements of X are of type
    [`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
    or if `Xse` is specified.
    * `Xse`: SE of X, either one value per element of `X` or a single value
    applied to all elements; not needed if elements of X are of type
    [`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
    or if `CM` is specified.

    If neither `CM` nor `Xse` are specified, assume SE = 0.
    `CM` and `Xse` are ignored when `X` already carries uncertainties.

    **Raises**
    * `ValueError` if both `CM` and `Xse` are specified for inputs without uncertainties
    * `TypeError` if `X` is neither an array, a UFloat, nor a real number
    """

    if isinstance(X, uarray):
        return X

    if isinstance(X, _np.ndarray):
        if _np.all([isinstance(_, _uc.UFloat) for _ in X]):
            return uarray(X)

        X = X.astype(float)

        if CM is not None:
            if Xse is not None:
                raise ValueError('Too much information: Xse is redundant because CM is already specified.')
        else:
            if Xse is None:
                Xse = 0.
            # accept one SE per element as well as a single SE shared by all elements
            se = _np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)
            CM = _np.diag(se**2)

        return uarray(_uc.correlated_values(X, CM))

    if isinstance(X, _uc.UFloat):
        return uarray([X])

    if isinstance(X, (float, int, _np.integer, _np.floating)):

        if CM is not None:
            if Xse is not None:
                raise ValueError('Too much information: Xse is redundant because CM is already specified.')
            # a single value has a 1x1 covariance matrix, i.e. its variance
            Xse = _np.atleast_2d(CM)[0,0]**0.5
        elif Xse is None:
            # documented default: no uncertainty
            Xse = 0.

        return uarray([_uc.ufloat(X, Xse)])

    # do not silently return None for unsupported inputs
    raise TypeError(f'X ({type(X)}) must be an uarray, a numpy array, a UFloat, or a real number.')
Convert the input to an uarray. If the input is a single float or UFloat, yields an uarray of size 1.
Arguments
- `X`: nominal value(s)
- `CM`: covariance matrix of X; not needed if elements of X are of type `UFloat` or if `Xse` is specified.
- `Xse`: SE of X; not needed if elements of X are of type `UFloat` or if `CM` is specified.
If neither CM nor Xse are specified, assume SE = 0.
def as_pair_of_uarrays(
    X: (uarray | _np.ndarray | _uc.UFloat | float),
    Y: (uarray | _np.ndarray | _uc.UFloat | float),
    Xse: (_np.ndarray | float | None) = None,
    Yse: (_np.ndarray | float | None) = None,
    CM: (_np.ndarray | None) = None,
) -> tuple[uarray, uarray]:
    """
    Convert the input to a pair of uarrays, returned as a tuple `(X, Y)`.

    **Arguments**
    * `X`: x values
    * `Y`: y values (must be of exactly the same type as `X`)
    * `CM`: covariance matrix of `(*X, *Y)`; not needed if elements of X and Y are of type
    [`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
    or if (`Xse`, `Yse`) are specified.
    * `Xse`, `Yse`: SE of X and Y, either one value per element or a single value
    applied to all elements; not needed if elements of X and Y are of type
    [`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
    or if `CM` is specified.

    For array inputs, if neither `CM`, `Xse` nor `Yse` are specified, assume SE = 0.
    For scalar inputs, either `CM` or both `Xse` and `Yse` must be specified.
    `CM`, `Xse` and `Yse` are ignored when `X` and `Y` already carry uncertainties.

    **Raises**
    * `TypeError` if `X` and `Y` have different types, or an unsupported type
    * `ValueError` if `CM` is specified together with `Xse` or `Yse`, or if
    scalar inputs come with neither `CM` nor both of `Xse` and `Yse`
    """

    if type(X) is not type(Y):
        raise TypeError(f'X ({type(X)}) and Y ({type(Y)}) must have the same type.')

    if isinstance(X, uarray):
        return (X, Y)

    if isinstance(X, _np.ndarray):
        if (
            _np.all([isinstance(_, _uc.UFloat) for _ in X])
            and
            _np.all([isinstance(_, _uc.UFloat) for _ in Y])
        ):
            return uarray(X), uarray(Y)

        X = X.astype(float)
        Y = Y.astype(float)

        if CM is not None:
            if Xse is not None:
                raise ValueError('Too much information: Xse is redundant because CM is already specified.')
            if Yse is not None:
                raise ValueError('Too much information: Yse is redundant because CM is already specified.')
            # CM covers the concatenation (*X, *Y): build jointly, then split
            XY = uarray(_uc.correlated_values([*X, *Y], CM))
            return XY[:X.size], XY[X.size:]

        if Xse is None:
            Xse = 0.
        if Yse is None:
            Yse = 0.

        # accept one SE per element as well as a single SE shared by all elements
        CMx = _np.diag(_np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)**2)
        CMy = _np.diag(_np.broadcast_to(_np.asarray(Yse, dtype = float), Y.shape)**2)
        return uarray(_uc.correlated_values(X, CMx)), uarray(_uc.correlated_values(Y, CMy))

    if isinstance(X, _uc.UFloat):
        return uarray([X]), uarray([Y])

    if isinstance(X, (float, int, _np.integer, _np.floating)):

        if CM is not None:
            if Xse is not None:
                raise ValueError('Too much information: Xse is redundant because CM is already specified.')
            if Yse is not None:
                raise ValueError('Too much information: Yse is redundant because CM is already specified.')
        else:
            if Xse is None:
                raise ValueError('Not enough information: specify either CM or Xse.')
            if Yse is None:
                raise ValueError('Not enough information: specify either CM or Yse.')

            CM = _np.diag([Xse, Yse])**2

        XY = uarray(_uc.correlated_values([X, Y], CM))
        return XY[:1], XY[1:]

    # do not silently return None for unsupported inputs
    raise TypeError(f'X and Y ({type(X)}) must be uarrays, numpy arrays, UFloats, or real numbers.')
Convert the input to a pair of uarrays.
Arguments
- `X`: x values
- `Y`: y values
- `CM`: covariance matrix of `(*X, *Y)`; not needed if elements of X and Y are of type `uncertainties.UFloat` or if (`Xse`, `Yse`) are specified.
- `Xse`, `Yse`: SE of X and Y; not needed if elements of X and Y are of type `uncertainties.UFloat` or if `CM` is specified.
If neither CM, Xse nor Yse are specified, assume SE = 0.