Package PyDSTool :: Package Toolbox :: Module fracdim
[hide private]
[frames] | no frames]

Source Code for Module PyDSTool.Toolbox.fracdim

   1  """Fractal dimension estimates for analysis of datasets having rank 2 or 3.
 
   2  
 
   3  A journal publication concerning the use of these functions appears in
 
   4  IEEE Transactions on BioMedical Engineering, 2008.
 
   5  
 
   6  (c) 2005, 2006. Robert Clewley, John Guckenheimer.
 
   7  """ 
   8  
 
   9  from __future__ import division 
  10  from PyDSTool import * 
  11  from PyDSTool.common import _seq_types 
  12  from PyDSTool.Toolbox.data_analysis import * 
  13  from numpy import extract, mean, std, ndarray 
  14  from scipy.linalg import norm 
  15  from PyDSTool.matplotlib_import import * 
  16  from matplotlib import cm 
  17  from time import localtime 
  18  from copy import copy 
  19  
 
  20  __all__ = ['corr_dim', 'corr_dim_with_progress',
 
  21             'find_cover', 'find_outliers', 'get_cover_radii',
 
  22             'do_stats', 'nhd', 'timeseq', 'get_rV_curves',
 
  23             'get_filtered_ixs', 'plot_radius_distribution', 'filter_by_radius',
 
  24             'plot_dim_by_thickness', 'plot_thickness_by_dim', 'find_nhd',
 
  25             'prep_secant_figure', 'PD_E', 'slope_range', 'scatterplot_slopes',
 
  26             'scatter_histo', 'rescatter', 'sorted_by_slope', 'sorted_by_radius'] 
  27  
 
  28  # -----------------------------------------------------------------------------
 
  29  
 
  30  
 
31 -def corr_dim(data, which_norm=2):
32 """Correlation dimension of a data set. 33 """ 34 npts = size(data,0) 35 d = zeros((npts*(npts-1)/2,), 'd') # size of upper triangle (no diagonal) of m vs. m matrix 36 # max ix for each i in expression for d is (i-1)*(i-2)/2 + (i-2) 37 for i in xrange(1,npts): 38 for j in xrange(i-1): 39 d[(i-1)*(i-2)/2+j] = norm(data[i]-data[j], which_norm) 40 d.sort() 41 logd = log(d).ravel() 42 logv = log(range(1,len(d)+1)) 43 nan_ixs = isfinite(logd) 44 logd = extract(nan_ixs, logd) 45 logv = extract(nan_ixs, logv) 46 return (logv, logd)
47 48
49 -def corr_dim_with_progress(data, inforate=10000, which_norm=2):
50 """Version of correlation dimension with text file progress dump, 51 at a sample rate of 'inforate', because the calculation typically 52 takes a long time. 53 """ 54 npts = size(data,0) 55 dsize = npts*(npts-1)/2 56 d = zeros((dsize,), 'd') # size of upper triangle (no diagonal) of m vs. m matrix 57 # max ix for each i in expression for d is (i-1)*(i-2)/2 + (i-2) 58 for i in xrange(1,npts): 59 for j in xrange(i-1): 60 ix = (i-1)*(i-2)/2+j 61 if mod(ix,inforate) == 0: 62 dump_progress(ix, dsize) 63 d[ix] = norm(data[i]-data[j]) 64 d.sort() 65 logd = log(d).ravel() 66 logv = log(range(1,len(d)+1)) 67 nan_ixs = isfinite(logd) 68 logd = extract(nan_ixs, logd) 69 logv = extract(nan_ixs, logv) 70 return (logv, logd)
71 72
73 -def timeseq(time, covering, refpt):
74 """Extract time sequence from a covering""" 75 ts = time.take(covering[refpt][3], 0) 76 print "time sequence min = %f, max = %f"%(min(ts),max(ts)) 77 return ts
78 79 80 81 #### Version 2 of regression residual method 82 #### This one starts at sqrt(N)
83 -def find_cover(data, remain_tol=5, 84 step=None, initref=None, wlo=1, whi=1, 85 check_large_nhds=True, large_nhd=300, 86 min_nhd_size=50, min_ix=25, 87 max_res=1.0, max_std=0.2, quiet=True):
88 """ 89 OUTPUTS: 90 91 covering is a dict: 92 reference point -> (dim, low radius, high radius, covered_indices) 93 covered is a dict: 94 point -> reference points whose neighbourhoods include it 95 """ 96 covering = {} 97 covered = {}.fromkeys(range(len(data)),None) 98 not_covered = covered.keys() 99 if remain_tol > len(data): 100 remain_tol = 0 101 makerefpt_ixs = [] 102 if initref is not None: 103 refptix = initref 104 else: 105 refptix = 0 # initial value 106 N = size(data,0) 107 start_ix = int(round(sqrt(N))) 108 print "N = ", N, "start ix = ", start_ix 109 logv_raw = log(range(1,N+1)) 110 done = False 111 unsuccessful = [] 112 if step is None: 113 ixstep = max([int(around(len(data)/2000.)),1]) 114 elif step > 0: 115 if step < 1: 116 # treat as %age of # data points 117 ixstep = int(math.ceil(len(d)*step)) 118 if not quiet: 119 print "Using index step %i out of %i points"%(ixstep, len(d)) 120 else: 121 ixstep = step 122 else: 123 raise ValueError("Invalid step argument") 124 while not done: 125 if not quiet: 126 print "\n*************\nCalculating distances and point-wise dimension around pt %i ..."%refptix 127 logv, logd, d, d_inv_dict = log_distances_with_D(data, refptix, logv_raw) 128 d_inv_keys = sortedDictKeys(d_inv_dict) 129 d_inv_vals = sortedDictValues(d_inv_dict) 130 d_inv = Pointset(indepvararray=d_inv_keys, indepvartype=Float, 131 coordarray=d_inv_vals, coordtype=Int, tolerance=5e-5) 132 if not quiet: 133 print "Finding self-consistent neighbourhood", 134 # search from min_ix -> end, increasing hiix from loix+1 until dimensionality 135 # (slope) stabilizes to within an absolute tolerance and does not grow larger 136 # than a different tolerance 137 err=-1 138 len_logd = len(logd) 139 old_dim = None 140 hiix = start_ix 141 if not quiet: 142 print "Start ix = ", start_ix 143 loix=start_ix-2 144 nhd_too_small = False 145 ref_dim = -1 146 all_res = [] 147 in_zone = False 148 # Grow West! 149 while not in_zone and loix > min_ix+ixstep: 150 loix -= ixstep 151 pfit=polyfit(logd[loix:hiix],logv[loix:hiix],1) 152 dim = pfit[0] 153 residual = get_linear_regression_residual(pfit, logd[loix:hiix],logv[loix:hiix], 154 weight='') 155 all_res.append(residual) 156 if not quiet and mod(hiix,50) == 0: 157 print ".", 158 if old_dim is not None: 159 sd = std(all_res) 160 ## if residual > max_res: 161 ## in_zone = True 162 if sd > max_std: 163 in_zone = True 164 old_dim = dim 165 # Grow East! 166 all_res = [] #[all_res[-1]] 167 not_done = True 168 while not_done and hiix < len_logd-1-ixstep: 169 hiix += ixstep 170 # do a straight-line best fit 171 pfit=polyfit(logd[loix:hiix],logv[loix:hiix],1) 172 dim = pfit[0] 173 residual = get_linear_regression_residual(pfit, logd[loix:hiix],logv[loix:hiix], 174 weight='lo', w=wlo) 175 all_res.append(residual) 176 if not quiet: 177 print "(%i,%i)"%(loix,hiix), 178 print "dim=%.4f, residual = %.4f"%(dim,residual) 179 if not quiet and mod(hiix,50) == 0: 180 print ".", 181 try: 182 sd = std(all_res) 183 except: 184 sd = 0 185 ## if residual > max_res: 186 ## if not quiet: 187 ## print "residual > max_res" 188 ## not_done = False 189 if sd > max_std: 190 if not quiet: 191 print "residuals s.d. > max_std" 192 not_done = False 193 old_dim = dim 194 nhd_too_small = hiix-loix < min_nhd_size 195 if not quiet: 196 print "nhd_too_small = ", nhd_too_small 197 if nhd_too_small: 198 if not quiet: 199 print "Neighbourhood too small. Moving to a different reference point..." 200 unsuccessful.append(refptix) 201 else: 202 if not quiet: 203 print "\nDimension = %f"%dim 204 print "Found best fit line from relative ix %i to %i (radius %f)"%(loix, hiix, d[hiix]) 205 # consolidate results in terms of global index positions in data 206 covered_ixs = [d_inv(d[ix])[0] for ix in range(loix, hiix+1)] 207 covered_ixs.append(refptix) 208 # if nhd very large, recheck one in every 100 points because ptwise 209 # dim estimate may occasionally vary between points in that nhd. 210 if hiix-loix > large_nhd and check_large_nhds: 211 num_recheck = int(around((hiix-loix)/large_nhd)) 212 for i in range(num_recheck): 213 ix = random.randrange(0, hiix-loix) 214 if ix not in makerefpt_ixs and ix != refptix: 215 makerefpt_ixs.append(ix) 216 covering[refptix] = (dim, d[loix], d[hiix], covered_ixs) 217 for ix in covered_ixs: 218 if covered[ix] is None: 219 covered[ix] = [refptix] 220 else: 221 covered[ix].append(refptix) 222 not_covered = remain(not_covered, covered_ixs) 223 # only assert this as a consistency check the first time through the loop 224 ## assert len(not_covered) + len(covered_ixs) + 1 == lendat 225 # find new ref pt in not_covered and repeat 226 num_uncovered = len(not_covered) 227 ## if not quiet: 228 print "%i points left to cover"%(num_uncovered+len(makerefpt_ixs)) 229 if num_uncovered < remain_tol: 230 if len(makerefpt_ixs) == 0: 231 done = True 232 else: 233 # check these ref pts that were part of large nhds 234 refptix = makerefpt_ixs.pop() 235 check_large_nhds = False # switch this off now! 236 else: 237 pick_unsuccessful = 0 238 while pick_unsuccessful < num_uncovered and pick_unsuccessful >= 0: 239 refptix = not_covered[random.randrange(0,num_uncovered)] 240 if refptix in unsuccessful: 241 pick_unsuccessful += 1 242 else: 243 pick_unsuccessful = -1 244 if pick_unsuccessful == num_uncovered: 245 done = True 246 return (covered, covering)
247 248 249
250 -def find_nhd(data, tol_up=.2, tol_down=0.05, quiet=True, max_res=0.2, max_std=0.1, 251 step=None, initref=None, min_ix=10, min_nhd_size=20, 252 max_radius_ratio=1.5):
253 """Find a single neighbourhood around a reference point.""" 254 covering = {} 255 covered = {}.fromkeys(range(len(data)),None) 256 if initref is not None: 257 refptix = initref 258 else: 259 refptix = 0 # initial value 260 logv_raw = log(range(1,size(data,0)+1)) 261 if not quiet: 262 print "\n*************\nCalculating distances and point-wise dimension around pt %i ..."%refptix 263 doplot = 1 264 else: 265 doplot = 0 266 logv, logd, d, d_inv_dict = log_distances_with_D(data, refptix, logv_raw) 267 d_inv_keys = sortedDictKeys(d_inv_dict) 268 d_inv_vals = sortedDictValues(d_inv_dict) 269 d_inv = Pointset(indepvararray=d_inv_keys, indepvartype=Float, 270 coordarray=d_inv_vals, coordtype=Int, tolerance=5e-5) 271 if not quiet: 272 print "Finding self-consistent neighbourhood", 273 # search from min_ix -> end, increasing hiix from loix+1 until dimensionality 274 # (slope) stabilizes to within an absolute tolerance and does not grow larger 275 # than a different tolerance 276 err=-1 277 len_logd = len(logd) 278 old_dim = None 279 loix=min_ix 280 hiix=loix+1 281 nhd_too_small = (len_logd < min_ix+10) 282 if step is None: 283 ixstep = max([int(around(len(data)/2000.)),1]) 284 elif step > 0: 285 if step < 1: 286 # treat as %age of # data points 287 ixstep = int(math.ceil(len(d)*step)) 288 if not quiet: 289 print "Using index step %i out of %i points"%(ixstep, len(d)) 290 else: 291 ixstep = step 292 else: 293 raise ValueError("Invalid step argument") 294 if not quiet: 295 plot(logd,logv) 296 not_done = not nhd_too_small 297 in_zone = False 298 ref_dim = -1 299 log_max_radius_ratio = log(max_radius_ratio) 300 all_res = [] 301 while not_done and hiix < len_logd-1-ixstep: 302 hiix += ixstep 303 # do a straight-line best fit 304 pfit = polyfit(logd[loix:hiix],logv[loix:hiix],1) 305 dim = pfit[0] 306 residual = get_linear_regression_residual(pfit, logd[loix:hiix],logv[loix:hiix]) 307 ## if hiix - loix > min_nhd_size: 308 all_res.append(residual) 309 if not quiet: 310 print "Dim = %.3f in [%i,%i], Residual = %.4f "%(dim,loix,hiix,residual), 311 plot([logd[loix],logd[hiix]],[logd[loix]*pfit[0]+pfit[1],logd[hiix]*pfit[0]+pfit[1]]) 312 ## if mod(hiix,50) == 0: 313 ## print ".", 314 if old_dim is not None: 315 ## err = abs(dim-old_dim)/dim 316 sd = std(all_res) 317 if not quiet: 318 print "S.d. = %.4f "%sd 319 if residual > max_res: 320 print "residual > max_res" 321 not_done = False 322 if sd > max_std: 323 print "residuals s.d. > max_std" 324 not_done = False 325 ## if err > max_err and hiix-loix > min_nhd_size/4: 326 ## if not quiet: 327 ## print "\nMaxed out successive difference in dimension estimate" 328 ## print " ... current dim = %.3f, old dim = %.3f, error = %.3f"%(dim,old_dim,err) 329 ## not_done = False 330 ## continue 331 ## if in_zone: 332 ## zone_err = abs(dim-ref_dim)/ref_dim 333 ## if in_zone and zone_err > tol_up: 334 ## not_done = False # end neighbourhood growth 335 ## if logd[hiix]-logd[loix] > log_max_radius_ratio: 336 ## not_done = False # end neighbourhood growth 337 ## if not quiet: 338 ## print "Radius too large: %.4f/%.4f"%(exp(logd[hiix]),exp(logd[loix])) 339 ## elif err < tol_down: 340 ## in_zone = True 341 ## ref_dim = dim 342 ## if not quiet: 343 ## print " - entered zone at ix", hiix, " - with ref dim ", ref_dim 344 old_residual = residual 345 old_dim = dim 346 nhd_too_small = nhd_too_small or hiix-loix < min_nhd_size 347 if nhd_too_small: 348 print "Neighbourhood too small. Try a different starting index or a new reference point ..." 349 print "Dim found over ixs [%i, %i] = %.4f"%(loix,hiix,dim) 350 raise RuntimeError 351 else: 352 if not quiet: 353 print "\nDimension = %f"%dim 354 print "Found best fit line from relative ix %i to %i (radius %f)"%(loix, hiix, d[hiix]) 355 # consolidate results in terms of global index positions in data 356 covered_ixs = [d_inv(d[ix])[0] for ix in range(loix, hiix+1)] 357 covered_ixs.append(refptix) 358 covering[refptix] = (dim, d[loix], d[hiix], covered_ixs) 359 for ix in covered_ixs: 360 if covered[ix] is None: 361 covered[ix] = [refptix] 362 else: 363 covered[ix].append(refptix) 364 return (covered, covering)
365 366 367
368 -def do_stats(covering, covered, maxD, bin_width=1, 369 fignum=1, num_panels=4, s_frac=0.5, 370 nhd_size_thresh=None, nhd_max_plot_size=None, minD=None, 371 D_estimate_method="absolute", weight_by_size=False, 372 force_xaxis_limits=False, radius_yaxis_limits=None, 373 save=''):
374 """Produces statistics of pointwise dimension estimates of a data set, 375 including a histogram of the distribution of neighbourhood dimensions. 376 377 R. Clewley, June 2006. 378 379 INPUTS: 380 381 maxD sets the maximum dimension to cater for. 382 383 bin_width selects the width of bins for dimensions (default 1). 384 385 fignum forces a particular figure number to be used for display. 386 (Use fignum=0 to suppress plotting of the results and just return 387 the statistics.) 388 389 num_panels argument must be 2 (for just nhd size and bin histo) or 390 4 (also include min and max nhd radius). 391 392 s_frac selects the fraction (or multiple) of the standard deviation of 393 neighbourhood sizes to use as the cutoff for the "bulk" of the data 394 set, in order to avoid including large outliers in determining 395 nhd_size_threshold (default 0.5). 396 397 nhd_size_threshold selects the smallest neighbourhood size to be 398 included in histogram of neighbourhood dimensionalities 399 (default None => automatic selection, i.e., the mean of the first 400 s_frac of the neighbourhood sizes' standard deviation). 401 Set to zero to prevent it being used or plotted. 402 403 nhd_max_plot_size selects the y-axis limit on the plot of neighbourhood 404 sizes (default None => automatic selection). 405 406 D_estimate_method can be 'absolute' (uses cutoff of size = 1 to estimate D 407 in histogram tail), or 'relative' (uses a cutoff = %age of peak binned 408 neighbourhoods) 409 410 force_xaxis_limits (Boolean) determines whether x-axis limits should be 411 forced to be the value of the (minD,maxD) parameters. 412 413 minD sets the minimum dimension to cater for (default to auto-choice). 414 415 radius_yaxis_limits (pair) sets the r_min and r_max y-axis upper limits, 416 respectively. 417 418 save (string) sets a filename to use for saving the figure in available 419 formats (so don't include a file extension). 420 421 422 OUTPUTS: 423 424 d_stats: dictionary {"D_tail": estimated D from tail, 425 "D_mean": histo mean, 426 "D_std": histo std dev} 427 428 dbins: Pointset of D bins 429 430 cover_by_dimix: dictionary of covers by index of dimensions binned 431 (i.e., [0, bin_width, 2*bin_width, ..., maxD+1]) 432 433 cover_by_size: list of neighbourhood coverings ordered by their size 434 435 cbu: array indicating how many points in overlap between neighbourhoods 436 (array index corresponds to a number of points shared, 437 value = number of nhds with that number of points shared) 438 439 ixs: list of indices of neighbourhoods of size > nhd_size_threshold 440 441 (integral: internal diagnostic feature) 442 """ 443 dx = zeros((len(covering),1),'f') 444 ly = zeros((len(covering),1),'f') 445 rloy = zeros((len(covering),1),'f') 446 rhiy = zeros((len(covering),1),'f') 447 drange = arange(0, maxD+1, bin_width, 'd').tolist() 448 dbins = data_bins('D', drange) 449 cover_by_dimix = {}.fromkeys(range(len(drange))) 450 for dix in range(len(drange)): 451 cover_by_dimix[dix] = [] 452 cover_by_size_dict = {}.fromkeys(range(len(covered)+1)) 453 ix = 0 454 integral = 0 455 largest = 0 456 try: 457 for p, (d, rlo, rhi, l) in covering.iteritems(): 458 dx[ix] = d 459 rloy[ix] = rlo 460 rhiy[ix] = rhi 461 lenl = len(l) 462 if lenl > largest: 463 largest = lenl 464 try: 465 cover_by_size_dict[lenl].append(p) 466 except AttributeError: 467 cover_by_size_dict[lenl] = [p] 468 ly[ix] = lenl 469 integral += lenl 470 ix += 1 471 except ValueError: 472 # No max radius information available! 473 # compatible with old version of covering that does not return rhi 474 for p, (d, rlo, l) in covering.iteritems(): 475 dx[ix] = d 476 rloy[ix] = rlo 477 rhiy[ix] = 0 478 lenl = len(l) 479 if lenl > largest: 480 largest = lenl 481 try: 482 cover_by_size_dict[lenl].append(p) 483 except AttributeError: 484 cover_by_size_dict[lenl] = [p] 485 ly[ix] = lenl 486 integral += lenl 487 ix += 1 488 print "\nNeighbourhood statistics:" 489 print "There are %i neighbourhoods to this covering"%len(covering) 490 print "Largest neighbourhood has %i points"%largest 491 cover_by_size = [di for di in cover_by_size_dict.iteritems() if di[1] is not None] 492 cover_by_size.sort() 493 csizes = [c[0] for c in cover_by_size] 494 if nhd_size_thresh is None: 495 s = std(csizes) 496 sm = s_frac*s 497 m = mean(csizes) 498 print "Std. dev. of cover_by_size =", s 499 print "Max size found =", max(csizes) 500 print "Mean size found =", m 501 ## # find largest index of set of covering nhds such that its size 502 ## # is smaller than s_frac% of the std deviation of the sizes 503 ## for i in range(len(cover_by_size)): 504 ## if csizes[i] > sm: 505 ## break 506 ## try: 507 ## nhd_size_thresh = mean(csizes[:i]) 508 ## except ZeroDivisionError: 509 ## nhd_size_thresh = 0 510 ## print "N'hood size threshold used = mean(cover_by_sizes restricted to %.3f of 1st std. dev.)"%s_frac 511 ## print " =", nhd_size_thresh 512 nhd_size_thresh = m-sm 513 print "N'hood size threshold used = mean - %.3f of std. dev."%s_frac 514 print " =", nhd_size_thresh 515 else: 516 if nhd_size_thresh > 0: 517 print "N'hood size threshold set by user =", nhd_size_thresh 518 if nhd_max_plot_size is None: 519 ## # 2 * original set sizes mean 520 ## try: 521 ## nhd_max_plot_size = 2*mean(csizes) 522 ## except ZeroDivisionError: 523 ## nhd_max_plot_size = largest 524 ## print "Using max plot nhd size = 2*mean(cover_by_sizes) =", nhd_max_plot_size 525 nhd_max_plot_size = largest 526 print "Using max plot nhd size of largest neighbourhood found =", nhd_max_plot_size 527 else: 528 print "Using max plot nhd size set by user =", nhd_max_plot_size 529 # reverse so that largest first for returning to user 530 cover_by_size.reverse() 531 filtered_ixs = [] 532 try: 533 for p, (d, rlo, rhi, l) in covering.iteritems(): 534 if d <= maxD: 535 dix = dbins.resolve_bin_index(d) 536 if len(l) > nhd_size_thresh: 537 if weight_by_size: 538 inc = len(l)/largest 539 else: 540 inc = 1 541 dbins.increment(d, inc) 542 filtered_ixs.append(p) 543 cover_by_dimix[dix].append(p) 544 except ValueError: 545 # compatible with old version of covering that does not return rhi 546 for p, (d, rlo, l) in covering.iteritems(): 547 if d <= maxD: 548 dix = dbins.resolve_bin_index(d) 549 if len(l) > nhd_size_thresh: 550 if weight_by_size: 551 inc = len(l)/largest 552 else: 553 inc = 1 554 dbins.increment(d, inc) 555 filtered_ixs.append(p) 556 cover_by_dimix[dix].append(p) 557 # decide on single dimension estimate 558 ks=dbins.midpoints 559 d_est = 0 # initial value 560 if D_estimate_method == "absolute": 561 # based on largest D > 1 in 562 # tail of dbins histogram (assumes unimodal) 563 if len(covering) < 3: 564 dbin_thresh = 0 565 else: 566 dbin_thresh = 1 567 for k in ks: 568 n = dbins(k) 569 if d_est > 0 and n == 0: 570 # stop before a second spike might occur 571 break 572 if n > dbin_thresh: 573 d_est = k 574 elif D_estimate_method == "relative": 575 peak = 0 576 last_peak = 0 577 for k in ks: 578 n = dbins(k) 579 if n > peak and n > 1: 580 peak = n 581 last_peak = n 582 if d_est > 0 and n == 0: 583 # stop before a second spike might occur 584 break 585 if peak > 0: 586 # only applies if peak already found 587 if n > .2*last_peak: 588 # discard bins if smaller than 20% of last peak 589 d_est = k 590 # only change last peak record if new one is actually smaller! 591 last_peak = n 592 else: 593 raise ValueError("Invalid dimension estimation method name") 594 if fignum > 0: 595 fontmath = args(fontsize=22,fontname='Times') 596 fonttext = args(fontsize=18,fontname='Times') 597 # make plot 598 print "Plotting histograms to figure", fignum 599 assert num_panels in [2,4], "num_panels argument must be 2 or 4" 600 figure(fignum) 601 if num_panels == 4: 602 subplot(2,2,1) 603 plot(dx, rhiy, 'ro') 604 glines = getp(gca(), 'xticklines')+getp(gca(), 'yticklines') 605 setp(glines, 'linewidth', 2) 606 ## title('nhd max radius') 607 xlabel(r'$\rm{Dimension}$', fonttext) 608 ylabel(r'$r_{max}$', fontmath) 609 if num_panels == 4: 610 subplot(2,2,2) 611 else: 612 subplot(2,1,1) 613 plot(dx, ly, 'ko') 614 glines = getp(gca(), 'xticklines')+getp(gca(), 'yticklines') 615 setp(glines, 'linewidth', 2) 616 dmin, dmax = figure(fignum).axes[0].get_xlim() 617 if dmax > maxD or force_xaxis_limits: 618 dmax = maxD 619 if dmin < minD or force_xaxis_limits: 620 if minD is not None: 621 dmin = minD 622 if nhd_size_thresh > 0: 623 plot([dmin,dmax],[nhd_size_thresh,nhd_size_thresh],'k--',linewidth=1.2) 624 ## title('size') 625 xlabel(r'$\rm{Dimension}$', fonttext) 626 ylabel(r'$\rm{N-hood \ size}$', fonttext) 627 if num_panels == 4: 628 subplot(2,2,3) 629 plot(dx, rloy, 'go') 630 glines = getp(gca(), 'xticklines')+getp(gca(), 'yticklines') 631 setp(glines, 'linewidth', 2) 632 ## title('minimum radius') 633 xlabel(r'$\rm{Dimension}$', fonttext) 634 ylabel(r'$r_{min}$', fontmath) 635 if num_panels == 4: 636 subplot(2,2,4) 637 else: 638 subplot(2,1,2) 639 dbx = dbins.midpoints.tolist() 640 dby = dbins.values() 641 bar(dbx, dby, color='b', width=dbx[1]-dbx[0]) 642 for panel in range(num_panels): 643 figure(fignum).axes[panel].set_xlim([dmin,dmax]) 644 ## title('number binned') 645 xlabel(r'$\rm{Dimension}$', fonttext) 646 ylabel(r'$\rm{Number \ binned}$',fonttext) 647 # Calculate overlap info: 648 # how many times does a point appear in the neighbourhoods? 649 # assumes will not be more than 100 times! 650 cbins = zeros((100,),'i') 651 max_used = 0 652 for clist in covered.itervalues(): 653 try: 654 n = len(clist) 655 except TypeError: 656 # a point that was not included in a covering 657 n = 0 658 if n > max_used: 659 max_used = n 660 if n < maxD+1: 661 cbins[n] = cbins[n] + 1 662 ## cran = range(max_used+1) 663 cbu = cbins[0:max_used+1] 664 if fignum > 0: 665 ## subplot(2,2,4) 666 ## bar(cran,cbu) 667 ## title('nhd overlap') 668 ## xlabel('# pts in common') 669 ## ylabel('# nhds') 670 if num_panels == 4: 671 figure(fignum).axes[0].set_position([0.1, 0.55, 0.37, 0.35]) 672 figure(fignum).axes[1].set_position([0.57, 0.55, 0.37, 0.35]) 673 figure(fignum).axes[2].set_position([0.1, 0.09, 0.37, 0.35]) 674 figure(fignum).axes[3].set_position([0.57, 0.09, 0.37, 0.35]) 675 figure(fignum).axes[1].set_ylim([0,nhd_max_plot_size]) 676 if radius_yaxis_limits is not None: 677 assert radius_yaxis_limits[1] >= radius_yaxis_limits[0], \ 678 "Specify radius y-axis limits in increasing order" 679 ylo1 = figure(fignum).axes[0].get_ylim()[0] 680 ylo0 = figure(fignum).axes[2].get_ylim()[0] 681 figure(fignum).axes[0].set_ylim([ylo1,radius_yaxis_limits[1]]) 682 figure(fignum).axes[2].set_ylim([ylo0,radius_yaxis_limits[0]]) 683 else: 684 figure(fignum).axes[0].set_position([0.125, 0.57, 0.775, 0.364]) 685 figure(fignum).axes[1].set_position([0.125, 0.08, 0.775, 0.364]) 686 figure(fignum).axes[0].set_ylim([0,nhd_max_plot_size]) 687 draw() 688 if save != '': 689 savefig(save+'.png') 690 savefig(save+'.svg') 691 savefig(save+'.eps') 692 # don't assert the following if re-checking points in large nhds (the default) 693 #assert sum([i*cbins[i] for i in cran]) == integral 694 tail_ix = dbins.resolve_bin_index(d_est) 695 if isinstance(tail_ix, tuple): 696 tail_ix = tail_ix[1] 697 try: 698 m = dbins[:tail_ix].mean() 699 s = dbins[:tail_ix].std() 700 except: 701 print "Tail found at index %i and D = %.3f"%(tail_ix,d_est) 702 print " -- problem computing mean and std dev for this mode" 703 print " so calculating for whole data set" 704 m = dbins.mean() 705 s = dbins.std() 706 print "Estimate dimension to be (to resolution of histogram bin width):", d_est 707 print " with histogram mean D = %.5f and std dev D = %.5f"%(m, s) 708 return {"D_tail": d_est, "D_mean": m, "D_std": s}, \ 709 dbins, cover_by_dimix, cover_by_size, cbu, filtered_ixs, integral
710 711
712 -def get_filtered_ixs(cover_by_dimix):
713 """Returns the list of indices in covering for which the neighbourhood 714 size was larger than the threshold for dimension binning.""" 715 fixs=[] 716 for p, ixlist in cover_by_dimix.iteritems(): 717 fixs.extend(ixlist) 718 fixs.sort() 719 return fixs
720 721
722 -def find_outliers(data, plotdata, cluster_dim, cluster_width, max_central=50):
723 outliers = [] 724 central = [] 725 assert cluster_width > 0 and cluster_dim - cluster_width > 1 726 for i, (dlo, dhi, refix) in enumerate(plotdata): 727 refix = int(refix) 728 if dlo < 1: 729 outliers.append(refix) 730 if dlo > cluster_dim-cluster_width and dlo < cluster_dim+cluster_width: 731 if len(central) < max_central: 732 central.append(refix) 733 num_central = len(central) 734 central_data = array([data[cix] for cix in central]) 735 if max_central > 0: 736 c_mean = mean(central_data,0) 737 else: 738 c_mean = central_data[0] 739 do = zeros((len(outliers),),float) 740 dc = zeros((num_central,),float) 741 for i, oix in enumerate(outliers): 742 do[i]=norm(c_mean-data[oix]) 743 for i, cdata in enumerate(central_data): 744 dc[i]=norm(c_mean-cdata) 745 ds = [] 746 for i in xrange(len(data)): 747 if i not in outliers: 748 ds.append(norm(data[i]-data[outliers[0]])) 749 ds = array(ds) 750 if num_central > 1: 751 mean_dc = mean(dc) 752 std_dc = std(dc) 753 else: 754 mean_dc = dc[0] 755 std_dc = 0 756 #print "Found %i outliers and %i central points"%(len(outliers),num_central) 757 return (mean(do), std(do), mean_dc, std_dc, mean(ds), std(ds), do)
758 759
760 -def get_cover_radii(covering, ixs=None, method='divide'):
761 """Return the radius of a covering""" 762 if method == 'divide': 763 rfun = lambda rh, rl: rh/rl 764 elif method == 'subtract': 765 rfun = lambda rh, rl: rh-rl 766 radii = {} 767 if ixs is None: 768 for p, (d, rlo, rhi, l) in covering.iteritems(): 769 radii[p] = rfun(rhi,rlo) 770 else: 771 for p, (d, rlo, rhi, l) in covering.iteritems(): 772 if p in ixs: 773 radii[p] = rfun(rhi,rlo) 774 return radii
775 776
777 -def filter_by_radius(covering, radii, rlo=None, rhi=None):
778 """Returns list of filtered (index, radius) pairs and corresponding 779 sub-coverings' dimensions""" 780 if rhi is None: 781 assert rlo is not None 782 test = lambda r: r > rlo 783 else: 784 if rlo is None: 785 test = lambda r: r < rhi 786 else: 787 test = lambda r: r > rlo and r < rhi 788 rfilt = [(ix,r) for (ix, r) in radii.iteritems() if test(r)] 789 return rfilt, [covering[rinfo[0]][0] for rinfo in rfilt]
790 791
792 -def plot_radius_distribution(radii, max_radius, radius_bin_width):
793 """Expects radii dictionary, as returned by get_cover_radii. 794 Returns figure handle.""" 795 rads = array(radii.values()) 796 dbins = data_bins('r', arange(0, max_radius+radius_bin_width, radius_bin_width)) 797 dbins.bin(rads) 798 dbx, dby = dbins.to_array() 799 f = figure() 800 bar(dbx,dby,width=rbin) 801 title('Distribution of nhd radii') 802 xlabel('r') 803 ylabel('#') 804 return f
805 806
807 -def plot_dim_by_thickness(covering, radii, rwidth=0, plotstyle='k.'):
808 """Plot of dimensions of neighbourhoods with thickness r, using 809 binned data of bin width rwidth. r is defined as the neighbourhood's 810 r_max - r_min. 811 812 If rwidth=0 (default) then no data binning is done. 813 Returns figure handle.""" 814 rlo, rhi = extent(radii.values()) 815 assert rlo > 0 and rhi > rlo 816 f=figure() 817 if rwidth > 0: 818 assert rwidth < rhi-rlo 819 for r in arange(0, rhi+rwidth, rwidth): 820 rs,ds=filter_by_radius(covering, radii, r, r+rwidth) 821 if ds != []: 822 plot([r], ds, plotstyle) 823 title('Distribution of dimension by radius bin (width '+str(rwidth)+')') 824 else: 825 rs,ds=filter_by_radius(covering, radii, 0, rhi) 826 if ds != []: 827 plot([r[1] for r in rs], ds, plotstyle) 828 ## title('Distribution of dimension by radius') 829 xlabel(r'$r_{max} \ - \ r_{min}$',fontsize=22) 830 ylabel(r'$\rm{Dimension}$',fontsize=20) 831 return f
832 833
834 -def plot_thickness_by_dim(covering, radii, plotstyle='k.'):
835 """Plot of thickness of neighbourhoods as a function of their 836 estimated dimension. r is defined as the neighbourhood's 837 r_max - r_min. 838 839 Returns figure handle.""" 840 rlo, rhi = extent(radii.values()) 841 assert rlo > 0 and rhi > rlo 842 f = figure() 843 rs, ds = filter_by_radius(covering, radii, 0, rhi) 844 if ds != []: 845 plot(ds, [r[1] for r in rs], plotstyle) 846 ylabel(r'$r_{max} \ - \ r_{min}$',fontsize=22) 847 xlabel(r'$\rm{Dimension}$',fontsize=20) 848 return f
849 850
851 -def nhd(data, covering, refpt):
852 return data.take(covering[refpt][3], 0)
853 854 # --------------------------------------------------------------------------- 855 856
857 -def get_rV_curves(data, refpts, doplot=True, show_seq=False):
858 """Get r-V curves for pointwise dimension estimate of data""" 859 if doplot: 860 figure() 861 rV = {} 862 for refpt in refpts: 863 if show_seq: 864 logv, logd, ixs_sorted = log_distances(data, refpt, doplot, 865 return_ixs=True) 866 else: 867 logv, logd = log_distances(data, refpt, doplot, 868 return_ixs=False) 869 if show_seq: 870 # get points that are not in increasing or decreasing sequence 871 notinseq = out_of_seq(ixs_sorted) 872 inseq = remain(ixs_sorted, notinseq) 873 else: 874 notinseq = [] 875 inseq = [] 876 rV[refpt] = (logv, logd, inseq, notinseq) 877 return rV
878 879
880 -def make_secant_fig_dict(items):
881 return { 882 'Dim': items[0], 883 'refpt': int(items[1]), 884 'r_min_minslope': items[2], 885 'r_min_maxslope': items[3], 886 'index': int(items[4]) 887 }
888
889 -def prep_secant_figure(data, plotdata, ssorted, radii, ixdata, sstats, 890 start_ix, stop_ix, Delta, fignum, save='', 891 show_seq=False):
892 sms=sstats['min_slope'] 893 res_info = {'min_slope':{}, 'max_slope': {}} 894 keyslopes_min=[sms['max'], sms['min'], sms['mean'], sms['mean']+sms['std']] 895 if sms['mean']-sms['std'] > 0: 896 keyslopes_min.append(sms['mean']-sms['std']) 897 ixs_min = [int(i) for i in find_from_sorted(plotdata[:,0], keyslopes_min)] 898 refpts_min = [ixdata[ix][1] for ix in ixs_min] 899 rm=res_info['min_slope'] 900 rm['max'] = make_secant_fig_dict([sms['max'], 901 refpts_min[0]]+radii[ixs_min[0]].tolist()) 902 rm['min'] = make_secant_fig_dict([sms['min'], 903 refpts_min[1]]+radii[ixs_min[1]].tolist()) 904 rm['mean'] = make_secant_fig_dict([sms['mean'], 905 refpts_min[2]]+radii[ixs_min[2]].tolist()) 906 rm['mean_plus_std'] = make_secant_fig_dict([sms['mean']+sms['std'], \ 907 refpts_min[3]]+radii[ixs_min[3]].tolist()) 908 if sms['mean']-sms['std'] > 0: 909 rm['mean_minus_std'] = make_secant_fig_dict([sms['mean']-sms['std'], \ 910 refpts_min[4]]+radii[ixs_min[4]].tolist()) 911 912 sms=sstats['max_slope'] 913 keyslopes_max=[sms['max'], sms['min'], sms['mean'], sms['mean']+sms['std']] 914 if sms['mean']-sms['std'] > 0: 915 keyslopes_max.append(sms['mean']-sms['std']) 916 ixs_max = [int(i) for i in find_from_sorted(plotdata[:,1], keyslopes_max)] 917 refpts_max = [ixdata[ix][1] for ix in ixs_max] 918 rm=res_info['max_slope'] 919 rm['max'] = make_secant_fig_dict([sms['max'], 920 refpts_max[0]]+radii[ixs_max[0]].tolist()) 921 rm['min'] = make_secant_fig_dict([sms['min'], 922 refpts_max[1]]+radii[ixs_max[1]].tolist()) 923 rm['mean'] = make_secant_fig_dict([sms['mean'], 924 refpts_max[2]]+radii[ixs_max[2]].tolist()) 925 rm['mean_plus_std'] = make_secant_fig_dict([sms['mean']+sms['std'], \ 926 refpts_max[3]]+radii[ixs_max[3]].tolist()) 927 if sms['mean']-sms['std'] > 0: 928 rm['mean_minus_std'] = make_secant_fig_dict([sms['mean']-sms['std'], \ 929 refpts_max[4]]+radii[ixs_max[4]].tolist()) 930 931 rV=get_rV_curves(data, refpts_min+refpts_max, False, show_seq) 932 res_info['min_slope']['refpts'] = refpts_min 933 res_info['max_slope']['refpts'] = refpts_max 934 res_info['rV'] = rV 935 cols_min=['b','r','y'] # min_slope [max, min, mean] 936 cols_max=['c','m','g'] # max_slope [max, min, mean] 937 figure(fignum) 938 subplot(1,2,1) 939 xlim=[Inf,-Inf] 940 ylim=[Inf,-Inf] 941 for i in [0,1,2]: 942 logv, logd, inseq, notinseq = rV[refpts_min[i]] 943 plot(logd, logv, cols_min[i], linewidth=2) 944 if show_seq: 945 if len(inseq) > len(notinseq): 946 # plot not in seq points in red 947 plot(logd[notinseq], logv[notinseq], 'r^') 948 else: 949 # plot seq points in green 950 plot(logd[inseq], logv[inseq], 'gv') 951 minlogd = min(logd) 952 maxlogd = max(logd) 953 minlogv = min(logv) 954 maxlogv = max(logv) 955 if minlogd < xlim[0]: 956 xlim[0] = minlogd 957 if maxlogd > xlim[1]: 958 xlim[1] = maxlogd 959 if minlogv < ylim[0]: 960 ylim[0] = minlogv 961 if maxlogv > ylim[1]: 962 ylim[1] = maxlogv 963 logv, logd, inseq, notinseq = rV[refpts_max[i]] 964 plot(logd, logv, cols_max[i], linewidth=2) 965 if show_seq: 966 if len(inseq) > len(notinseq): 967 # plot not in seq points in red 968 plot(logd[notinseq], logv[notinseq], 'r^') 969 else: 970 # plot seq points in green 971 plot(logd[inseq], logv[inseq], 'gv') 972 minlogd = min(logd) 973 maxlogd = max(logd) 974 minlogv = min(logv) 975 maxlogv = max(logv) 976 if minlogd < xlim[0]: 977 xlim[0] = minlogd 978 if maxlogd > xlim[1]: 979 xlim[1] = maxlogd 980 if minlogv < ylim[0]: 981 ylim[0] = minlogv 982 if maxlogv > ylim[1]: 983 ylim[1] = maxlogv 984 # plot line marking where start_ix'th point lies 985 plot(xlim, [logv[start_ix], logv[start_ix]], 'k--', linewidth=2) 986 # if stop_ix < N, plot line marking where stop_ix'th point lies 987 if stop_ix < len(data): 988 plot(xlim, [logv[stop_ix], logv[stop_ix]], 'k--', linewidth=2) 989 # New loop, to ensure markers are on top of all lines: 990 # Mark r_min and r_max for secant of both max and min slope, 991 # for each r-V curve plotted 992 for i in [0,1,2]: 993 logv, logd, inseq, notinseq = rV[refpts_min[i]] 994 loix_min = ixdata[ixs_min[i]][0][0] 995 hiix_min = find(logv, logv[loix_min]+log(Delta)) 996 plot([logd[loix_min]], [logv[loix_min]], cols_min[i]+'o', 997 markersize=10) 998 plot([logd[hiix_min]], [logv[hiix_min]], cols_min[i]+'o', 999 markersize=10) 1000 loix_min = ixdata[ixs_min[i]][0][1] 1001 hiix_min = find(logv, logv[loix_min]+log(Delta)) 1002 plot([logd[loix_min]], [logv[loix_min]], cols_min[i]+'s', 1003 markersize=10) 1004 plot([logd[hiix_min]], [logv[hiix_min]], cols_min[i]+'s', 1005 markersize=10) 1006 logv, logd, inseq, notinseq = rV[refpts_max[i]] 1007 loix_max = ixdata[ixs_max[i]][0][0] 1008 hiix_max = find(logv, logv[loix_max]+log(Delta)) 1009 plot([logd[loix_max]], [logv[loix_max]], cols_max[i]+'o', 1010 markersize=10) 1011 plot([logd[hiix_max]], [logv[hiix_max]], cols_max[i]+'o', 1012 markersize=10) 1013 loix_max = ixdata[ixs_max[i]][0][1] 1014 hiix_max = find(logv, logv[loix_max]+log(Delta)) 1015 plot([logd[loix_max]], [logv[loix_max]], cols_max[i]+'s', 1016 markersize=10) 1017 plot([logd[hiix_max]], [logv[hiix_max]], cols_max[i]+'s', 1018 markersize=10) 1019 # plot bar for Delta 1020 bar_x = 0.1*(xlim[1]-xlim[0])+xlim[0] 1021 bar_y = 0.5*(ylim[1]-ylim[0])+ylim[0] 1022 plot([bar_x, bar_x],[bar_y, bar_y+log(Delta)],'k-',linewidth=10) 1023 # labels 1024 fontmath_h = args(fontsize=22,fontname='Times', 1025 verticalalignment='top') 1026 fontmath_v = args(fontsize=22,fontname='Times', 1027 horizontalalignment='right') 1028 xlabel(r'$\rm{log \ } r_k$',fontmath_h) 1029 ylabel(r'$\rm{log \ } k$',fontmath_v) 1030 subplot(1,2,2) 1031 rescatter(plotdata, array([0.2,0.2,0.2]), newfigure=False) 1032 # replot selected points 1033 for i in [0,1,2]: 1034 rescatter(array([plotdata[ixs_min[i]]]), cols_min[i], 1035 marker='o', marker_size=150, newfigure=False) 1036 rescatter(array([plotdata[ixs_max[i]]]), cols_max[i], 1037 marker='s', marker_size=225, newfigure=False) 1038 figure(fignum).axes[0].set_xlim(xlim) 1039 figure(fignum).axes[0].set_ylim(ylim) 1040 figure(fignum).axes[0].set_position([0.07, 0.11, 0.41, 0.85]) 1041 figure(fignum).axes[1].set_position([0.54, 0.11, 0.41, 0.85]) 1042 figure(fignum).set_figwidth(14.5) 1043 draw() 1044 if save != '': 1045 figure(fignum).savefig(save+'.png') 1046 figure(fignum).savefig(save+'.svg') 1047 figure(fignum).savefig(save+'.eps') 1048 return res_info
1049 1050
1051 -def slope_range(data, refptix, step=2, Delta=10, startix=10, stopix=None, 1052 doplot=False, maxD=Inf):
1053 """Slope range by secants over a range of log(Delta) ball volumes""" 1054 N=size(data,0) 1055 assert Delta > 1 1056 assert N > Delta*startix 1057 logv_raw = log(range(1,N+1)) 1058 logv, logd, d, d_inv_dict = log_distances_with_D(data, refptix, logv_raw) 1059 hi_slope = 0 1060 lo_slope = 1e10 1061 loix = startix 1062 hiix = loix*Delta 1063 if stopix is None: 1064 stopix = N 1065 assert stopix <= N 1066 assert hiix < stopix 1067 logDelta = log(Delta) 1068 slope_by_loix = {} 1069 while True: 1070 # slope by secant 1071 slope = logDelta/(logd[hiix]-logd[loix]) 1072 if slope > hi_slope and slope < maxD: 1073 hi_slope = slope 1074 hi_slope_ix = loix 1075 if slope < lo_slope and slope < maxD: 1076 lo_slope = slope 1077 lo_slope_ix = loix 1078 slope_by_loix[loix] = slope 1079 loix += step 1080 hiix = Delta*loix 1081 if hiix >= stopix: 1082 break 1083 if doplot: 1084 plot(sortedDictValues(slope_by_loix)) 1085 return (slope_by_loix, lo_slope, lo_slope_ix, 1086 hi_slope, hi_slope_ix, logv, logd)
1087 1088 1089
1090 -def scatterplot_slopes(data, num_samples=None, startix=10, stopix=None, 1091 Delta=10, marker='o', marker_size=40, cmap=cm.hot, 1092 color_source=None, step=5, fignum=None, maxD=Inf):
1093 """fignum = 0 switches off plotting (just return statistics)""" 1094 if num_samples is None: 1095 num_samples = len(data)/5 1096 range_step = 5 1097 else: 1098 range_step = int(len(data)/num_samples) 1099 if range_step > 0: 1100 refpts = arange(0,len(data),range_step,'i') 1101 else: 1102 raise "too many sample points specified" 1103 num_refpts = len(refpts) 1104 plotdata = zeros((num_refpts,3),Float) 1105 ixdata = {} 1106 # colors indicate either min or max radii 1107 colors = zeros((num_refpts,3),Float) 1108 if color_source == 'lo': 1109 cix=0 1110 title_str = 'min' 1111 elif color_source == 'hi': 1112 cix=1 1113 title_str = 'max' 1114 elif color_source is None: 1115 title_str = '' 1116 col = array([0.3,0.3,0.3]) 1117 else: 1118 raise "invalid color source" 1119 print "Expect %i dots when finished:"%int(num_refpts/50) 1120 for i in range(num_refpts): 1121 if mod(i,50) == 49: 1122 print ".", 1123 slope_by_loix, lo, loix, hi, hiix, logv, logd = slope_range(data, 1124 refpts[i], startix=startix, stopix=stopix, 1125 Delta=Delta, step=step, maxD=maxD) 1126 plotdata[i,:] = array([lo,hi,refpts[i]]) 1127 ixdata[i] = [(loix,hiix),refpts[i]] 1128 colors[i,:] = array([exp(logd[loix]),exp(logd[hiix]),refpts[i]]) 1129 if fignum is None: 1130 figure() 1131 dofig = True 1132 elif fignum == 0: 1133 dofig = False 1134 else: 1135 figure(fignum) 1136 dofig = True 1137 if dofig: 1138 if color_source is None: 1139 handle=scatter(plotdata[:,0],plotdata[:,1],marker=marker,c=col, 1140 s=marker_size) 1141 else: 1142 handle=scatter(plotdata[:,0],plotdata[:,1],marker=marker,c=colors[:,cix], 1143 cmap=cmap,s=marker_size) 1144 colorbar() 1145 fonttext = args(fontsize=20,fontname='Times') 1146 xlabel(r'$\rm{min \ slope}$',fonttext) 1147 ylabel(r'$\rm{max \ slope}$',fonttext) 1148 if title_str != '': 1149 title('Color-coded by r_min of '+title_str+' slope') 1150 else: 1151 handle=None 1152 s=(sorted_by_slope(plotdata,0),sorted_by_slope(plotdata,1)) 1153 if color_source is None: 1154 c = () 1155 else: 1156 c=(sorted_by_radius(colors,0),sorted_by_radius(colors,1)) 1157 stats = {'min_slope': {}, 'max_slope': {}} 1158 stats['min_slope']['mean']=mean(plotdata[:,0]) 1159 stats['max_slope']['mean']=mean(plotdata[:,1]) 1160 stats['min_slope']['std']=std(plotdata[:,0]) 1161 stats['max_slope']['std']=std(plotdata[:,1]) 1162 stats['min_slope']['min']=min(plotdata[:,0]) 1163 stats['max_slope']['min']=min(plotdata[:,1]) 1164 stats['min_slope']['max']=max(plotdata[:,0]) 1165 stats['max_slope']['max']=max(plotdata[:,1]) 1166 print "\n" 1167 return (plotdata, s, colors, c, ixdata, stats, handle)
1168 1169
1170 -def sorted_by_slope(plotdata, select=0):
1171 """select: 0 = rmin, 1 = rmax""" 1172 ixs = argsort(plotdata, 0)[:,select] 1173 return zip(take(plotdata, ixs, 0)[:,select], ixs)
1174
1175 -def sorted_by_radius(colors, select=0):
1176 """select: 0 = rmin, 1 = rmax""" 1177 ixs = argsort(colors, 0)[:,select] 1178 return zip(take(colors, ixs, 0)[:,select], ixs)
1179
1180 -def find_from_sorted(x, v, next_largest=1, indices=None):
1181 if isinstance(v, _seq_types): 1182 a=array(x) 1183 res = [find(a, y, next_largest, indices) for y in v] 1184 return res 1185 else: 1186 return find(array(x), v, next_largest, indices)
1187 1188
1189 -def rescatter(plotdata, colors, color_source=None, marker='o', marker_size=40, 1190 cmap=cm.hot, newfigure=True):
1191 if color_source == 'lo': 1192 cix=0 1193 title_str = 'min' 1194 elif color_source == 'hi': 1195 cix=1 1196 title_str = 'max' 1197 elif color_source is None: 1198 title_str = '' 1199 if not isinstance(colors, str): 1200 assert shape(colors) == (3,) 1201 else: 1202 raise "invalid color source" 1203 if newfigure: 1204 figure() 1205 if color_source is None: 1206 handle=scatter(plotdata[:,0],plotdata[:,1],marker=marker, 1207 c=colors,s=marker_size) 1208 else: 1209 handle=scatter(plotdata[:,0],plotdata[:,1],marker=marker, 1210 c=colors[:,cix],cmap=cmap,s=marker_size) 1211 colorbar() 1212 fonttext_v = args(fontsize=22,fontname='Times', 1213 horizontalalignment='right') 1214 fonttext_h = args(fontsize=22,fontname='Times', 1215 verticalalignment='top') 1216 xlabel(r'$\rm{min \ slope}$',fonttext_h) 1217 ylabel(r'$\rm{max \ slope}$',fonttext_v) 1218 if title_str != '': 1219 title('Color-coded by r_min of '+title_str+' slope') 1220 return handle
1221 1222
1223 -def scatter_histo(plotdata,select=1,bin_width=1,maxD=None, 1224 xlim=None,ylim=None):
1225 ds = array(plotdata[:,select]) 1226 ds.shape = (len(plotdata),) 1227 if maxD is None: 1228 maxD = int(ceil(max(ds[:,0]))) 1229 drange = arange(0, maxD+1+bin_width, bin_width, typecode='d') 1230 dbins = data_bins('D', drange) 1231 dbins.bin(ds) 1232 dbx, dby = dbins.to_array() 1233 f=figure() 1234 b=bar(dbx, dby, color='b', width=dbx[1]-dbx[0]) 1235 if xlim is not None: 1236 f.axes[0].set_xlim(xlim) 1237 draw() 1238 if ylim is not None: 1239 f.axes[0].set_ylim(ylim) 1240 draw()
1241 1242
1243 -def PD_E(a, secfig=5, verbose=False, saveplot=True, force=False):
1244 """Pointwise dimension estimation (PD-E) using secants of r-V curves. 1245 See: 1246 1247 J. Guckenheimer, "Dimension Estimates for Attractors", 1248 Contemporary Mathematics, Vol. 28, 1984. 1249 1250 R. Clewley, J. Guckenheimer, F. Valero-Cuevas, "Estimating effective 1251 degrees of freedom in motor control", IEEE Transactions in Biomedical 1252 Engineering, 2007. 1253 """ 1254 pde_name = 'PD_E-'+a.name 1255 if 'N' in a.keys(): 1256 if 'stopix' in a.keys(): 1257 if a.stopix is not None: 1258 a.stopix = int(round(0.7*a.N)) 1259 else: 1260 a.stopix = int(round(0.7*a.N)) 1261 a.num_samples = int(round(0.2*a.N)) 1262 a.step = int(round(0.005*a.N)) 1263 else: 1264 a.N = None 1265 if 'stopix' not in a.keys(): 1266 a.stopix = None 1267 try: 1268 if force: 1269 raise ValueError 1270 data, pde_args, plotdata, ssorted, colors, \ 1271 csorted, ixdata, sstats = loadObjects(pde_name) 1272 print "Loaded data set and stats for %s"%a.name 1273 # don't check the data field 1274 if filteredDict(a, ['data','bin_width'], neg=True) != \ 1275 filteredDict(pde_args, ['data','bin_width'], neg=True): 1276 raise "Incompatible argument struct from file %s"%pde_name 1277 if a.data is None: 1278 a.data = data 1279 elif not all(a.data == data): 1280 raise "Incompatible data set loaded from file %s"%pde_name 1281 except ValueError: 1282 if a.data is None: 1283 try: 1284 data = eval(a.data_gen_str, globals(), a.data_gen_fun) 1285 except: 1286 print "Problem re-calculating data from given information" 1287 else: 1288 data = a.data 1289 print "Recalculating PD-E for %s"%a.name 1290 try: 1291 csource=a.color_source 1292 except AttributeError: 1293 csource=None 1294 if verbose: 1295 scatter_fig = None 1296 else: 1297 scatter_fig = 0 1298 if a.data is None: 1299 a.data = data 1300 plotdata, ssorted, colors, csorted, ixdata, sstats, handle = \ 1301 scatterplot_slopes(data, 1302 startix=a.startix, stopix=a.stopix, 1303 num_samples=a.num_samples, 1304 Delta=a.Delta, color_source=csource, step=a.step, 1305 maxD=a.maxD, fignum=scatter_fig) 1306 saveObjects([data, a, plotdata, ssorted, colors, csorted, ixdata, sstats], 1307 pde_name, force=True) 1308 if verbose: 1309 scatter_histo(plotdata,bin_width=a.bin_width,select=0) 1310 scatter_histo(plotdata,bin_width=a.bin_width,select=1) 1311 info(sstats) 1312 if saveplot: 1313 save_info='result_'+a.name 1314 else: 1315 save_info='' 1316 if 'show_seq' in a.keys(): 1317 show_seq = a['show_seq'] 1318 else: 1319 show_seq = False 1320 more_info=prep_secant_figure(data, plotdata, ssorted, colors, ixdata, sstats, 1321 a.startix, a.stopix, a.Delta, secfig, save=save_info, 1322 show_seq=show_seq) 1323 try: 1324 more_info['handle']=handle 1325 except UnboundLocalError: 1326 more_info['handle']=None 1327 more_info['stats']=sstats 1328 more_info['args']=a 1329 return more_info
1330