Source code for tools21cm.lightcone

'''
Methods to construct lightcones.
'''

from . import const, conv
import numpy as np
import os, glob, time
from .helper_functions import get_mesh_size, \
    determine_redshift_from_filename, get_data_and_type, print_msg, find_idx
from .density_file import DensityFile
from .vel_file import VelocityFile
from . import cosmo as cm
from . import statistics as st
from . import smoothing
import scipy.interpolate
from tqdm import tqdm

[docs] def make_lightcone(filenames, z_low = None, z_high = None, depth_mhz = None, file_redshifts = None, cbin_bits = 32, cbin_order = 'c', los_axis = 0, raw_density = False, interpolation='linear', reading_function=None, box_length_mpc=None): ''' Generate a lightcone from xfrac, density, or dT data cubes. Parameters: ----------- filenames : str, array-like, or dict The coeval cubes can be specified in one of the following formats: - An array of file names. - A text file containing the file names. - A dictionary with redshift values as keys and data as values. z_low : float, optional The lowest redshift for the lightcone. If not provided, the redshift of the lowest-z coeval cube is used. z_high : float, optional The highest redshift for the lightcone. If not provided, the redshift of the highest-z coeval cube is used. depth_mhz : float, optional The frequency depth of the lightcone in MHz. Used in determining the output redshifts. file_redshifts : str or array-like, optional The redshifts corresponding to the coeval cubes. This can be: - None: redshifts are inferred from file names. - Array: an array containing the redshift of each coeval cube. - Filename: a file from which redshifts will be read. cbin_bits : int, default=32 The number of bits in the cbin format data files, if applicable. cbin_order : char, default='c' The data order in the cbin format files ('c' for C-order, 'f' for Fortran-order). los_axis : int, default=0 The axis to use as the line-of-sight for the coeval cubes. raw_density : bool, default=False If True and the data is from a density file, the raw simulation units will be returned instead of the density in cgs units. interpolation : str, default='linear' The interpolation method to use for slices between output redshifts. Options: 'linear', 'step', 'sigmoid', 'step_cell'. reading_function : callable, optional A custom function to read the data cubes. If not provided, the function will use a default method to read the files. box_length_mpc : float, optional The length of the simulation box in comoving megaparsecs (Mpc). Used to calculate the output redshifts. Returns: -------- tuple A tuple containing: - lightcone (ndarray): The lightcone volume, where the first two axes have the same size as the input cubes. - z (ndarray): An array containing the redshifts along the line-of-sight. Notes: ------ - If `z_low` is provided, that redshift will be the lowest included, even if there is no coeval cube at exactly that redshift. This may result in subtle differences from results calculated with the old `freq_box` routine. - If `filenames` is a dictionary, it should have redshift values as keys and corresponding file names as values. ''' if not interpolation in ['linear', 'step', 'sigmoid', 'step_cell']: raise ValueError('Unknown interpolation type: %s' % interpolation) if isinstance(filenames,dict): if reading_function is None: reading_function = lambda x: x file_redshifts = np.sort(list(filenames.keys())) filenames = {ii: filenames[zi] for ii,zi in enumerate(file_redshifts)} if reading_function is None: #Figure out output redshifts, file names and size of output filenames = _get_filenames(filenames) file_redshifts = _get_file_redshifts(file_redshifts, filenames) mesh_size = get_mesh_size(filenames[0]) else: assert file_redshifts is not None mesh_size = reading_function(filenames[0]).shape assert len(file_redshifts) == len(filenames) output_z = _get_output_z(file_redshifts, z_low, z_high, mesh_size[0], box_length_mpc=box_length_mpc, dnu=depth_mhz) #Make the output 32-bit to save memory lightcone = np.zeros((mesh_size[0], mesh_size[1], len(output_z)), dtype='float32') comoving_pos_idx = 0 z_bracket_low = None; z_bracket_high = None data_low = None; data_high = None # Make the lightcone, one slice at a time # print_msg('Making lightcone between %f < z < %f' % (output_z.min(), output_z.max())) print('Making lightcone between %f < z < %f' % (output_z.min(), output_z.max())) time.sleep(1) for ii in tqdm(range(len(output_z))): z = output_z[ii] z_bracket_low_new = file_redshifts[file_redshifts <= z].max() z_bracket_high_new = file_redshifts[file_redshifts > z].min() #Do we need a new file for the low z? if z_bracket_low_new != z_bracket_low: z_bracket_low = z_bracket_low_new file_idx = np.argmin(np.abs(file_redshifts - z_bracket_low)) if data_high is None: if reading_function is None: data_low, datatype = get_data_and_type(filenames[file_idx], cbin_bits, cbin_order, raw_density) else: data_low = reading_function(filenames[file_idx])#; print('yes') else: #No need to read the file again data_low = data_high #Do we need a new file for the high z? if z_bracket_high_new != z_bracket_high: z_bracket_high = z_bracket_high_new file_idx = np.argmin(np.abs(file_redshifts - z_bracket_high)) if reading_function is None: data_high, datatype = get_data_and_type(filenames[file_idx], cbin_bits, cbin_order, raw_density) else: data_high = reading_function(filenames[file_idx]) #Make the slice by interpolating, then move to next index data_interp = _get_interp_slice(data_high, data_low, z_bracket_high, \ z_bracket_low, z, comoving_pos_idx, los_axis, interpolation) lightcone[:,:,comoving_pos_idx] = data_interp # print('%.2f %% completed.'%(100*(ii+1)/output_z.size)) comoving_pos_idx += 1 print('...done') return lightcone, output_z
[docs] def make_velocity_lightcone(vel_filenames, dens_filenames, z_low = None, \ z_high = None, file_redshifts = None, los_axis = 0, vel_reading_function=None, dens_reading_function=None): ''' Generate a lightcone from velocity data. Since velocity files contain momentum rather than actual velocity, filenames for both velocity and density must be specified. Parameters: ----------- vel_filenames : str, array-like, or dict The coeval velocity cubes. Can be specified in one of the following formats: - An array of file names. - A text file containing the file names. - A dictionary with redshift values as keys and data as values. dens_filenames : str, array-like, or dict The coeval density cubes in the same way as `vel_filenames`. z_low : float, optional The lowest redshift for the lightcone. If not provided, the redshift of the lowest-z coeval cube is used. z_high : float, optional The highest redshift for the lightcone. If not provided, the redshift of the highest-z coeval cube is used. file_redshifts : str or array-like, optional The redshifts corresponding to the coeval cubes. This can be: - None: redshifts are inferred from file names. - Array: an array containing the redshift of each coeval cube. - Filename: a file from which redshifts will be read. los_axis : int, default=0 The axis to use as the line-of-sight for the coeval cubes. Returns: -------- tuple A tuple containing: - lightcone (ndarray): The lightcone volume where the first two axes have the same size as the input cubes. - z (ndarray): An array containing the redshifts along the line-of-sight. Notes: ------ If `z_low` is provided, that redshift will be the lowest included, even if there is no coeval cube at exactly that redshift. This may result in subtle differences from results calculated with the old `freq_box` routine. If `vel_filenames` or `dens_filenames` is provided as a dictionary, it should have redshift values as keys and corresponding file names as values. ''' if isinstance(vel_filenames, dict): file_redshifts = np.sort(list(vel_filenames.keys())) vel_filenames = {ii: vel_filenames[zi] for ii, zi in enumerate(file_redshifts)} dens_filenames = {ii: dens_filenames[zi] for ii, zi in enumerate(file_redshifts)} if vel_reading_function is None: vel_reading_function = lambda x: x if dens_reading_function is None: dens_reading_function = lambda x: x mesh_size = dens_reading_function(dens_filenames[0]).shape get_kms = lambda vel_file, dfile: vel_file else: dens_filenames = _get_filenames(dens_filenames) file_redshifts = _get_file_redshifts(file_redshifts, dens_filenames) vel_filenames = _get_filenames(vel_filenames) mesh_size = get_mesh_size(dens_filenames[0]) if vel_reading_function is None: vel_reading_function = lambda x: VelocityFile(x) if dens_reading_function is None: dens_reading_function = lambda x: DensityFile(x) get_kms = lambda vel_file, dfile: vel_file.get_kms_from_density(dfile) assert(len(file_redshifts) == len(vel_filenames)) assert(len(vel_filenames) == len(dens_filenames)) output_z = _get_output_z(file_redshifts, z_low, z_high, mesh_size[0]) lightcone = np.zeros((3, mesh_size[0], mesh_size[1], len(output_z)), dtype='float32') comoving_pos_idx = 0 z_bracket_low = None; z_bracket_high = None print('Making velocity lightcone between %f < z < %f' % (output_z.min(), output_z.max())) time.sleep(1) for ii in tqdm(range(len(output_z))): z = output_z[ii] z_bracket_low_new = file_redshifts[file_redshifts <= z].max() z_bracket_high_new = file_redshifts[file_redshifts > z].min() if z_bracket_low_new != z_bracket_low: z_bracket_low = z_bracket_low_new file_idx = np.argmin(np.abs(file_redshifts - z_bracket_low)) dfile = dens_reading_function(dens_filenames[file_idx]) vel_file = vel_reading_function(vel_filenames[file_idx]) data_low = get_kms(vel_file,dfile) del dfile del vel_file if z_bracket_high_new != z_bracket_high: z_bracket_high = z_bracket_high_new file_idx = np.argmin(np.abs(file_redshifts - z_bracket_high)) dfile = dens_reading_function(dens_filenames[file_idx]) vel_file = vel_reading_function(vel_filenames[file_idx]) data_high = get_kms(vel_file,dfile) del dfile del vel_file data_interp = _get_interp_slice(data_high, data_low, z_bracket_high, \ z_bracket_low, z, comoving_pos_idx, los_axis) lightcone[:,:,:,comoving_pos_idx] = data_interp comoving_pos_idx += 1 print('...done') return lightcone, output_z
def _get_output_z(file_redshifts, z_low, z_high, box_grid_n, box_length_mpc=None, dnu=None): ''' Determine the output redshifts. For internal use. ''' if z_low is None: z_low = file_redshifts.min() if z_high is None: z_high = file_redshifts.max() if(dnu): output_z = redshifts_at_equal_frequncy_width(z_low, z_high, dnu) else: output_z = redshifts_at_equal_comoving_distance(z_low, z_high, box_grid_n, box_length_mpc=box_length_mpc) if min(output_z) < min(file_redshifts) or max(output_z) > max(file_redshifts): print('Warning! You have specified a redshift range of %.3f < z < %.3f' % (min(output_z), max(output_z))) print('but you only have files for the range %.3f < z < %.3f.' % (min(file_redshifts), max(file_redshifts))) print('The redshift range will be truncated.') output_z = output_z[output_z >= min(file_redshifts)] output_z = output_z[output_z <= max(file_redshifts)] if len(output_z) < 1: raise Exception('No valid redshifts in range!') return output_z
[docs] def redshifts_at_equal_comoving_distance(z_low, z_high, box_grid_n=256, box_length_mpc=None): ''' Make a frequency axis vector with equal spacing in co-moving LOS coordinates. The comoving distance between each frequency will be the same as the cell size of the box. Parameters: z_low (float): The lower redshift z_high (float): The upper redhisft box_grid_n = 256 (int): the number of slices in an input box box_length_mpc (float): the size of the box in cMpc. If None, set to conv.LB Returns: numpy array containing the redshifts ''' if box_length_mpc is None: box_length_mpc = conv.LB assert(z_high > z_low) z = z_low z_array = [] while z < z_high: z_array.append(z) nu = const.nu0/(1.0+z) dnu = const.nu0*const.Hz(z)*box_length_mpc/(1.0 + z)**2/const.c/float(box_grid_n) z = const.nu0/(nu - dnu) - 1.0 return np.array(z_array)
[docs] def redshifts_at_equal_frequncy_width(z_low, z_high, dnu): ''' Make a redshift vector with equally space in the frequency domain. Parameters: z_low (float): The lower redshift z_high (float): The upper redhisft dnu (float): The frequency channel width in MHz Returns: numpy array containing the redshifts ''' assert(z_high > z_low) z = z_high z_array = [] while z > z_low: z_array.append(z) z = (z - dnu/const.nu0 * (1+z))/(1 + dnu/const.nu0 * (1+z)) return np.array(z_array)[::-1]
[docs] def get_lightcone_subvolume(lightcone, redshifts, central_z, \ depth_mhz=None, depth_mpc=None, odd_num_cells=True, \ subtract_mean=True, fov_Mpc=None, verbose=True): ''' Extract a subvolume from a lightcone, at a given central redshift, and with a given depth. The depth can be specified in Mpc or MHz. You must give exactly one of these parameters. Parameters: ligthcone (numpy array): the lightcone redshifts (numpy array): the redshifts along the LOS central_z (float): the central redshift of the subvolume depth_mhz (float): the depth of the subvolume in MHz depth_mpc (float): the depth of the subvolume in Mpc odd_num_cells (bool): if true, the depth of the box will always be an odd number of cells. This avoids problems with power spectrum calculations. subtract_mean (bool): if true, subtract the mean of the signal (Default: True) fov_Mpc (float): the FoV size in Mpc verbose (bool) : turning the verbosity. Returns: Tuple with (subvolume, dims, sub_redshifts) where dims is a tuple with the dimensions of the subvolume in Mpc ''' assert len(np.nonzero([depth_mhz, depth_mpc])) == 1 if fov_Mpc is None: fov_Mpc = conv.LB central_nu = cm.z_to_nu(central_z) if depth_mpc != None: #Depth is given in Mpc central_dist = cm.nu_to_cdist(central_nu) min_cdist, max_cdist = central_dist-depth_mpc/2., central_dist+depth_mpc/2. low_z = cm.cdist_to_z(min_cdist) high_z = cm.cdist_to_z(max_cdist) min_freq, max_freq = cm.z_to_nu(high_z), cm.z_to_nu(low_z) else: #Depth is given in MHz min_freq, max_freq = central_nu-depth_mhz/2., central_nu+depth_mhz/2. min_cdist, max_cdist = cm.z_to_cdist(cm.nu_to_z(max_freq)), cm.z_to_cdist(cm.nu_to_z(min_freq)) low_z = cm.nu_to_z(max_freq) high_z = cm.nu_to_z(min_freq) if low_z < redshifts.min(): raise Exception('Lowest z is outside range') if high_z > redshifts.max(): raise Exception('Highest z is outside range') #low_n = int(find_idx(redshifts, low_z)) #high_n = int(find_idx(redshifts, high_z)) low_n = np.argmin(np.abs(redshifts - low_z)) high_n = np.argmin(np.abs(redshifts - high_z)) if (high_n-low_n) % 2 == 0 and odd_num_cells: high_n += 1 subbox = lightcone[:,:,low_n:high_n] if subtract_mean: subbox = st.subtract_mean_signal(subbox, los_axis=2) #box_depth = float(subbox.shape[2])/lightcone.shape[1]*fov_Mpc box_depth = max_cdist-min_cdist box_dims = (fov_Mpc, fov_Mpc, box_depth) if(verbose): print('Slice redshift: %.3f %.3f' %(redshifts[low_n], redshifts[high_n])) print('Slice frequencies (MHz): %.1f %.1f' %(min_freq, max_freq)) print('Slice comoving distances (cMpc): %.2f %.2f' %(min_cdist, max_cdist)) print('Slice indices:', low_n, high_n) print('Lightcone Subvolume : %s, [%.2f, %.2f, %.2f] cMpc' %(subbox.shape, fov_Mpc, fov_Mpc, box_depth)) return subbox, box_dims, redshifts[low_n:high_n]
def _get_interp_slice(data_high, data_low, z_bracket_high, z_bracket_low, z, \ comoving_pos_idx, los_axis, interpolation='linear'): ''' Interpolate between two data slices. For internal use. ''' slice_ind = comoving_pos_idx % data_low.shape[1] slice_low = _get_slice(data_low, slice_ind, los_axis) slice_high = _get_slice(data_high, slice_ind, los_axis) if interpolation == 'linear': slice_interp = ((z-z_bracket_low)*slice_high + \ (z_bracket_high - z)*slice_low)/(z_bracket_high-z_bracket_low) elif interpolation == 'step': transition_z = (z_bracket_high-z_bracket_low)/2. if z < transition_z: slice_interp = slice_low.copy() else: slice_interp = slice_high.copy() elif interpolation == 'sigmoid': zp = -10. + 20.*(z-z_bracket_low)/(z_bracket_high-z_bracket_low) beta = 2. g = 1./(1.+np.exp(-beta*zp)) slice_interp = slice_low*(1.-g) + slice_high*g elif interpolation == 'step_cell': slice_interp = _get_step_weighted_slice(slice_low, slice_high, z_bracket_high, z_bracket_low, z) else: raise Exception('Unknown interpolation method: %s' % interpolation) return slice_interp def _get_step_weighted_slice(slice_low, slice_high, z_bracket_high, z_bracket_low, z): ''' Interpolate using a step function where the step position is based on the proximity to an ionized region ''' smoothed = smoothing.smooth_gauss(slice_high, sigma=4.) diff = np.abs(slice_high-slice_low) #smoothed=1 means early transition. smoothed=0 means late #smoothed -= changed_cells.min() #smoothed /= (changed_cells.max()-changed_cells.min()) step_transitions = _get_step_transitions(smoothed, diff>1.e-3) step_transitions = z_bracket_low + step_transitions*(z_bracket_high-z_bracket_low) interp_slice = slice_high.copy() interp_slice[z < step_transitions] = slice_low[z < step_transitions] return interp_slice def _get_step_transitions(cell_dist, diff_idx): #Replace each value in cell_dist with the number of cells #with a lower value. Then normalize to be between 0 and 1 values_flat = cell_dist[diff_idx].flatten() values_sorted = sorted(values_flat+np.random.random(len(values_flat))*1.e-9) values_uniform = np.linspace(values_sorted[0], values_sorted[-1], len(values_sorted)) f = scipy.interpolate.interp1d(values_sorted, values_uniform, bounds_error=False, fill_value=0.) output_values = f(cell_dist) output_values -= output_values.min() output_values /= output_values.max() return output_values def _get_slice(data, idx, los_axis, slice_depth=1): ''' Slice a data cube along a given axis. For internal use. ''' assert len(data.shape) == 3 or len(data.shape) == 4 assert los_axis >= 0 and los_axis < 3 idx1 = idx idx2 = idx1+slice_depth if len(data.shape) == 3: #scalar field if los_axis == 0: return np.squeeze(data[idx1:idx2,:,:]) elif los_axis == 1: return np.squeeze(data[:,idx1:idx2,:]) return np.squeeze(data[:,:,idx1:idx2]) else: #Vector field if los_axis == 0: return np.squeeze(data[:,idx1:idx2,:,:]) elif los_axis == 1: return np.squeeze(data[:,:,idx1:idx2,:]) return np.squeeze(data[:,:,:,idx1:idx2]) def _get_filenames(filenames_in): ''' If filenames_in is a list of files, return as it is If it is a directory, make sure it only contains data files, then return the list of files in the directory If it is a text file, read the list of files from the file ''' if hasattr(filenames_in, '__iter__'): filenames_out = filenames_in elif os.path.isdir(filenames_in): files_in_dir = glob.glob(filenames_in + '/*') extensions = [os.path.splitext(f)[-1] for f in files_in_dir] if not _all_same(extensions): raise Exception('The directory may only contain one file type.') filenames_out = files_in_dir elif os.path.isfile(filenames_in): f = open(filenames_in) names = [l.strip() for l in f.readlines()] f.close() filenames_out = names else: raise Exception('Invalid filenames input') return np.array(filenames_out) def _get_file_redshifts(redshifts_in, filenames): ''' If redshifts_in is None, try to determine from file names If it's a directory, read the redshifts Else, return as is ''' if hasattr(redshifts_in, '__iter__'): redshifts_out = redshifts_in elif redshifts_in is None: redshifts_out = [determine_redshift_from_filename(f) for f in filenames] redshifts_out = np.array(redshifts_out) elif os.path.exists(redshifts_in): redshifts_out = np.loadtxt(redshifts_in) else: raise Exception('Invalid data for file redshifts.') return redshifts_out def _all_same(items): return all(x == items[0] for x in items)