'''
Methods to construct lightcones.
'''
from . import const, conv
import numpy as np
import os, glob, time
from .helper_functions import get_mesh_size, \
determine_redshift_from_filename, get_data_and_type, print_msg, find_idx
from .density_file import DensityFile
from .vel_file import VelocityFile
from . import cosmo as cm
from . import statistics as st
from . import smoothing
import scipy.interpolate
from tqdm import tqdm
[docs]
def make_lightcone(filenames, z_low = None, z_high = None, depth_mhz = None, file_redshifts = None, cbin_bits = 32, cbin_order = 'c', los_axis = 0, raw_density = False, interpolation='linear', reading_function=None, box_length_mpc=None):
'''
Generate a lightcone from xfrac, density, or dT data cubes.
Parameters:
-----------
filenames : str, array-like, or dict
The coeval cubes can be specified in one of the following formats:
- An array of file names.
- A text file containing the file names.
- A dictionary with redshift values as keys and data as values.
z_low : float, optional
The lowest redshift for the lightcone. If not provided, the redshift of
the lowest-z coeval cube is used.
z_high : float, optional
The highest redshift for the lightcone. If not provided, the redshift of
the highest-z coeval cube is used.
depth_mhz : float, optional
The frequency depth of the lightcone in MHz. Used in determining the
output redshifts.
file_redshifts : str or array-like, optional
The redshifts corresponding to the coeval cubes. This can be:
- None: redshifts are inferred from file names.
- Array: an array containing the redshift of each coeval cube.
- Filename: a file from which redshifts will be read.
cbin_bits : int, default=32
The number of bits in the cbin format data files, if applicable.
cbin_order : char, default='c'
The data order in the cbin format files ('c' for C-order, 'f' for Fortran-order).
los_axis : int, default=0
The axis to use as the line-of-sight for the coeval cubes.
raw_density : bool, default=False
If True and the data is from a density file, the raw simulation units will be
returned instead of the density in cgs units.
interpolation : str, default='linear'
The interpolation method to use for slices between output redshifts.
Options: 'linear', 'step', 'sigmoid', 'step_cell'.
reading_function : callable, optional
A custom function to read the data cubes. If not provided, the function will
use a default method to read the files.
box_length_mpc : float, optional
The length of the simulation box in comoving megaparsecs (Mpc). Used to
calculate the output redshifts.
Returns:
--------
tuple
A tuple containing:
- lightcone (ndarray): The lightcone volume, where the first two axes
have the same size as the input cubes.
- z (ndarray): An array containing the redshifts along the line-of-sight.
Notes:
------
- If `z_low` is provided, that redshift will be the lowest included, even if
there is no coeval cube at exactly that redshift. This may result in subtle
differences from results calculated with the old `freq_box` routine.
- If `filenames` is a dictionary, it should have redshift values as keys and
corresponding file names as values.
'''
if not interpolation in ['linear', 'step', 'sigmoid', 'step_cell']:
raise ValueError('Unknown interpolation type: %s' % interpolation)
if isinstance(filenames,dict):
if reading_function is None:
reading_function = lambda x: x
file_redshifts = np.sort(list(filenames.keys()))
filenames = {ii: filenames[zi] for ii,zi in enumerate(file_redshifts)}
if reading_function is None:
#Figure out output redshifts, file names and size of output
filenames = _get_filenames(filenames)
file_redshifts = _get_file_redshifts(file_redshifts, filenames)
mesh_size = get_mesh_size(filenames[0])
else:
assert file_redshifts is not None
mesh_size = reading_function(filenames[0]).shape
assert len(file_redshifts) == len(filenames)
output_z = _get_output_z(file_redshifts, z_low, z_high, mesh_size[0], box_length_mpc=box_length_mpc, dnu=depth_mhz)
#Make the output 32-bit to save memory
lightcone = np.zeros((mesh_size[0], mesh_size[1], len(output_z)), dtype='float32')
comoving_pos_idx = 0
z_bracket_low = None; z_bracket_high = None
data_low = None; data_high = None
# Make the lightcone, one slice at a time
# print_msg('Making lightcone between %f < z < %f' % (output_z.min(), output_z.max()))
print('Making lightcone between %f < z < %f' % (output_z.min(), output_z.max()))
time.sleep(1)
for ii in tqdm(range(len(output_z))):
z = output_z[ii]
z_bracket_low_new = file_redshifts[file_redshifts <= z].max()
z_bracket_high_new = file_redshifts[file_redshifts > z].min()
#Do we need a new file for the low z?
if z_bracket_low_new != z_bracket_low:
z_bracket_low = z_bracket_low_new
file_idx = np.argmin(np.abs(file_redshifts - z_bracket_low))
if data_high is None:
if reading_function is None: data_low, datatype = get_data_and_type(filenames[file_idx], cbin_bits, cbin_order, raw_density)
else: data_low = reading_function(filenames[file_idx])#; print('yes')
else: #No need to read the file again
data_low = data_high
#Do we need a new file for the high z?
if z_bracket_high_new != z_bracket_high:
z_bracket_high = z_bracket_high_new
file_idx = np.argmin(np.abs(file_redshifts - z_bracket_high))
if reading_function is None: data_high, datatype = get_data_and_type(filenames[file_idx], cbin_bits, cbin_order, raw_density)
else: data_high = reading_function(filenames[file_idx])
#Make the slice by interpolating, then move to next index
data_interp = _get_interp_slice(data_high, data_low, z_bracket_high, \
z_bracket_low, z, comoving_pos_idx, los_axis, interpolation)
lightcone[:,:,comoving_pos_idx] = data_interp
# print('%.2f %% completed.'%(100*(ii+1)/output_z.size))
comoving_pos_idx += 1
print('...done')
return lightcone, output_z
[docs]
def make_velocity_lightcone(vel_filenames, dens_filenames, z_low = None, \
z_high = None, file_redshifts = None, los_axis = 0,
vel_reading_function=None, dens_reading_function=None):
'''
Generate a lightcone from velocity data. Since velocity files contain momentum
rather than actual velocity, filenames for both velocity and density must be specified.
Parameters:
-----------
vel_filenames : str, array-like, or dict
The coeval velocity cubes. Can be specified in one of the following formats:
- An array of file names.
- A text file containing the file names.
- A dictionary with redshift values as keys and data as values.
dens_filenames : str, array-like, or dict
The coeval density cubes in the same way as `vel_filenames`.
z_low : float, optional
The lowest redshift for the lightcone. If not provided, the redshift of
the lowest-z coeval cube is used.
z_high : float, optional
The highest redshift for the lightcone. If not provided, the redshift of
the highest-z coeval cube is used.
file_redshifts : str or array-like, optional
The redshifts corresponding to the coeval cubes. This can be:
- None: redshifts are inferred from file names.
- Array: an array containing the redshift of each coeval cube.
- Filename: a file from which redshifts will be read.
los_axis : int, default=0
The axis to use as the line-of-sight for the coeval cubes.
Returns:
--------
tuple
A tuple containing:
- lightcone (ndarray): The lightcone volume where the first two axes
have the same size as the input cubes.
- z (ndarray): An array containing the redshifts along the line-of-sight.
Notes:
------
If `z_low` is provided, that redshift will be the lowest included, even if
there is no coeval cube at exactly that redshift. This may result in subtle
differences from results calculated with the old `freq_box` routine.
If `vel_filenames` or `dens_filenames` is provided as a dictionary, it should
have redshift values as keys and corresponding file names as values.
'''
if isinstance(vel_filenames, dict):
file_redshifts = np.sort(list(vel_filenames.keys()))
vel_filenames = {ii: vel_filenames[zi] for ii, zi in enumerate(file_redshifts)}
dens_filenames = {ii: dens_filenames[zi] for ii, zi in enumerate(file_redshifts)}
if vel_reading_function is None:
vel_reading_function = lambda x: x
if dens_reading_function is None:
dens_reading_function = lambda x: x
mesh_size = dens_reading_function(dens_filenames[0]).shape
get_kms = lambda vel_file, dfile: vel_file
else:
dens_filenames = _get_filenames(dens_filenames)
file_redshifts = _get_file_redshifts(file_redshifts, dens_filenames)
vel_filenames = _get_filenames(vel_filenames)
mesh_size = get_mesh_size(dens_filenames[0])
if vel_reading_function is None:
vel_reading_function = lambda x: VelocityFile(x)
if dens_reading_function is None:
dens_reading_function = lambda x: DensityFile(x)
get_kms = lambda vel_file, dfile: vel_file.get_kms_from_density(dfile)
assert(len(file_redshifts) == len(vel_filenames))
assert(len(vel_filenames) == len(dens_filenames))
output_z = _get_output_z(file_redshifts, z_low, z_high, mesh_size[0])
lightcone = np.zeros((3, mesh_size[0], mesh_size[1], len(output_z)), dtype='float32')
comoving_pos_idx = 0
z_bracket_low = None; z_bracket_high = None
print('Making velocity lightcone between %f < z < %f' % (output_z.min(), output_z.max()))
time.sleep(1)
for ii in tqdm(range(len(output_z))):
z = output_z[ii]
z_bracket_low_new = file_redshifts[file_redshifts <= z].max()
z_bracket_high_new = file_redshifts[file_redshifts > z].min()
if z_bracket_low_new != z_bracket_low:
z_bracket_low = z_bracket_low_new
file_idx = np.argmin(np.abs(file_redshifts - z_bracket_low))
dfile = dens_reading_function(dens_filenames[file_idx])
vel_file = vel_reading_function(vel_filenames[file_idx])
data_low = get_kms(vel_file,dfile)
del dfile
del vel_file
if z_bracket_high_new != z_bracket_high:
z_bracket_high = z_bracket_high_new
file_idx = np.argmin(np.abs(file_redshifts - z_bracket_high))
dfile = dens_reading_function(dens_filenames[file_idx])
vel_file = vel_reading_function(vel_filenames[file_idx])
data_high = get_kms(vel_file,dfile)
del dfile
del vel_file
data_interp = _get_interp_slice(data_high, data_low, z_bracket_high, \
z_bracket_low, z, comoving_pos_idx, los_axis)
lightcone[:,:,:,comoving_pos_idx] = data_interp
comoving_pos_idx += 1
print('...done')
return lightcone, output_z
def _get_output_z(file_redshifts, z_low, z_high, box_grid_n, box_length_mpc=None, dnu=None):
'''
Determine the output redshifts. For internal use.
'''
if z_low is None:
z_low = file_redshifts.min()
if z_high is None:
z_high = file_redshifts.max()
if(dnu):
output_z = redshifts_at_equal_frequncy_width(z_low, z_high, dnu)
else:
output_z = redshifts_at_equal_comoving_distance(z_low, z_high, box_grid_n, box_length_mpc=box_length_mpc)
if min(output_z) < min(file_redshifts) or max(output_z) > max(file_redshifts):
print('Warning! You have specified a redshift range of %.3f < z < %.3f' % (min(output_z), max(output_z)))
print('but you only have files for the range %.3f < z < %.3f.' % (min(file_redshifts), max(file_redshifts)))
print('The redshift range will be truncated.')
output_z = output_z[output_z >= min(file_redshifts)]
output_z = output_z[output_z <= max(file_redshifts)]
if len(output_z) < 1:
raise Exception('No valid redshifts in range!')
return output_z
[docs]
def redshifts_at_equal_comoving_distance(z_low, z_high, box_grid_n=256, box_length_mpc=None):
'''
Make a frequency axis vector with equal spacing in co-moving LOS coordinates.
The comoving distance between each frequency will be the same as the cell
size of the box.
Parameters:
z_low (float): The lower redshift
z_high (float): The upper redhisft
box_grid_n = 256 (int): the number of slices in an input box
box_length_mpc (float): the size of the box in cMpc. If None,
set to conv.LB
Returns:
numpy array containing the redshifts
'''
if box_length_mpc is None:
box_length_mpc = conv.LB
assert(z_high > z_low)
z = z_low
z_array = []
while z < z_high:
z_array.append(z)
nu = const.nu0/(1.0+z)
dnu = const.nu0*const.Hz(z)*box_length_mpc/(1.0 + z)**2/const.c/float(box_grid_n)
z = const.nu0/(nu - dnu) - 1.0
return np.array(z_array)
[docs]
def redshifts_at_equal_frequncy_width(z_low, z_high, dnu):
'''
Make a redshift vector with equally space in the frequency domain.
Parameters:
z_low (float): The lower redshift
z_high (float): The upper redhisft
dnu (float): The frequency channel width in MHz
Returns:
numpy array containing the redshifts
'''
assert(z_high > z_low)
z = z_high
z_array = []
while z > z_low:
z_array.append(z)
z = (z - dnu/const.nu0 * (1+z))/(1 + dnu/const.nu0 * (1+z))
return np.array(z_array)[::-1]
[docs]
def get_lightcone_subvolume(lightcone, redshifts, central_z, \
depth_mhz=None, depth_mpc=None, odd_num_cells=True, \
subtract_mean=True, fov_Mpc=None, verbose=True):
'''
Extract a subvolume from a lightcone, at a given central redshift,
and with a given depth. The depth can be specified in Mpc or MHz.
You must give exactly one of these parameters.
Parameters:
ligthcone (numpy array): the lightcone
redshifts (numpy array): the redshifts along the LOS
central_z (float): the central redshift of the subvolume
depth_mhz (float): the depth of the subvolume in MHz
depth_mpc (float): the depth of the subvolume in Mpc
odd_num_cells (bool): if true, the depth of the box will always
be an odd number of cells. This avoids problems with
power spectrum calculations.
subtract_mean (bool): if true, subtract the mean of the signal (Default: True)
fov_Mpc (float): the FoV size in Mpc
verbose (bool) : turning the verbosity.
Returns:
Tuple with (subvolume, dims, sub_redshifts) where dims is a tuple
with the dimensions of the subvolume in Mpc
'''
assert len(np.nonzero([depth_mhz, depth_mpc])) == 1
if fov_Mpc is None:
fov_Mpc = conv.LB
central_nu = cm.z_to_nu(central_z)
if depth_mpc != None: #Depth is given in Mpc
central_dist = cm.nu_to_cdist(central_nu)
min_cdist, max_cdist = central_dist-depth_mpc/2., central_dist+depth_mpc/2.
low_z = cm.cdist_to_z(min_cdist)
high_z = cm.cdist_to_z(max_cdist)
min_freq, max_freq = cm.z_to_nu(high_z), cm.z_to_nu(low_z)
else: #Depth is given in MHz
min_freq, max_freq = central_nu-depth_mhz/2., central_nu+depth_mhz/2.
min_cdist, max_cdist = cm.z_to_cdist(cm.nu_to_z(max_freq)), cm.z_to_cdist(cm.nu_to_z(min_freq))
low_z = cm.nu_to_z(max_freq)
high_z = cm.nu_to_z(min_freq)
if low_z < redshifts.min():
raise Exception('Lowest z is outside range')
if high_z > redshifts.max():
raise Exception('Highest z is outside range')
#low_n = int(find_idx(redshifts, low_z))
#high_n = int(find_idx(redshifts, high_z))
low_n = np.argmin(np.abs(redshifts - low_z))
high_n = np.argmin(np.abs(redshifts - high_z))
if (high_n-low_n) % 2 == 0 and odd_num_cells:
high_n += 1
subbox = lightcone[:,:,low_n:high_n]
if subtract_mean:
subbox = st.subtract_mean_signal(subbox, los_axis=2)
#box_depth = float(subbox.shape[2])/lightcone.shape[1]*fov_Mpc
box_depth = max_cdist-min_cdist
box_dims = (fov_Mpc, fov_Mpc, box_depth)
if(verbose):
print('Slice redshift: %.3f %.3f' %(redshifts[low_n], redshifts[high_n]))
print('Slice frequencies (MHz): %.1f %.1f' %(min_freq, max_freq))
print('Slice comoving distances (cMpc): %.2f %.2f' %(min_cdist, max_cdist))
print('Slice indices:', low_n, high_n)
print('Lightcone Subvolume : %s, [%.2f, %.2f, %.2f] cMpc' %(subbox.shape, fov_Mpc, fov_Mpc, box_depth))
return subbox, box_dims, redshifts[low_n:high_n]
def _get_interp_slice(data_high, data_low, z_bracket_high, z_bracket_low, z, \
comoving_pos_idx, los_axis, interpolation='linear'):
'''
Interpolate between two data slices. For internal use.
'''
slice_ind = comoving_pos_idx % data_low.shape[1]
slice_low = _get_slice(data_low, slice_ind, los_axis)
slice_high = _get_slice(data_high, slice_ind, los_axis)
if interpolation == 'linear':
slice_interp = ((z-z_bracket_low)*slice_high + \
(z_bracket_high - z)*slice_low)/(z_bracket_high-z_bracket_low)
elif interpolation == 'step':
transition_z = (z_bracket_high-z_bracket_low)/2.
if z < transition_z:
slice_interp = slice_low.copy()
else:
slice_interp = slice_high.copy()
elif interpolation == 'sigmoid':
zp = -10. + 20.*(z-z_bracket_low)/(z_bracket_high-z_bracket_low)
beta = 2.
g = 1./(1.+np.exp(-beta*zp))
slice_interp = slice_low*(1.-g) + slice_high*g
elif interpolation == 'step_cell':
slice_interp = _get_step_weighted_slice(slice_low, slice_high, z_bracket_high, z_bracket_low, z)
else:
raise Exception('Unknown interpolation method: %s' % interpolation)
return slice_interp
def _get_step_weighted_slice(slice_low, slice_high, z_bracket_high, z_bracket_low, z):
'''
Interpolate using a step function where the step position is based
on the proximity to an ionized region
'''
smoothed = smoothing.smooth_gauss(slice_high, sigma=4.)
diff = np.abs(slice_high-slice_low)
#smoothed=1 means early transition. smoothed=0 means late
#smoothed -= changed_cells.min()
#smoothed /= (changed_cells.max()-changed_cells.min())
step_transitions = _get_step_transitions(smoothed, diff>1.e-3)
step_transitions = z_bracket_low + step_transitions*(z_bracket_high-z_bracket_low)
interp_slice = slice_high.copy()
interp_slice[z < step_transitions] = slice_low[z < step_transitions]
return interp_slice
def _get_step_transitions(cell_dist, diff_idx):
#Replace each value in cell_dist with the number of cells
#with a lower value. Then normalize to be between 0 and 1
values_flat = cell_dist[diff_idx].flatten()
values_sorted = sorted(values_flat+np.random.random(len(values_flat))*1.e-9)
values_uniform = np.linspace(values_sorted[0], values_sorted[-1], len(values_sorted))
f = scipy.interpolate.interp1d(values_sorted, values_uniform, bounds_error=False, fill_value=0.)
output_values = f(cell_dist)
output_values -= output_values.min()
output_values /= output_values.max()
return output_values
def _get_slice(data, idx, los_axis, slice_depth=1):
'''
Slice a data cube along a given axis. For internal use.
'''
assert len(data.shape) == 3 or len(data.shape) == 4
assert los_axis >= 0 and los_axis < 3
idx1 = idx
idx2 = idx1+slice_depth
if len(data.shape) == 3: #scalar field
if los_axis == 0:
return np.squeeze(data[idx1:idx2,:,:])
elif los_axis == 1:
return np.squeeze(data[:,idx1:idx2,:])
return np.squeeze(data[:,:,idx1:idx2])
else: #Vector field
if los_axis == 0:
return np.squeeze(data[:,idx1:idx2,:,:])
elif los_axis == 1:
return np.squeeze(data[:,:,idx1:idx2,:])
return np.squeeze(data[:,:,:,idx1:idx2])
def _get_filenames(filenames_in):
'''
If filenames_in is a list of files, return as it is
If it is a directory, make sure it only contains data files,
then return the list of files in the directory
If it is a text file, read the list of files from the file
'''
if hasattr(filenames_in, '__iter__'):
filenames_out = filenames_in
elif os.path.isdir(filenames_in):
files_in_dir = glob.glob(filenames_in + '/*')
extensions = [os.path.splitext(f)[-1] for f in files_in_dir]
if not _all_same(extensions):
raise Exception('The directory may only contain one file type.')
filenames_out = files_in_dir
elif os.path.isfile(filenames_in):
f = open(filenames_in)
names = [l.strip() for l in f.readlines()]
f.close()
filenames_out = names
else:
raise Exception('Invalid filenames input')
return np.array(filenames_out)
def _get_file_redshifts(redshifts_in, filenames):
'''
If redshifts_in is None, try to determine from file names
If it's a directory, read the redshifts
Else, return as is
'''
if hasattr(redshifts_in, '__iter__'):
redshifts_out = redshifts_in
elif redshifts_in is None:
redshifts_out = [determine_redshift_from_filename(f) for f in filenames]
redshifts_out = np.array(redshifts_out)
elif os.path.exists(redshifts_in):
redshifts_out = np.loadtxt(redshifts_in)
else:
raise Exception('Invalid data for file redshifts.')
return redshifts_out
def _all_same(items):
return all(x == items[0] for x in items)