-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from CosmologicalEmulators/develop
Develop
- Loading branch information
Showing
7 changed files
with
272 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
name = "EmulatorsTrainer"
uuid = "ef107171-9c1d-4869-9860-ac18fb1a7e36"
authors = ["Marco Bonici <[email protected]>"]
version = "0.2.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
using Distributed
using NPZ
using ClusterManagers
using EmulatorsTrainer
using JSON3
using Random

#mkdir("/home/mbonici/test_camb")
# Spawn workers on an LSF cluster; swap this for the addprocs variant that
# matches your own scheduler.
addprocs_lsf(100; bsub_flags=`-q medium -n 2 -M 6094`)

@everywhere begin
    using NPZ, EmulatorsTrainer, JSON3, Random, PyCall
    camb = pyimport("camb")

    # Sampled cosmological parameters and their lower/upper bounds (same order).
    pars = ["ln10As", "ns", "H0", "ombh2", "omch2", "tau"]
    lb = [2.5, 0.88, 40., 0.1933, 0.08, 0.02]
    ub = [3.5, 1.05, 100., 0.2533, 0.2, 0.12]

    n = 1000
    s = EmulatorsTrainer.create_training_dataset(n, lb, ub)

    root_dir = "/home/mbonici/test_camb"  # tuned to the author's machine; point at your own directory

    """
        camb_script(CosmoDict, root_path)

    Run CAMB for the cosmology described by `CosmoDict` and dump the resulting
    TT/EE/TE/BB/PP spectra (as `.npy` files) plus the parameter dictionary (as
    JSON) into a fresh, randomly named subdirectory of `root_path`.
    """
    function camb_script(CosmoDict, root_path)
        rand_str = root_path * "/" * randstring(10)
        mkdir(rand_str)
        lmax = 10000

        # set_for_lmax
        lens_potential_accuracy = 8
        lens_margin = 2050  # NOTE(review): defined but never passed to CAMB — confirm intent

        # set_matter_power
        kmax = 10
        k_per_logint = 130
        nonlinear = true  # NOTE(review): defined but never passed to CAMB — confirm intent
        accurate_massive_neutrinos = true

        # set_accuracy
        AccuracyBoost = 2
        lSampleBoost = 2
        lAccuracyBoost = 2
        DoLateRadTruncation = false

        # Renamed from `pars` to avoid shadowing the parameter-name list above.
        camb_pars = camb.CAMBparams()
        camb_pars.set_cosmology(H0 = round(CosmoDict["H0"]; sigdigits=6),
                                ombh2 = round(CosmoDict["ombh2"]; sigdigits=6),
                                omch2 = round(CosmoDict["omch2"]; sigdigits=6),
                                mnu = 0.06,
                                omk = 0.,
                                tau = round(CosmoDict["tau"]; sigdigits=6),
                                Alens = 1.,
                                neutrino_hierarchy = "normal",
                                num_massive_neutrinos = 1,
                                )

        camb_pars.InitPower.set_params(As = round(exp(CosmoDict["ln10As"]) * 10^(-10); sigdigits=6),
                                       ns = round(CosmoDict["ns"]; sigdigits=6),
                                       r = 0)
        camb_pars.set_for_lmax(lmax,
                               lens_potential_accuracy = lens_potential_accuracy)

        camb_pars.set_dark_energy(w = -1., wa = 0., dark_energy_model = "fluid")
        camb_pars.WantTransfer = 1
        camb_pars.NonLinear = camb.model.NonLinear_both # Non-linear matter power & lensing, HMcode
        camb_pars.Transfer.kmax = kmax
        camb_pars.Transfer.k_per_logint = k_per_logint
        camb_pars.Transfer.accurate_massive_neutrinos = accurate_massive_neutrinos

        camb_pars.set_accuracy(AccuracyBoost = AccuracyBoost,
                               lSampleBoost = lSampleBoost,
                               lAccuracyBoost = lAccuracyBoost,
                               DoLateRadTruncation = DoLateRadTruncation)

        results = camb.get_results(camb_pars)

        powers = results.get_cmb_power_spectra(camb_pars, CMB_unit = "muK")

        totCL = powers["total"]
        lens_potential = powers["lens_potential"]

        # Column layout assumed from CAMB's python output: TT, EE, BB, TE for
        # the total spectra, and the first lens_potential column as PP —
        # TODO confirm against the CAMB docs (the original had the same
        # open question for BB).
        clTT = totCL[:, 1]
        clEE = totCL[:, 2]
        clBB = totCL[:, 3]
        clTE = totCL[:, 4]
        clPP = lens_potential[:, 1]

        npzwrite(rand_str * "/cl_TT.npy", clTT)
        npzwrite(rand_str * "/cl_EE.npy", clEE)
        npzwrite(rand_str * "/cl_TE.npy", clTE)
        npzwrite(rand_str * "/cl_BB.npy", clBB)
        npzwrite(rand_str * "/cl_PP.npy", clPP)

        # Store the input cosmology next to its spectra so the pair can be
        # reloaded together later.
        open(rand_str * "/capse_dict.json", "w") do io
            JSON3.write(io, CosmoDict)
        end

    end

end

EmulatorsTrainer.compute_dataset(s, pars, root_dir, camb_script)

# Release all workers once the dataset is computed.
rmprocs(workers())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
using Distributed
using NPZ
using ClusterManagers
using EmulatorsTrainer
using JSON3
using Random

# LSF-specific worker spawn; replace with the addprocs flavour of your scheduler.
addprocs_lsf(10)

@everywhere begin
    using NPZ, EmulatorsTrainer, JSON3, Random

    # Toy 3-parameter problem on the unit cube.
    pars = ["a", "b", "c"]
    lb = [0., 0., 0.]
    ub = [1., 1., 1.]
    n = 1000
    s = EmulatorsTrainer.create_training_dataset(n, lb, ub)

    root_dir = "/home/mbonici/test_emu"  # tuned to the author's machine; point at your own directory

    """
        test_script(my_dict, root_path)

    Evaluate a toy quadratic `a x² + b x + c` on a 100-point grid for the
    coefficients in `my_dict`, and store the result (`.npy`) together with the
    coefficient dictionary (JSON) in a fresh, randomly named subdirectory of
    `root_path`.
    """
    function test_script(my_dict::Dict, root_path)
        rand_str = root_path * "/" * randstring(10)
        mkdir(rand_str)
        sleep(10)  # stand-in for an expensive computation
        a, b, c = my_dict["a"], my_dict["b"], my_dict["c"]
        x = Array(LinRange(0, 10, 100))
        y = @. a * x^2 + b * x + c
        npzwrite(rand_str * "/result.npy", y)
        open(rand_str * "/dict.json", "w") do io
            JSON3.write(io, my_dict)
        end
    end
end

EmulatorsTrainer.compute_dataset(s, pars, root_dir, test_script)

# Release all workers once done.
for i in workers()
    rmprocs(i)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,99 +1,14 @@ | ||
"""
    EmulatorsTrainer

Utilities for building emulator training datasets (sampling and distributed
evaluation, `trainer.jl`) and for loading/normalizing them into train/test
splits (`farmer.jl`).
"""
module EmulatorsTrainer

using DataFrames
using Distributions
using Distributed
using JSON3
using NPZ
using QuasiMonteCarlo
using Random

include("trainer.jl")
include("farmer.jl")

end # module EmulatorsTrainer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
"""
    create_training_dataset(n::Int, lb, ub)

Draw `n` Latin-hypercube samples from the box with lower bounds `lb` and upper
bounds `ub`, returning a `length(lb) × n` matrix (one parameter combination
per column, as produced by `QuasiMonteCarlo.sample`).

Throws `DimensionMismatch` if `lb` and `ub` differ in length.
"""
function create_training_dataset(n::Int, lb::AbstractArray, ub::AbstractArray)
    # Fail early with a clear message instead of a downstream sampler error.
    length(lb) == length(ub) ||
        throw(DimensionMismatch("lb and ub must have the same length"))
    return QuasiMonteCarlo.sample(n, lb, ub, LatinHypercubeSample())
end
|
||
"""
    create_training_dict(training_matrix, idx_comb, params)

Build a `Dict` mapping each parameter name in `params` to its value in column
`idx_comb` of `training_matrix` (row i ↔ `params[i]`).
"""
function create_training_dict(training_matrix::Matrix, idx_comb::Int, params::Array{String})
    return Dict(name => training_matrix[row, idx_comb]
                for (row, name) in enumerate(params))
end
|
||
"""
    compute_dataset(training_matrix, params, root_dir, script_func)

Create `root_dir` and call `script_func(dict, root_dir)` once per column of
`training_matrix`, with the column converted to a name→value `Dict` via
`create_training_dict`. Work is spread over the available workers with
`@distributed` and awaited with `@sync`.

`mkdir` (not `mkpath`) is deliberate: it errors if `root_dir` already exists,
preventing silent mixing with a previous dataset.
"""
function compute_dataset(training_matrix::Matrix, params::Array{String}, root_dir::String, script_func::Function)
    # One column per parameter combination; the unused row count is not bound.
    n_combs = size(training_matrix, 2)
    mkdir(root_dir)
    @sync @distributed for idx in 1:n_combs
        train_dict = create_training_dict(training_matrix, idx, params)
        script_func(train_dict, root_dir)
    end
    return nothing
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
"""
    add_observable_df!(df, location, param_file, observable_file, first_idx, last_idx, get_tuple)

Read the parameter JSON at `location * param_file` and the observable array at
`location * observable_file`, slice the observable to `first_idx:last_idx`,
convert the (params, observable) pair to a row via `get_tuple`, and `push!` it
onto `df`. Returns `nothing`.
"""
function add_observable_df!(df::DataFrames.DataFrame, location::String, param_file::String,
    observable_file::String, first_idx::Int, last_idx::Int, get_tuple::Function)
    json_string = read(location*param_file, String)
    cosmo_pars = JSON3.read(json_string)

    # NOTE(review): the original called npzread(path, "r"), apparently treating
    # the second argument as an open-mode flag; npzread has no such parameter
    # (its optional second argument selects variables in an .npz archive), so
    # the stray "r" is dropped here.
    observable = npzread(location*observable_file)[first_idx:last_idx]
    observable_filtered = get_tuple(cosmo_pars, observable)
    push!(df, observable_filtered)
    return nothing
end
|
||
"""
    load_df_directory!(df, Directory, add_observable_df!)

Walk `Directory` recursively and, for every `.json` file found, call
`add_observable_df!(df, root)` with the file's containing directory (the
callback is expected to load that directory's parameter/observable pair).
"""
function load_df_directory!(df::DataFrames.DataFrame, Directory,
    add_observable_df!::Function)
    for (root, dirs, files) in walkdir(Directory)
        for file in files
            # endswith is total: the original findlast(isequal('.'), file)
            # returned `nothing` for extensionless files and the subsequent
            # indexing threw.
            if endswith(file, ".json")
                add_observable_df!(df, root)
            end
        end
    end
    return nothing
end
|
||
"""
    extract_input_output_df(df, n_input_features, n_output_features)

Split a table into emulator inputs and outputs. The first `n_input_features`
columns become the `(n_input_features × n_samples)` input matrix; column
`n_input_features + 1` is assumed to hold, per row, a vector of length
`n_output_features` (the "observable"), which fills the
`(n_output_features × n_samples)` output matrix.

#TODO check whether this can be done WITHOUT n_input and n_output
"""
function extract_input_output_df(df, n_input_features::Int, n_output_features::Int)
    array_df = Matrix(df)  # convert table to a plain matrix
    # Count samples directly instead of materializing a row slice just to
    # measure its length (the original allocated array_input[1,:] for this).
    n_samples = size(array_df, 1)
    array_input = transpose(Array(array_df[:, 1:n_input_features]))
    array_output = zeros(n_output_features, n_samples)
    for element in 1:n_samples
        array_output[:, element] = array_df[element, n_input_features+1]
    end
    return array_input, array_output
end
|
||
"""
    get_minmax_in(df, array_pars_in)

Return a `length(array_pars_in) × 2` matrix whose i-th row holds the minimum
and maximum of column `array_pars_in[i]` of `df`.

#TODO check whether this can work on array_input
"""
function get_minmax_in(df, array_pars_in)
    in_MinMax = zeros(length(array_pars_in), 2)
    for (idx, key) in enumerate(array_pars_in)
        # Single-pass extrema instead of separate minimum/maximum scans.
        in_MinMax[idx, 1], in_MinMax[idx, 2] = extrema(df[!, key])
    end
    return in_MinMax
end
|
||
"""
    get_minmax_out(array_out, n_output_features)

Return an `n_output_features × 2` matrix whose i-th row holds the minimum and
maximum of row `i` of `array_out` (one row per output feature, one column per
sample).
"""
function get_minmax_out(array_out, n_output_features)
    out_MinMax = zeros(n_output_features, 2)
    for i in 1:n_output_features
        # Single-pass extrema over a view: no row copy, no double scan.
        out_MinMax[i, 1], out_MinMax[i, 2] = extrema(view(array_out, i, :))
    end
    return out_MinMax
end
|
||
"""
    maximin_df!(df, in_MinMax, out_MinMax)

Min-max normalize `df` in place to [0, 1]: the first `size(in_MinMax, 1)`
columns are scaled with the per-column bounds in `in_MinMax`, and each row's
"observable" vector is scaled element-wise with the bounds in `out_MinMax`.
"""
function maximin_df!(df, in_MinMax, out_MinMax)
    n_input_features, _ = size(in_MinMax)
    for i in 1:n_input_features
        df[!, i] .-= in_MinMax[i, 1]
        df[!, i] ./= (in_MinMax[i, 2] - in_MinMax[i, 1])
    end
    # Hoist the loop-invariant offset/range vectors out of the per-row loop
    # (the original re-sliced and re-subtracted out_MinMax on every row).
    out_min = out_MinMax[:, 1]
    out_range = out_MinMax[:, 2] .- out_MinMax[:, 1]
    for i in 1:nrow(df)
        df[!, "observable"][i] .-= out_min
        df[!, "observable"][i] ./= out_range
    end
end
|
||
"""
    splitdf(df, pct; rng=Random.default_rng())

Randomly partition the rows of `df` into two views: the first with roughly
`pct * nrow(df)` rows, the second with the rest. Pass an explicit `rng` for a
reproducible split (new keyword; the default preserves the original
global-RNG behavior).
"""
function splitdf(df, pct; rng::Random.AbstractRNG=Random.default_rng())
    @assert 0 <= pct <= 1
    ids = collect(axes(df, 1))
    shuffle!(rng, ids)
    # Row j is selected iff its shuffled id falls below the cut — a uniform
    # random subset of ~pct of the rows.
    sel = ids .<= nrow(df) .* pct
    return view(df, sel, :), view(df, .!sel, :)
end
|
||
"""
    traintest_split(df, test)

Shuffle-split `df`, reserving a fraction `test` of the rows for testing.
Returns `(train, test)` views (note: `splitdf` yields them in the opposite
order).
"""
function traintest_split(df, test)
    test_part, train_part = splitdf(df, test)
    return train_part, test_part
end
|
||
"""
    getdata(df, n_input_features, n_output_features)

Split `df` 80/20 into train/test sets and return
`(xtrain, ytrain, xtest, ytest)` as `Float64` arrays, with inputs and outputs
extracted via `extract_input_output_df`.
"""
function getdata(df, n_input_features::Int, n_output_features::Int)
    # NOTE(review): DataDeps auto-accept switch kept from the original;
    # nothing in this function visibly uses it — confirm before removing.
    ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"

    train_df, test_df = traintest_split(df, 0.2)

    (xtrain, ytrain), (xtest, ytest) =
        (extract_input_output_df(part, n_input_features, n_output_features)
         for part in (train_df, test_df))

    return Float64.(xtrain), Float64.(ytrain), Float64.(xtest), Float64.(ytest)
end