Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
e0cef89
adding log_x option for plotting; llp lnt/t study for NN input features
Nov 18, 2019
367645b
save
Nov 20, 2019
4d2bb17
merge w/ R
Nov 20, 2019
1110e17
Merge branch 'rmanzoni-master' into py3_merge
Nov 20, 2019
62b136f
fixing some log_x merge failure
Nov 20, 2019
f548fa0
saving llp plots
Nov 20, 2019
1253457
adding newest trainings
Nov 21, 2019
0b391b3
save; fixing labels to work in progress
Nov 22, 2019
430a388
adding llp plot script
Jan 16, 2020
c3b3cc4
merging
Jan 16, 2020
0b02035
merging
Jan 16, 2020
48cf6b4
Merge branch 'rmanzoni-master' into py3_merge
Jan 16, 2020
08d4298
merging complete
Jan 16, 2020
7984687
fixing training for 2017 ntuples, towards full runII
Jan 16, 2020
3d2790d
2017 works
Jan 16, 2020
72ca9fb
2016 works
Jan 16, 2020
2bc3c46
plotting for 16, 17 working
Jan 16, 2020
36cafc4
unifying plotting in plot_all.cfg.py; adding the full trainings for a…
Jan 17, 2020
80914ff
fixing datacards towards limits
Jan 17, 2020
f1f3a52
fixing limits prod
Jan 17, 2020
46205ed
fixing user dependencies
Jan 17, 2020
a532c5b
make (re)producing datacards an option; check first if the files are …
Jan 20, 2020
3662e10
fixing plotting 2D limits (had bugs)
Jan 20, 2020
0aa32c3
saving
Jan 20, 2020
9c10beb
towards reading syncs from DC
Jan 20, 2020
671e82d
adding sync script for yields
Jan 22, 2020
4f0d197
adding script to sync yields
Jan 22, 2020
6523fad
adding R's trainings for future reference
Jan 22, 2020
aad0800
fix selections bug in mmm: dz .1, iso .1
Feb 10, 2020
e5c6f20
towards new datacards
Feb 10, 2020
f8f016e
fix cfg
Feb 10, 2020
e96f554
adding validation training (all_channels_200211_16h_13m) for mmm w/o…
Feb 11, 2020
740e962
bug fix
Feb 11, 2020
4499d7d
adding combined 3y training
Feb 11, 2020
677806e
fixing 16, 17 for per-year training
Feb 13, 2020
7c8f87b
adding 3yr cmbd training and bug fixes for trainer
Feb 13, 2020
a5f841a
latest and greatest, including numbers for the sync
Feb 14, 2020
1de11bf
just a safe
Feb 25, 2020
f0d2dc0
saving
Mar 3, 2020
94acd99
saving sync
Mar 4, 2020
6f094aa
saving
Mar 16, 2020
2780a66
new MR trainings with latest; SR and MR cfgs and overview plots
Mar 24, 2020
1bff32d
checking prompt contributions in training MR
Mar 25, 2020
633af52
tests with and without subtraction for training and plotting (and hnl…
Mar 26, 2020
95f8765
updating the overview
Mar 26, 2020
6b64492
check
Apr 1, 2020
6370a9f
fixing conda env for setup
May 12, 2020
da580ff
fixing instructions
May 12, 2020
5c6e94e
adding path dummies for AM & MG
May 12, 2020
0416708
save
Jun 5, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
*.pyc
*.txt
*.DS_Store
*.zip

plots/
ana_overview_*/
*.swp
*.pdf
*.png
Expand Down
6 changes: 2 additions & 4 deletions NN/cfg/train_all.cfg.py → NN/cfg/train_all_3ycmbd.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
from collections import OrderedDict
from os import environ as env

ch = 'mmm'
set_paths(ch, 2018)
set_paths('mmm', 2018) #FIXME this is just a dummy

extra_selections = [
'hnl_2d_disp_sig>20',
'hnl_pt_12>15',
'sv_cos>0.99',
'sv_prob>0.001',
Expand Down Expand Up @@ -87,6 +85,7 @@
'hnl_m_12' ,
'sv_prob' ,
'hnl_2d_disp',
'year' ,
'channel' ,],

composed_features = composed_features,
Expand All @@ -104,7 +103,6 @@
selection_mc_eem = selection_eem + [cuts_eem.selections['is_prompt_lepton']],

selection_tight = cuts_mmm.selections_pd['tight'],
lumi = 59700.,

epochs = 40,
)
Expand Down
120 changes: 120 additions & 0 deletions NN/cfg/train_all_CR_MRloose.cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import numpy as np
from NN.nn_parametric_trainer import Trainer
from plotter.selections import Selections
from plotter.utils import set_paths
from collections import OrderedDict
from os import environ as env

# Year this training configuration runs on; also selects the extra l0_pt cut below.
year = 2016

set_paths('mmm', year) #FIXME channel is just a dummy

# Cuts appended to the selection of every channel.
extra_selections = [
    'hnl_pt_12>15',
    'sv_cos>0.90', ## MARTINA FOR STATS
    # 'sv_prob>0.001', ## get rid of this for more stats, by MARTINA ...
]

# Each channel selection is the listed named cuts of that channel, in this
# order, followed by the shared extra selections.
cuts_mmm = Selections('mmm')
selection_mmm = [
    cuts_mmm.selections[key]
    for key in ('pt_iso', 'baseline', 'vetoes_12_OS', 'vetoes_01_OS', 'vetoes_02_OS', 'sideband')
] + extra_selections

cuts_mem = Selections('mem')
selection_mem = [
    cuts_mem.selections[key]
    for key in ('pt_iso', 'baseline', 'sideband', 'vetoes_02_OS')
] + extra_selections

cuts_eee = Selections('eee')
selection_eee = [
    cuts_eee.selections[key]
    for key in ('pt_iso', 'baseline', 'vetoes_12_OS', 'vetoes_01_OS', 'vetoes_02_OS', 'sideband')
] + extra_selections

cuts_eem = Selections('eem')
selection_eem = [
    cuts_eem.selections[key]
    for key in ('pt_iso', 'baseline', 'sideband', 'vetoes_01_OS')
] + extra_selections

# Only 2017 and 2018 add an l0_pt requirement, and only for eee and eem;
# any other year leaves the selections untouched.
_l0_pt_cut = {2017: 'l0_pt > 35', 2018: 'l0_pt > 32'}.get(year)
if _l0_pt_cut is not None:
    selection_eee.append(_l0_pt_cut)
    selection_eem.append(_l0_pt_cut)

def _abs_of(column):
    """Return a callable computing np.abs of attribute `column` of its argument."""
    return lambda df: np.abs(getattr(df, column))


def _log10_abs_of(column):
    """Return a callable computing np.log10(np.abs(...)) of attribute `column`."""
    return lambda df: np.log10(np.abs(getattr(df, column)))


# Derived features keyed by name; every value is a one-argument callable that
# reads attributes of the object it is given (called `df` throughout).
# Insertion order: the three |eta|, then log10|dxy| and log10|dz| per lepton,
# then the two absolute charges.
composed_features = OrderedDict()

for _lep in ('l0', 'l1', 'l2'):
    composed_features['abs_%s_eta' % _lep] = _abs_of('%s_eta' % _lep)

for _lep in ('l0', 'l1', 'l2'):
    for _ip in ('dxy', 'dz'):
        composed_features['log_abs_%s_%s' % (_lep, _ip)] = _log10_abs_of('%s_%s' % (_lep, _ip))

composed_features['abs_q_02'] = _abs_of('hnl_q_02')
composed_features['abs_q_01'] = _abs_of('hnl_q_01')

# https://stackoverflow.com/questions/20528328/numpy-logical-or-for-more-than-two-arguments
# save a label to distinguish different channels
# 1 = mmm
# 2 = mem_os
# 3 = mem_ss
# 4 = eee
# 5 = eem_os
# 6 = eem_ss
# composed_features['channel' ] = lambda df : 1 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==13 and np.abs(df.l2_pdgid)==13) + 2 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02!=0) + 3 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02==0) + 4 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==11) + 5 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02!=0) + 6 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02==0)

# Keyword arguments for the Trainer, collected in the order they are evaluated
# and handed over in one go.
_trainer_config = OrderedDict()
_trainer_config['channel'] = 'all_channels'
_trainer_config['base_dir'] = env['NTUPLE_DIR']
#post_fix = 'HNLTreeProducer_%s/tree.root' %ch,
_trainer_config['post_fix'] = 'HNLTreeProducer/tree.root'
_trainer_config['years'] = [year]
# NOTE(review): 'channel' is not defined in composed_features in this file;
# presumably the Trainer provides it -- TODO confirm.
_trainer_config['features'] = [
    'l0_pt',
    'l1_pt',
    'l2_pt',
    'hnl_dr_12',
    'hnl_m_12',
    'sv_prob',
    'hnl_2d_disp',
    'channel',
]
_trainer_config['composed_features'] = composed_features

# Per channel: data gets the channel selection as is, MC gets the same
# selection plus that channel's 'is_prompt_lepton' cut.
for _name, _selection, _cuts in (
    ('mmm', selection_mmm, cuts_mmm),
    ('mem', selection_mem, cuts_mem),
    ('eee', selection_eee, cuts_eee),
    ('eem', selection_eem, cuts_eem),
):
    _trainer_config['selection_data_%s' % _name] = _selection
    _trainer_config['selection_mc_%s' % _name] = _selection + [_cuts.selections['is_prompt_lepton']]

# The tight definition is taken from the mmm cuts for all channels.
_trainer_config['selection_tight'] = cuts_mmm.selections_pd['tight']
_trainer_config['epochs'] = 20

trainer = Trainer(**_trainer_config)

# Training only starts when this config is executed as a script.
if __name__ == '__main__':
    trainer.train()
120 changes: 120 additions & 0 deletions NN/cfg/train_all_CR_bj.cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import numpy as np
from NN.nn_parametric_trainer import Trainer
from plotter.selections import Selections
from plotter.utils import set_paths
from collections import OrderedDict
from os import environ as env

# Year this training configuration runs on; also selects the extra l0_pt cut below.
year = 2017

set_paths('mmm', year) #FIXME channel is just a dummy

# Cuts appended to the selection of every channel.
extra_selections = [
    'hnl_pt_12>15',
    'sv_cos>0.90', ## MARTINA FOR STATS
    # 'sv_prob>0.001', ## get rid of this for more stats, by MARTINA ...
]

# Each channel selection is the listed named cuts of that channel, in this
# order, followed by the shared extra selections.
cuts_mmm = Selections('mmm')
selection_mmm = [
    cuts_mmm.selections[key]
    for key in ('pt_iso', 'baseline', 'vetoes_12_OS', 'vetoes_01_OS', 'vetoes_02_OS', 'CR_bj')
] + extra_selections

cuts_mem = Selections('mem')
selection_mem = [
    cuts_mem.selections[key]
    for key in ('pt_iso', 'baseline', 'CR_bj', 'vetoes_02_OS')
] + extra_selections

cuts_eee = Selections('eee')
selection_eee = [
    cuts_eee.selections[key]
    for key in ('pt_iso', 'baseline', 'vetoes_12_OS', 'vetoes_01_OS', 'vetoes_02_OS', 'CR_bj')
] + extra_selections

cuts_eem = Selections('eem')
selection_eem = [
    cuts_eem.selections[key]
    for key in ('pt_iso', 'baseline', 'CR_bj', 'vetoes_01_OS')
] + extra_selections

# Only 2017 and 2018 add an l0_pt requirement, and only for eee and eem;
# any other year leaves the selections untouched.
_l0_pt_cut = {2017: 'l0_pt > 35', 2018: 'l0_pt > 32'}.get(year)
if _l0_pt_cut is not None:
    selection_eee.append(_l0_pt_cut)
    selection_eem.append(_l0_pt_cut)

def _abs_of(column):
    """Return a callable computing np.abs of attribute `column` of its argument."""
    return lambda df: np.abs(getattr(df, column))


def _log10_abs_of(column):
    """Return a callable computing np.log10(np.abs(...)) of attribute `column`."""
    return lambda df: np.log10(np.abs(getattr(df, column)))


# Derived features keyed by name; every value is a one-argument callable that
# reads attributes of the object it is given (called `df` throughout).
# Insertion order: the three |eta|, then log10|dxy| and log10|dz| per lepton,
# then the two absolute charges.
composed_features = OrderedDict()

for _lep in ('l0', 'l1', 'l2'):
    composed_features['abs_%s_eta' % _lep] = _abs_of('%s_eta' % _lep)

for _lep in ('l0', 'l1', 'l2'):
    for _ip in ('dxy', 'dz'):
        composed_features['log_abs_%s_%s' % (_lep, _ip)] = _log10_abs_of('%s_%s' % (_lep, _ip))

composed_features['abs_q_02'] = _abs_of('hnl_q_02')
composed_features['abs_q_01'] = _abs_of('hnl_q_01')

# https://stackoverflow.com/questions/20528328/numpy-logical-or-for-more-than-two-arguments
# save a label to distinguish different channels
# 1 = mmm
# 2 = mem_os
# 3 = mem_ss
# 4 = eee
# 5 = eem_os
# 6 = eem_ss
# composed_features['channel' ] = lambda df : 1 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==13 and np.abs(df.l2_pdgid)==13) + 2 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02!=0) + 3 * (np.abs(df.l0_pdgid)==13 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02==0) + 4 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==11) + 5 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02!=0) + 6 * (np.abs(df.l0_pdgid)==11 and np.abs(df.l1_pdgid)==11 and np.abs(df.l2_pdgid)==13 and df.hnl_q_02==0)

# Keyword arguments for the Trainer, collected in the order they are evaluated
# and handed over in one go.
_trainer_config = OrderedDict()
_trainer_config['channel'] = 'all_channels'
_trainer_config['base_dir'] = env['NTUPLE_DIR']
#post_fix = 'HNLTreeProducer_%s/tree.root' %ch,
_trainer_config['post_fix'] = 'HNLTreeProducer/tree.root'
_trainer_config['years'] = [year]
# NOTE(review): 'channel' is not defined in composed_features in this file;
# presumably the Trainer provides it -- TODO confirm.
_trainer_config['features'] = [
    'l0_pt',
    'l1_pt',
    'l2_pt',
    'hnl_dr_12',
    'hnl_m_12',
    'sv_prob',
    'hnl_2d_disp',
    'channel',
]
_trainer_config['composed_features'] = composed_features

# Per channel: data gets the channel selection as is, MC gets the same
# selection plus that channel's 'is_prompt_lepton' cut.
for _name, _selection, _cuts in (
    ('mmm', selection_mmm, cuts_mmm),
    ('mem', selection_mem, cuts_mem),
    ('eee', selection_eee, cuts_eee),
    ('eem', selection_eem, cuts_eem),
):
    _trainer_config['selection_data_%s' % _name] = _selection
    _trainer_config['selection_mc_%s' % _name] = _selection + [_cuts.selections['is_prompt_lepton']]

# The tight definition is taken from the mmm cuts for all channels.
_trainer_config['selection_tight'] = cuts_mmm.selections_pd['tight']
_trainer_config['epochs'] = 20

trainer = Trainer(**_trainer_config)

# Training only starts when this config is executed as a script.
if __name__ == '__main__':
    trainer.train()
Loading