Future dev #2

Open · wants to merge 31 commits into base: multigpu

Commits (31)
ac1bd0c
training works now with ragged inputs. missing: distribute strategy a…
jkiesele Dec 18, 2023
a92c7d0
clean up
jkiesele Dec 18, 2023
e9a39e3
snapshot
jkiesele Dec 19, 2023
0b8fae9
revert for fresh start
jkiesele Dec 19, 2023
4464bd8
working mgpu training (no callbacks)
jkiesele Dec 19, 2023
c602aff
added compatibility
jkiesele Dec 20, 2023
e200fe3
training callbacks are back
jkiesele Dec 20, 2023
788dad0
status update for tf2.14
jkiesele Dec 29, 2023
7c805b1
compat between tf2.4 and tf2.14
jkiesele Dec 29, 2023
e841ce3
compile error
jkiesele Dec 29, 2023
46e379f
fixes for tf2.14 and new cuda (may have affected earlier trainings al…
jkiesele Dec 31, 2023
0e4c084
debug switch off
jkiesele Dec 31, 2023
f412471
getting ready for new training
jkiesele Jan 1, 2024
57dc99b
dead zone hinge and pushed batch size (for 80gb A100)
jkiesele Jan 1, 2024
df6bf73
moved to new djc changes and put wandb in the relevant places
jkiesele Jan 2, 2024
c13a486
readme
jkiesele Jan 2, 2024
0218110
some perf tweaks ans small fixes
jkiesele Jan 2, 2024
ee520b3
some performance increasing bits
jkiesele Jan 2, 2024
d7e9060
some more fixes
jkiesele Jan 8, 2024
fec58f2
compat
jkiesele Jan 8, 2024
6729aa6
wandb metrics
jkiesele Jan 8, 2024
8ee7015
convenience function - untested
jkiesele Jan 13, 2024
e123e44
small helper now tested
jkiesele Jan 13, 2024
db76678
more docu
jkiesele Jan 13, 2024
d3387b5
small fixes
jkiesele Jan 22, 2024
a28850c
fraction regressor loss
jkiesele Jan 22, 2024
35905f2
small fix
jkiesele Jan 23, 2024
ee3202d
new reduce layer
jkiesele Jan 23, 2024
e1917b3
new reduce layer fix
jkiesele Jan 23, 2024
2dc561a
another layer
jkiesele Jan 23, 2024
63fdeec
Future dev (#8)
jkiesele Feb 12, 2024
6 changes: 3 additions & 3 deletions README.md
@@ -2,8 +2,8 @@ HGCalML
 ===============================================================================
 
 Requirements
-* DeepJetCore 3.X (``https://github.com/DL4Jets/DeepJetCore``)
-* DeepJetCore 3.X container (or latest version in general)
+* DeepJetCore 4.X (``https://github.com/DL4Jets/DeepJetCore``)
+* DeepJetCore 4.X container (or latest version in general)
 
 For CERN (or any machine with cvmfs mounted), use this script to start the latest container:
 ```
@@ -23,7 +23,7 @@ sing=`which singularity`
 unset PATH
 cd
 
-$sing run -B /eos -B /afs $gpuopt /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cernml4reco/deepjetcore3:latest
+$sing run -B /eos -B /afs $gpuopt /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cernml4reco/deepjetcore4:latest
 ```
 
 The package follows the structure and logic of all DeepJetCore subpackages (as does the example in DeepJetCore), so as a fresh starting point it can be a good idea to work through the DeepJetCore example first.
41 changes: 27 additions & 14 deletions Train/config_trainer.py
@@ -21,14 +21,14 @@
 from Layers import MixWhere
 from Layers import RaggedGravNet
 from Layers import PlotCoordinates
-from Layers import DistanceWeightedMessagePassing
+from Layers import DistanceWeightedMessagePassing, AccumulateNeighbours
 from Layers import LLFillSpace
 from Layers import LLExtendedObjectCondensation
-from Layers import DictModel,RaggedDictModel
+from Layers import DictModel
 from Layers import RaggedGlobalExchange
 from Layers import SphereActivation
 from Layers import Multi
-from Layers import ShiftDistance
+from Layers import ShiftDistance, KNN
 from Layers import LLRegulariseGravNetSpace
 from Regularizers import AverageDistanceRegularizer
 from model_blocks import tiny_pc_pool, condition_input
@@ -132,18 +132,21 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
     x = ScaledGooeyBatchNorm2(
         fluidity_decay=0.01,
         max_viscosity=0.999999,
+        learn=True,
         no_gaus=False)([x, is_track])
 
     x = ScaledGooeyBatchNorm2(
         fluidity_decay=0.01,
         max_viscosity=0.999999,
         invert_condition=True,
+        learn=True,
         no_gaus=False)([x, is_track])
 
     c_coords = prime_coords
     c_coords = ScaledGooeyBatchNorm2(
         name='batchnorm_ccoords',
         fluidity_decay=0.01,
+        learn=True,
         max_viscosity=0.999999)(c_coords)
     c_coords = PlotCoordinates(
         plot_every=plot_debug_every,
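The three `ScaledGooeyBatchNorm2` calls above now get `learn=True`. The first two form a conditional pair: feeding `[x, is_track]` and setting `invert_condition=True` on the second call suggests that track and non-track hits are normalized with separate statistics. A minimal standalone sketch of that masking idea, assuming this reading (`masked_standardize` is made up for illustration and is not the HGCalML layer, which additionally keeps running statistics via `fluidity_decay`/`max_viscosity`):

```
import tensorflow as tf

def masked_standardize(x, cond, eps=1e-4):
    # x: (N, F) features; cond: (N, 1) mask in {0., 1.}, e.g. is_track
    w = tf.cast(cond, x.dtype)
    n = tf.reduce_sum(w) + eps
    mean = tf.reduce_sum(w * x, axis=0, keepdims=True) / n          # masked mean
    var = tf.reduce_sum(w * (x - mean) ** 2, axis=0, keepdims=True) / n
    x_norm = (x - mean) / tf.sqrt(var + eps)
    return tf.where(w > 0., x_norm, x)   # rows outside the mask stay untouched

# x = masked_standardize(x, is_track)        # normalize the track rows
# x = masked_standardize(x, 1. - is_track)   # then the complement (invert_condition)
```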
@@ -156,14 +159,20 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
     x = Concatenate()([x, c_coords, is_track])
     x = Dense(64, name='dense_pre_loop', activation=DENSE_ACTIVATION)(x)
 
-    allfeat = []
+    allfeat = [c_coords]
     print("Available keys: ", pre_processed.keys())
 
+    ## testing ##
+
+    #nidx,dist = KNN(8, use_approximate_knn=True)([prime_coords, rs])
+    #x = Concatenate()([x, dist])
+    #x = Concatenate()([x, DistanceWeightedMessagePassing([16])([x,nidx,dist]) ])
+
     ###########################################################################
     ### Loop over GravNet Layers ##############################################
     ###########################################################################
 
-    gravnet_regs = [0.01, 0.01, 0.01]
+    gravnet_reg = 0.01
 
     for i in range(GRAVNET_ITERATIONS):
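The commented-out test above wires `KNN` straight into `DistanceWeightedMessagePassing`, the same pattern the GravNet loop applies below with `gnnidx` and `gndist`. A minimal sketch of what one distance-weighted message-passing step computes (an illustration only, not the HGCalML implementation; the decay constant and the mean/max aggregations are assumptions):

```
import tensorflow as tf

def dw_message_pass(x, nidx, dist, units=16):
    # x: (N, F) node features; nidx: (N, K) neighbour indices; dist: (N, K)
    neigh = tf.gather(x, nidx)                     # (N, K, F) neighbour features
    w = tf.exp(-10. * dist)[..., tf.newaxis]       # weight decays with distance
    agg_mean = tf.reduce_mean(w * neigh, axis=1)   # (N, F)
    agg_max = tf.reduce_max(w * neigh, axis=1)     # (N, F)
    out = tf.concat([x, agg_mean, agg_max], axis=-1)
    return tf.keras.layers.Dense(units, activation='elu')(out)
```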

@@ -189,14 +198,18 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
         )([x, rs])
 
         gndist = LLRegulariseGravNetSpace(
-            scale=gravnet_regs[i],
+            scale=gravnet_reg,
             record_metrics=False,
             name=f'regularise_gravnet_{i}')([gndist, prime_coords, gnnidx])
 
-        x_rand = random_sampling_block(
-            xgn, rs, gncoords, gnnidx, gndist, is_track,
-            reduction=6, layer_norm=True, name=f"RSU_{i}")
-        x_rand = ScaledGooeyBatchNorm2(**BATCHNORM_OPTIONS)(x_rand)
+        x = DistanceWeightedMessagePassing(
+            [32,32,32,32,32,32,32],
+            activation='elu')([x,gnnidx,gndist])
+
+        #x_rand = random_sampling_block(
+        #    xgn, rs, gncoords, gnnidx, gndist, is_track,
+        #    reduction=6, layer_norm=True, name=f"RSU_{i}")
+        #x_rand = ScaledGooeyBatchNorm2(**BATCHNORM_OPTIONS)(x_rand)
 
         gndist = AverageDistanceRegularizer(
             strength=1e-3,
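`AverageDistanceRegularizer` (applied to `gndist` with `strength=1e-3`) contributes a loss term rather than transforming the tensor it passes through. A hedged sketch of the general idea, penalizing the mean neighbour distance so the learned coordinate space neither collapses nor spreads out; the class below and its `target` parameter are illustrative stand-ins, not the actual layer:

```
import tensorflow as tf

class AvgDistancePenalty(tf.keras.layers.Layer):
    """Illustrative stand-in: keeps the mean kNN distance near a target."""
    def __init__(self, strength=1e-3, target=0.5, **kwargs):
        super().__init__(**kwargs)
        self.strength = strength
        self.target = target

    def call(self, dist):
        # dist: (N, K) neighbour distances from the GravNet/kNN step
        self.add_loss(self.strength * tf.square(tf.reduce_mean(dist) - self.target))
        return dist  # pure pass-through, like a loss layer
```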
@@ -214,8 +227,8 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
         # x_rand = ScalarMultiply(0.1)(x_rand)
         # gndist = ScalarMultiply(0.01)(gndist)
         # gncoords = ScalarMultiply(0.01)(gncoords)
-        x = Concatenate()([x_pre, xgn, x_rand, gndist, gncoords])
-        x = Dense(d_shape,
+        x = Concatenate()([x, x_pre, xgn, gndist, gncoords])
+        x = Dense(2*d_shape,
             name=f"dense_post_gravnet_1_iteration_{i}",
             activation=DENSE_ACTIVATION,
             kernel_regularizer=DENSE_REGULARIZER)(x)
@@ -270,7 +283,7 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
 
     pred_beta = LLExtendedObjectCondensation(scale=1.,
                                              use_energy_weights=True,
-                                             record_metrics=False,
+                                             record_metrics=True,
                                              print_loss=True,
                                              name="ExtendedOCLoss",
                                              implementation = loss_implementation,
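`LLExtendedObjectCondensation` implements the object-condensation loss; flipping `record_metrics=True` logs its components (plausibly to wandb, per the commit history). For orientation, a sketch of the condensation "charge" from the original object-condensation formulation, on which the attractive/repulsive potentials are built; a simplified illustration only, as the extended loss in this repo adds many more terms:

```
import tensorflow as tf

def oc_charge(beta, q_min=0.1):
    # q_i = arctanh(beta_i)^2 + q_min: confident hits (beta -> 1)
    # get a large charge and act as condensation points
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)  # keep atanh finite
    return tf.math.atanh(beta) ** 2 + q_min
```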
@@ -304,7 +317,7 @@ def config_model(Inputs, td, debug_outdir=None, plot_debug_every=2000):
         # 'no_noise_rs': pre_processed['no_noise_rs'],
     }
 
-    return RaggedDictModel(inputs=Inputs, outputs=model_outputs)
+    return DictModel(inputs=Inputs, outputs=model_outputs)
     #return DictModel(inputs=Inputs, outputs=model_outputs)
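The return switches from `RaggedDictModel` back to plain `DictModel`, matching the import change above. Conceptually, a dict model behaves like a Keras model whose outputs form a name-to-tensor dict; a generic sketch under that assumption, with made-up shapes and output names (only `pred_beta` actually appears in this file):

```
import tensorflow as tf

inp = tf.keras.Input(shape=(16,))
h = tf.keras.layers.Dense(32, activation='elu')(inp)
model = tf.keras.Model(inputs=inp, outputs={
    'pred_beta': tf.keras.layers.Dense(1, activation='sigmoid')(h),  # condensation score
    'pred_ccoords': tf.keras.layers.Dense(3)(h),                     # clustering coordinates
})
# model(inputs) then returns a dict keyed by output name
```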

