diff --git a/examples/element_internal_record/000_internal_record.py b/examples/element_internal_record/000_internal_record.py index 71c272137..0c6c5719f 100644 --- a/examples/element_internal_record/000_internal_record.py +++ b/examples/element_internal_record/000_internal_record.py @@ -36,20 +36,22 @@ class TestElementRecord(xo.HybridClass): # element number. track_method_source = r''' -/*gpufun*/ -void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ +#include "xtrack/headers/track.h" +GPUFUN +void TestElement_track_local_particle(TestElementData el, LocalParticle* part0) +{ // Extract the record and record_index TestElementRecordData record = TestElementData_getp_internal_record(el, part0); RecordIndex record_index = NULL; - if (record){ + if (record) { record_index = TestElementRecordData_getp__index(record); } int64_t n_kicks = TestElementData_get_n_kicks(el); printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); for (int64_t i = 0; i < n_kicks; i++) { double rr = 1e-6 * RandomUniform_generate(part); @@ -72,8 +74,7 @@ class TestElementRecord(xo.HybridClass): } } - - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' diff --git a/examples/element_internal_record/001_multirecord.py b/examples/element_internal_record/001_multirecord.py index 2d081187c..d4f21212c 100644 --- a/examples/element_internal_record/001_multirecord.py +++ b/examples/element_internal_record/001_multirecord.py @@ -41,7 +41,9 @@ class TestElementRecord(xo.HybridClass): # code of the beam element. TestElement_track_method_source = r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -60,7 +62,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); // Record in table1 info about the ingoing particle if (record){ @@ -106,7 +108,7 @@ class TestElementRecord(xo.HybridClass): } } - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' diff --git a/examples/element_internal_record/002_record_in_individual_element.py b/examples/element_internal_record/002_record_in_individual_element.py index 57cc4581f..a144876b9 100644 --- a/examples/element_internal_record/002_record_in_individual_element.py +++ b/examples/element_internal_record/002_record_in_individual_element.py @@ -40,7 +40,9 @@ class TestElementRecord(xo.HybridClass): # code of the beam element. 
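# --- Illustrative sketch, not from the diff above --------------------------------
# The hunks above migrate elements from the old `/*gpufun*/` and
# `//start_per_particle_block` comment markers to the `GPUFUN` and
# `START_PER_PARTICLE_BLOCK` / `END_PER_PARTICLE_BLOCK` macros pulled in via
# "xtrack/headers/track.h". A minimal element written directly in the new style
# could look as follows (the class name `MyShift` and the field `dx` are made up
# for illustration; the macro usage mirrors the examples in this patch).
import xobjects as xo
import xtrack as xt

class MyShift(xt.BeamElement):
    _xofields = {'dx': xo.Float64}
    _extra_c_sources = ['''
        #include "xtrack/headers/track.h"

        GPUFUN
        void MyShift_track_local_particle(MyShiftData el, LocalParticle* part0)
        {
            double const dx = MyShiftData_get_dx(el);
            START_PER_PARTICLE_BLOCK(part0, part);
                // Apply the same shift to every active particle
                LocalParticle_add_to_x(part, dx);
            END_PER_PARTICLE_BLOCK;
        }
    ''']
# ---------------------------------------------------------------------------------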
TestElement_track_method_source = r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -59,7 +61,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); // printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); // Record in table1 info about the ingoing particle if (record){ @@ -105,7 +107,7 @@ class TestElementRecord(xo.HybridClass): } } - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' diff --git a/examples/random_number_generator/001_usage_in_beam_element.py b/examples/random_number_generator/001_usage_in_beam_element.py index 32cc71e71..e91908af8 100644 --- a/examples/random_number_generator/001_usage_in_beam_element.py +++ b/examples/random_number_generator/001_usage_in_beam_element.py @@ -27,12 +27,15 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ ''' - /*gpufun*/ - void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ - //start_per_particle_block (part0->part) + #include "xtrack/headers/track.h" + + GPUFUN + void TestElement_track_local_particle(TestElementData el, LocalParticle* part0) + { + START_PER_PARTICLE_BLOCK(part0, part); double rr = !!GENERATOR!!_generate(part); LocalParticle_set_x(part, rr); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''.replace('!!GENERATOR!!', generator_to_test) ] diff --git a/examples/spin_lep/chirp_kicker.py b/examples/spin_lep/chirp_kicker.py index e0a0b377d..de8079743 100644 --- a/examples/spin_lep/chirp_kicker.py +++ b/examples/spin_lep/chirp_kicker.py @@ -19,22 +19,22 @@ class VerticalChirpKicker(xt.BeamElement): _extra_c_sources =[''' - #include - #include + #include "xtrack/headers/track.h" + #include "xtrack/beam_elements/elements_src/track_magnet.h" - /*gpufun*/ + GPUFUN void VerticalChirpKicker_track_local_particle( - VerticalChirpKickerData el, LocalParticle* part0){ - + VerticalChirpKickerData el, LocalParticle* part0) + { double const k0sl = VerticalChirpKickerData_get_k0sl(el); double const q_start = VerticalChirpKickerData_get_q_start(el); double const q_end = q_start + VerticalChirpKickerData_get_q_span(el); double const num_turns = VerticalChirpKickerData_get_num_turns(el); double const length = VerticalChirpKickerData_get_length(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double const at_turn = LocalParticle_get_at_turn(part); - if (at_turn < num_turns){ + if (at_turn < num_turns) { // integrating to get the instantaneous phase double const phi = 2 * PI * q_start * at_turn + PI * (q_end - q_start) / ((double) num_turns) * ((double) at_turn * at_turn); @@ -60,9 +60,9 @@ class VerticalChirpKicker(xt.BeamElement): 0, // frame curvature length, length // lpath - same for a thin element - ); - #endif + ); + #endif } - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] diff --git a/pyproject.toml b/pyproject.toml index 966ba860e..3516aed9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,62 @@ [build-system] build-backend = 'setuptools.build_meta' requires = [ - 'setuptools >= 43.0.0', - 'numpy', + 'setuptools >= 61.0.0', ] + +[project] +name = "xtrack" +dynamic = ["version"] +description = "Tracking library for particle accelerators" +readme = "README.md" +requires-python = ">=3.9" +license = { text = "Apache-2.0" } +authors = [ + { name = "G. Iadarola et al." 
} +] +dependencies = [ + "numpy>=1.0", + "pandas>=2.0", + "scipy", + "tqdm", + "requests", + "xobjects", + "xdeps", +] + +[project.urls] +"Homepage" = "https://xsuite.readthedocs.io/" +"Bug Tracker" = "https://github.com/xsuite/xsuite/issues" +"Documentation" = "https://xsuite.readthedocs.io/" +"Source Code" = "https://github.com/xsuite/xtrack" +"Download" = "https://pypi.python.org/pypi/xtrack" + +[project.optional-dependencies] +tests = [ + "cpymad", + "nafflib", + "PyHEADTAIL", + "pytest", + "pytest-mock", + "pymadng", + "requests-mock", + "tfs-pandas", +] +notebooks = [ + "jupyter", + "ipympl", + "xplt" +] + +[tool.setuptools.packages.find] +where = ["."] +include = ["xtrack", "ducktrack"] + +[tool.setuptools.dynamic] +version = { attr = "xtrack._version.__version__" } + +[tool.setuptools] +include-package-data = true + +[project.entry-points.xobjects] +include = "xtrack" diff --git a/setup.py b/setup.py deleted file mode 100644 index ff4151360..000000000 --- a/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -# copyright ############################### # -# This file is part of the Xtrack Package. # -# Copyright (c) CERN, 2021. # -# ######################################### # - -from setuptools import setup, find_packages, Extension -from pathlib import Path - -####################################### -# Prepare list of compiled extensions # -####################################### - -extensions = [] - -######### -# Setup # -######### - -version_file = Path(__file__).parent / 'xtrack/_version.py' -dd = {} -with open(version_file.absolute(), 'r') as fp: - exec(fp.read(), dd) -__version__ = dd['__version__'] - -setup( - name='xtrack', - version=__version__, - description='Tracking library for particle accelerators', - long_description='Tracking library for particle accelerators', - url='https://xsuite.readthedocs.io/', - author='G. Iadarola et al.', - license='Apache 2.0', - download_url="https://pypi.python.org/pypi/xtrack", - project_urls={ - "Bug Tracker": "https://github.com/xsuite/xsuite/issues", - "Documentation": 'https://xsuite.readthedocs.io/', - "Source Code": "https://github.com/xsuite/xtrack", - }, - packages=find_packages(), - ext_modules = extensions, - include_package_data=True, - install_requires=[ - 'numpy>=1.0', - "pandas>=2.0", - 'scipy', - 'tqdm', - 'requests', - 'xobjects', - 'xdeps' - ], - extras_require={ - 'tests': ['cpymad', 'nafflib', 'PyHEADTAIL', 'pytest', 'pytest-mock', - 'pymadng', 'requests-mock', 'tfs-pandas'], - 'notebooks': ['jupyter', 'ipympl', 'xplt'], - }, -) diff --git a/tests/test_atomic_add.py b/tests/test_atomic_add.py deleted file mode 100644 index 97a5afbca..000000000 --- a/tests/test_atomic_add.py +++ /dev/null @@ -1,90 +0,0 @@ -# copyright ############################### # -# This file is part of the Xtrack Package. # -# Copyright (c) CERN, 2025. 
# -# ######################################### # - -import pytest -import numpy as np - -import xtrack as xt -import xobjects as xo -from xobjects.test_helpers import for_all_test_contexts - - -@for_all_test_contexts -@pytest.mark.parametrize("overload", [True, False], ids=['overload', 'no_overload']) -@pytest.mark.parametrize("ctype", [xo.Int8, xo.Int16, xo.Int32, xo.Int64, - xo.UInt8, xo.UInt16, xo.UInt32, - xo.UInt64, xo.Float32, xo.Float64]) -def test_atomic(overload, ctype, test_context): - if overload: - func_name = 'atomicAdd' - else: - func_name = f'atomicAdd_{ctype.__name__.lower()[0]}{ctype.__name__.split("t")[1]}' - class TestAtomic(xt.BeamElement): - _xofields = {f'val': ctype} - allow_track = False - _extra_c_sources = [f''' - #include - #include - - GPUKERN - void run_atomic_test(TestAtomicData el, GPUGLMEM {ctype._c_type}* increments, - GPUGLMEM {ctype._c_type}* retvals, int length) {{ - VECTORIZE_OVER(ii, length); - GPUGLMEM {ctype._c_type}* val = TestAtomicData_getp_val(el); - {ctype._c_type} ret = {func_name}(val, increments[ii]); - retvals[ii] = ret; - END_VECTORIZE; - }} - '''] - - _kernels = { - 'run_atomic_test': xo.Kernel( - c_name='run_atomic_test', - args=[xo.Arg(xo.ThisClass, name='el'), - xo.Arg(ctype, pointer=True, name='increments'), - xo.Arg(ctype, pointer=True, name='retvals'), - xo.Arg(xo.Int32, name='length')], - n_threads='length') - } - - atomic = TestAtomic(_context=test_context, val=0) - assert atomic.val == 0 - - # Test with all increments = 1, so we can check the return values easily. - num_steps = 10000 - if ctype.__name__.startswith('Int') or ctype.__name__.startswith('Uint'): - # Less steps to avoid overflow - num_steps = min(num_steps, 2**(8*ctype._size - 1) - 1) - increments = test_context.zeros(shape=(num_steps,), dtype=ctype._dtype) + 1 - retvals = test_context.zeros(shape=(num_steps,), dtype=ctype._dtype) - atomic.run_atomic_test(increments=increments, retvals=retvals, length=num_steps) - assert atomic.val == num_steps - retvals = np.sort(test_context.nparray_from_context_array(retvals)) - assert np.allclose(retvals, np.arange(num_steps, dtype=ctype._dtype), atol=1.e-15, rtol=1.e-15) - - # Test with random increments, where we now only can check the total sum - # (retvals can be anything). Watch out: overflow is undefined behaviour, - # except for unsigned integers, so we skip this test for signed integers. 
- atomic.val = 0 - retvals = test_context.zeros(shape=(num_steps,), dtype=ctype._dtype) - if ctype.__name__.startswith('Uint'): - low = 0 - high = 2**(8*ctype._size) - 1 - increments = np.random.randint(low, high+1, size=num_steps, dtype=ctype._dtype) - increments = test_context.nparray_to_context_array(increments) - atomic.run_atomic_test(increments=increments, retvals=retvals, length=num_steps) - increments = test_context.nparray_from_context_array(increments) - assert atomic.val == (np.sum(increments).item() % (2**(8*ctype._size))) - - elif ctype.__name__.startswith('Float'): - increments = np.zeros(shape=(num_steps,), dtype=ctype._dtype) - increments += np.random.uniform(0, 10, size=num_steps) - increments = test_context.nparray_to_context_array(increments) - atomic.run_atomic_test(increments=increments, retvals=retvals, length=num_steps) - increments = test_context.nparray_from_context_array(increments) - if ctype == xo.Float32: - assert np.isclose(atomic.val, np.sum(increments), atol=10., rtol=1.e-4) - else: - assert np.isclose(atomic.val, np.sum(increments), atol=1.e-6, rtol=1.e-12) diff --git a/tests/test_element_internal_record.py b/tests/test_element_internal_record.py index c61a6d555..1034872ea 100644 --- a/tests/test_element_internal_record.py +++ b/tests/test_element_internal_record.py @@ -26,7 +26,9 @@ class TestElementRecord(xo.HybridClass): extra_src = [] extra_src.append(r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -39,8 +41,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); //printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) - + START_PER_PARTICLE_BLOCK(part0, part); for (int64_t i = 0; i < n_kicks; i++) { double rr = 1e-6 * RandomUniform_generate(part); LocalParticle_add_to_px(part, rr); @@ -61,9 +62,7 @@ class TestElementRecord(xo.HybridClass): } } } - - - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''') @@ -205,7 +204,9 @@ class TestElementRecord(xo.HybridClass): extra_src = [] extra_src.append(r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -218,8 +219,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); //printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) - + START_PER_PARTICLE_BLOCK(part0, part); for (int64_t i = 0; i < n_kicks; i++) { // We don't apply the kick, otherwise the twiss fails double rr = 1e-6 * RandomUniform_generate(part); @@ -240,9 +240,7 @@ class TestElementRecord(xo.HybridClass): } } } - - - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''') @@ -308,7 +306,9 @@ class TestElementRecord(xo.HybridClass): extra_src = [] extra_src.append(r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -327,8 +327,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); // printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) - + START_PER_PARTICLE_BLOCK(part0, part); // Record in table1 info about the ingoing particle if (record){ // Get a slot in table1 @@ -372,8 +371,7 @@ class TestElementRecord(xo.HybridClass): } } } - - 
//end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''') @@ -565,7 +563,9 @@ class TestElementRecord(xo.HybridClass): extra_src = [] extra_src.append(r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index @@ -584,8 +584,7 @@ class TestElementRecord(xo.HybridClass): int64_t n_kicks = TestElementData_get_n_kicks(el); // printf("n_kicks %d\n", (int)n_kicks); - //start_per_particle_block (part0->part) - + START_PER_PARTICLE_BLOCK(part0, part); // Record in table1 info about the ingoing particle if (record){ // Get a slot in table1 @@ -629,8 +628,7 @@ class TestElementRecord(xo.HybridClass): } } } - - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''') diff --git a/tests/test_elements.py b/tests/test_elements.py index 8d82f13e4..ef337229c 100644 --- a/tests/test_elements.py +++ b/tests/test_elements.py @@ -542,34 +542,36 @@ def test_exciter(test_context): test_source = r""" -/*gpufun*/ +#include "xtrack/headers/track.h" + +GPUFUN void test_function(TestElementData el, LocalParticle* part0, - /*gpuglmem*/ double* b){ + GPUGLMEM double* b){ double const a = TestElementData_get_a(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); const int64_t ipart = part->ipart; double const val = b[ipart]; LocalParticle_add_to_s(part, val + a); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } -/*gpufun*/ +GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ double const a = TestElementData_get_a(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_set_s(part, a); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } """ diff --git a/tests/test_particles.py b/tests/test_particles.py index 5827b04de..372fd2939 100644 --- a/tests/test_particles.py +++ b/tests/test_particles.py @@ -20,18 +20,19 @@ class TestElement(xt.BeamElement): } _extra_c_sources = [""" + #include "xtrack/headers/track.h" #define XT_OMP_SKIP_REORGANIZE - /*gpufun*/ + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0 ) { - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); int64_t state = check_is_active(part); int64_t id = LocalParticle_get_particle_id(part); TestElementData_set_states(el, id, state); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } """] @@ -96,16 +97,18 @@ class TestElement(xt.BeamElement): } _extra_c_sources = [""" - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0 ) { - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); int64_t state = check_is_active(part); int64_t id = LocalParticle_get_particle_id(part); TestElementData_set_states(el, id, state); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } """] diff --git a/tests/test_particles_basics.py b/tests/test_particles_basics.py index 79e537ad3..4217937c1 100644 --- a/tests/test_particles_basics.py +++ b/tests/test_particles_basics.py @@ -345,14 +345,16 @@ class TestElement(xt.BeamElement): 'pz_only': xo.Int64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ double const value = TestElementData_get_value(el); int const pz_only = (int) TestElementData_get_pz_only(el); - //start_per_particle_block 
(part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_add_to_energy(part, value, pz_only); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -419,13 +421,15 @@ class TestElement(xt.BeamElement): } _extra_c_sources =[''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ double const value = TestElementData_get_value(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_update_delta(part, value); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -460,13 +464,15 @@ class TestElement(xt.BeamElement): } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ double const value = TestElementData_get_value(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_update_ptau(part, value); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -500,14 +506,16 @@ class TestElement(xt.BeamElement): 'value': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ double const value = TestElementData_get_value(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double const pzeta = LocalParticle_get_pzeta(part); LocalParticle_update_pzeta(part, pzeta+value); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -543,13 +551,15 @@ class TestElement(xt.BeamElement): 'value': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ double const value = TestElementData_get_value(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_update_p0c(part, value); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -590,18 +600,20 @@ class ScaleAng(xt.BeamElement): 'scale_y2': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void ScaleAng_track_local_particle( ScaleAngData el, LocalParticle* part0){ double const scale_x = ScaleAngData_get_scale_x(el); double const scale_y = ScaleAngData_get_scale_y(el); double const scale_x2 = ScaleAngData_get_scale_x2(el); double const scale_y2 = ScaleAngData_get_scale_y2(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_scale_xp(part, scale_x); LocalParticle_scale_yp(part, scale_y); LocalParticle_scale_xp_yp(part, scale_x2, scale_y2); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -613,18 +625,20 @@ class KickAng(xt.BeamElement): 'kick_y2': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void KickAng_track_local_particle( KickAngData el, LocalParticle* part0){ double const kick_x = KickAngData_get_kick_x(el); double const kick_y = KickAngData_get_kick_y(el); double const kick_x2 = KickAngData_get_kick_x2(el); double const kick_y2 = KickAngData_get_kick_y2(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_add_to_xp(part, kick_x); LocalParticle_add_to_yp(part, kick_y); LocalParticle_add_to_xp_yp(part, kick_x2, kick_y2); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] 
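# --- Illustrative sketch, not from the diff above --------------------------------
# The ScaleAng/KickAng test elements above are ordinary beam elements, so the
# usual tracking workflow applies to them unchanged by this patch. A minimal
# usage sketch, assuming the KickAng class defined above is in scope; unspecified
# fields default to zero.
import xtrack as xt

kicker = KickAng(kick_x=1e-6)
line = xt.Line(elements=[kicker])
line.build_tracker()

particles = xt.Particles(p0c=7e12, x=[0., 1e-3])
line.track(particles)  # applies the transverse kicks defined in the C source above
# ---------------------------------------------------------------------------------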
@@ -648,18 +662,20 @@ class ScaleAngExact(xt.BeamElement): 'scale_y2': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void ScaleAngExact_track_local_particle( ScaleAngExactData el, LocalParticle* part0){ double const scale_x = ScaleAngExactData_get_scale_x(el); double const scale_y = ScaleAngExactData_get_scale_y(el); double const scale_x2 = ScaleAngExactData_get_scale_x2(el); double const scale_y2 = ScaleAngExactData_get_scale_y2(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_scale_exact_xp(part, scale_x); LocalParticle_scale_exact_yp(part, scale_y); LocalParticle_scale_exact_xp_yp(part, scale_x2, scale_y2); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] @@ -671,18 +687,20 @@ class KickAngExact(xt.BeamElement): 'kick_y2': xo.Float64, } _extra_c_sources = [''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void KickAngExact_track_local_particle( KickAngExactData el, LocalParticle* part0){ double const kick_x = KickAngExactData_get_kick_x(el); double const kick_y = KickAngExactData_get_kick_y(el); double const kick_x2 = KickAngExactData_get_kick_x2(el); double const kick_y2 = KickAngExactData_get_kick_y2(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); LocalParticle_add_to_exact_xp(part, kick_x); LocalParticle_add_to_exact_yp(part, kick_y); LocalParticle_add_to_exact_xp_yp(part, kick_x2, kick_y2); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''] diff --git a/tests/test_random_gen.py b/tests/test_random_gen.py index b3a9d3f2f..378a8ce07 100644 --- a/tests/test_random_gen.py +++ b/tests/test_random_gen.py @@ -31,13 +31,15 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ ''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double rr = !!GENERATOR!!_generate(part); LocalParticle_set_x(part, rr); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } '''.replace('!!GENERATOR!!', generator) ] diff --git a/tests/test_random_gen_exp.py b/tests/test_random_gen_exp.py index ea6b3ec01..3614fbd1d 100644 --- a/tests/test_random_gen_exp.py +++ b/tests/test_random_gen_exp.py @@ -29,13 +29,15 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ ''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double rr = RandomExponential_generate(part); LocalParticle_set_x(part, rr); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' ] diff --git a/tests/test_random_gen_gauss.py b/tests/test_random_gen_gauss.py index 1e92441cf..5579124d9 100644 --- a/tests/test_random_gen_gauss.py +++ b/tests/test_random_gen_gauss.py @@ -28,13 +28,15 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ ''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double rr = RandomNormal_generate(part); LocalParticle_set_x(part, rr); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' ] diff --git a/tests/test_random_gen_ruth.py b/tests/test_random_gen_ruth.py index 7139444d1..bd624190b 100644 --- 
a/tests/test_random_gen_ruth.py +++ b/tests/test_random_gen_ruth.py @@ -36,14 +36,16 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ ''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0){ RandomRutherfordData rng = TestElementData_getp_rng(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); double rr = RandomRutherford_generate(rng, part); LocalParticle_set_x(part, rr); - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' ] diff --git a/tests/test_tracker.py b/tests/test_tracker.py index 0b726955d..4c9a17679 100644 --- a/tests/test_tracker.py +++ b/tests/test_tracker.py @@ -381,12 +381,14 @@ class TestElement(xt.BeamElement): 'dummy': xo.Float64, } _extra_c_sources = [""" - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle( TestElementData el, LocalParticle* part0) { - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); #if TEST_FLAG == 2 LocalParticle_set_x(part, 7); @@ -396,7 +398,7 @@ class TestElement(xt.BeamElement): LocalParticle_set_y(part, 42); #endif - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } """] @@ -745,7 +747,9 @@ class TestElement(xt.BeamElement): _extra_c_sources = [ r''' - /*gpufun*/ + #include "xtrack/headers/track.h" + + GPUFUN void TestElement_track_local_particle(TestElementData el, LocalParticle* part0){ // Extract the record and record_index TestElementRecordData record = TestElementData_getp_internal_record(el, part0); @@ -756,7 +760,7 @@ class TestElement(xt.BeamElement): int64_t elem_field = TestElementData_get_element_field(el); - //start_per_particle_block (part0->part) + START_PER_PARTICLE_BLOCK(part0, part); if (record) { // Record exists // Get a slot in the record (this is thread safe) int64_t i_slot = RecordIndex_get_slot(record_index); @@ -774,7 +778,7 @@ class TestElement(xt.BeamElement): ); } } - //end_per_particle_block + END_PER_PARTICLE_BLOCK; } ''' ] diff --git a/xtrack/base_element.py b/xtrack/base_element.py index 3df0677e6..1cddf66db 100644 --- a/xtrack/base_element.py +++ b/xtrack/base_element.py @@ -43,7 +43,7 @@ } """ -def _handle_per_particle_blocks(sources, local_particle_src): +def _handle_per_particle_blocks(sources): if isinstance(sources, str): sources = (sources, ) @@ -59,8 +59,6 @@ def _handle_per_particle_blocks(sources, local_particle_src): else: strss = ss - strss = strss.replace('/*placeholder_for_local_particle_src*/', - local_particle_src) if '//start_per_particle_block' in strss: lines = strss.splitlines() @@ -73,7 +71,7 @@ def _handle_per_particle_blocks(sources, local_particle_src): # TODO: this is very dirty, just for check!!!!! out.append('\n'.join(lines)) else: - out.append(ss) + out.append(strss) if wasstring: @@ -440,9 +438,7 @@ def init_pipeline(self, pipeline_manager, name, partners_names=[]): def compile_kernels(self, extra_classes=(), *args, **kwargs): if 'apply_to_source' not in kwargs.keys(): kwargs['apply_to_source'] = [] - kwargs['apply_to_source'].append( - partial(_handle_per_particle_blocks, - local_particle_src=Particles.gen_local_particle_api())) + kwargs['apply_to_source'].append(_handle_per_particle_blocks) context = self._context cls = self.__class__ diff --git a/xtrack/headers/atomicadd.h b/xtrack/headers/atomicadd.h index 360dda64d..0a44925b3 100644 --- a/xtrack/headers/atomicadd.h +++ b/xtrack/headers/atomicadd.h @@ -3,348 +3,11 @@ // Copyright (c) CERN, 2025. 
// // ########################################### // -#ifndef _ATOMICADD_H_ -#define _ATOMICADD_H_ +#ifndef XTRACK_ATOMICADD_H_DEPRECATED +#define XTRACK_ATOMICADD_H_DEPRECATED -#include +// The xtrack/headers/atomicadd.h header is deprecated. Use xobjects/headers/atomicadd.h instead. +#include "xobjects/headers/atomicadd.h" -// Ensure no conflicting atomicAdd macro is defined earlier for CPU or OpenCL -#ifndef XO_CONTEXT_CUDA -#ifdef atomicAdd - #warning "Warning: atomicAdd macro already defined, undefining it." - #undef atomicAdd -#endif // atomicAdd -#endif // XO_CONTEXT_CUDA - - -// ################################# // -// ### CPU Contexts ### // -// ################################# // - -#ifdef XO_CONTEXT_CPU -#include - -#ifdef XO_CONTEXT_CPU_OPENMP - #ifndef _OPENMP - #error "XO_CONTEXT_CPU_OPENMP set, but compiled without -fopenmp" - #endif - // OpenMP atomic capture gives us read+write atomically - #define OMP_ATOMIC_CAPTURE _Pragma("omp atomic capture") -#else - #define OMP_ATOMIC_CAPTURE // No OpenMP: non-atomic fallback -#endif // XO_CONTEXT_CPU_OPENMP - -// Macro to define atomicAdd for different types, will be overloaded via _Generic. -#define DEF_ATOMIC_ADD(T, SUF) \ -GPUFUN T atomicAdd_##SUF(GPUGLMEM T *addr, T val) { \ - T old; \ - const T inc = (T)val; \ - OMP_ATOMIC_CAPTURE \ - { old = *addr; *addr = *addr + inc; } \ - return old; \ -} - -DEF_ATOMIC_ADD(int8_t , i8) -DEF_ATOMIC_ADD(int16_t, i16) -DEF_ATOMIC_ADD(int32_t, i32) -DEF_ATOMIC_ADD(int64_t, i64) -DEF_ATOMIC_ADD(uint8_t , u8) -DEF_ATOMIC_ADD(uint16_t, u16) -DEF_ATOMIC_ADD(uint32_t, u32) -DEF_ATOMIC_ADD(uint64_t, u64) -DEF_ATOMIC_ADD(float , f32) -DEF_ATOMIC_ADD(double , f64) - -// Using _Generic to select the right function based on type (since C11). -// See https://en.cppreference.com/w/c/language/generic.html -#define atomicAdd(addr, val) _Generic((addr), \ - int8_t*: atomicAdd_i8, \ - int16_t*: atomicAdd_i16, \ - int32_t*: atomicAdd_i32, \ - int64_t*: atomicAdd_i64, \ - uint8_t*: atomicAdd_u8, \ - uint16_t*: atomicAdd_u16, \ - uint32_t*: atomicAdd_u32, \ - uint64_t*: atomicAdd_u64, \ - float*: atomicAdd_f32, \ - double*: atomicAdd_f64 \ -)(addr, (val)) -#endif // XO_CONTEXT_CPU - - -// ################################# // -// ### GPU Contexts ### // -// ################################# // - -/* -------------- Design notes for GPU atomics --------------------------------- -* We will go for a unified approach for CUDA and OpenCL, with macros where needed. -* We implement 8/16-bit atomic add by atomically CAS'ing the *containing* - * 32-bit word. Only the target byte/halfword lane is modified; the neighbor - * lane is preserved. This is linearisable: each successful CAS is one atomic - * RMW on the 32-bit word. - * Assumptions: little-endian lane layout (true on NVIDIA) and natural alignment - * of the 16-bit addresses (addr % 2 == 0). 8-bit has no alignment requirement. - * Return value matches CUDA semantics: the **old** value at *addr* (fetch-add). - * ---------------------------------------------------------------------------*/ - -// Info: a CAS function (Compare-And-Swap) takes three arguments: a pointer to -// the value to be modified, the expected old value, and the new value. The -// second argument helps to recognise if another thread has modified the value -// in the meantime. The function compares the value at the address with the -// expected values, and if they are the same, it writes the new value at the -// address. In any case, the function returns the actual old value at the -// address. 
If the returned value is different from the expected value, it -// means that another thread has modified the value in the meantime. - -// Define types and macros for CUDA and OpenCL -// ------------------------------------------- -#if defined(XO_CONTEXT_CUDA) - // CUDA compiler may not have , so define the types if needed. - #ifdef __CUDACC_RTC__ - // NVRTC (CuPy RawModule default) can’t see , so detect it via __CUDACC_RTC__ - typedef signed char int8_t; - typedef short int16_t; - typedef int int32_t; - typedef long long int64_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef unsigned long long uint64_t; - #else - // Alternatively, NVCC path is fine with host headers - #include - #endif // __CUDACC_RTC__ - #define GPUVOLATILE GPUGLMEM - #define __XT_CAS_U32(ptr, exp, val) atomicCAS((GPUVOLATILE uint32_t*)(ptr), (uint32_t)(exp), (uint32_t)(val)) - #define __XT_CAS_U64(ptr, exp, val) atomicCAS((GPUVOLATILE uint64_t*)(ptr), (uint64_t)(exp), (uint64_t)(val)) - #define __XT_AS_U32_FROM_F32(x) __float_as_uint(x) - #define __XT_AS_F32_FROM_U32(x) __uint_as_float(x) - #define __XT_AS_U64_FROM_F64(x) __double_as_longlong(x) - #define __XT_AS_F64_FROM_U64(x) __longlong_as_double(x) -#elif defined(XO_CONTEXT_CL) - // It seems OpenCL already has the types from defined. - // typedef char int8_t; - // typedef short int16_t; - // typedef int int32_t; - // typedef long int64_t; - // typedef unsigned char uint8_t; - // typedef unsigned short uint16_t; - // typedef unsigned int uint32_t; - // typedef unsigned long uint64_t; - #define GPUVOLATILE GPUGLMEM volatile - #if __OPENCL_C_VERSION__ < 110 - // Map 1.0 "atom_*" names to 1.1+ "atomic_*" - #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable - #define atomic_add atom_add - #define atomic_cmpxchg atom_cmpxchg - #endif - #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable - #pragma OPENCL EXTENSION cl_khr_fp64 : enable - #define __XT_CAS_U32(ptr, exp, val) atomic_cmpxchg((GPUVOLATILE uint32_t*)(ptr), (uint32_t)(exp), (uint32_t)(val)) - #define __XT_CAS_U64(ptr, exp, val) atom_cmpxchg((GPUVOLATILE uint64_t*)(ptr), (uint64_t)(exp), (uint64_t)(val)) - #define __XT_AS_U32_FROM_F32(x) as_uint(x) - #define __XT_AS_F32_FROM_U32(x) as_float(x) - #define __XT_AS_U64_FROM_F64(x) as_ulong(x) - #define __XT_AS_F64_FROM_U64(x) as_double(x) -#endif // XO_CONTEXT_CUDA / XO_CONTEXT_CL - -// Define atomic functions per type -// -------------------------------- -#if defined(XO_CONTEXT_CUDA) || defined(XO_CONTEXT_CL) - // Helper: compute (base 32-bit word pointer, shift, mask) for a byte in that word. - GPUFUN inline void __xt_lane8(GPUVOLATILE void* addr, GPUVOLATILE uint32_t** w, uint32_t* sh, uint32_t* mask){ - size_t a = (size_t)addr; - *w = (GPUVOLATILE uint32_t*)(a & ~(size_t)3); // word: align down to 4-byte boundary - *sh = (uint32_t)((a & (size_t)3) * 8U); // shift 0,8,16,24 depending on byte lane - *mask = (uint32_t)(0xFFu << *sh); - } - // Helper: same for a halfword (16-bit) in the containing 32-bit word. - GPUFUN inline void __xt_lane16(GPUVOLATILE void* addr, GPUVOLATILE uint32_t** w, uint32_t* sh, uint32_t* mask){ - size_t a = (size_t)addr; - *w = (GPUVOLATILE uint32_t*)(a & ~(size_t)3); // word: align down to 4-byte boundary - *sh = (uint32_t)((a & (size_t)2) ? 
16U : 0U); // shift0 or 16 depending on halfword - *mask = (uint32_t)(0xFFFFU << *sh); - } - - // ---------------- 8-bit: int8_t / uint8_t (CAS on 32-bit word) -------------- - GPUFUN int8_t atomicAdd_i8(GPUVOLATILE int8_t* addr, int8_t val){ - GPUVOLATILE uint32_t *w; - uint32_t sh, mask; - __xt_lane8(addr, &w, &sh, &mask); - uint32_t old = *w, assumed, b, nb, nw; // byte, newbyte, newword - do { - assumed = old; - b = (assumed & mask) >> sh; // Extract current 8-bit lane - nb = (uint32_t)((uint8_t)b + (uint8_t)val); // Add in modulo-256 (two's complement) - nw = (assumed & ~mask) | ((nb & 0xFFU) << sh); // Merge back updated lane; leave neighbor lanes intact - // Try to publish; if someone raced us, retry with their value - old = __XT_CAS_U32(w, assumed, nw); - } while (old != assumed); - return (int8_t)((assumed & mask) >> sh); - } - GPUFUN uint8_t atomicAdd_u8(GPUVOLATILE uint8_t* addr, uint8_t val){ - GPUVOLATILE uint32_t *w; - uint32_t sh, mask; - __xt_lane8(addr, &w, &sh, &mask); - uint32_t old = *w, assumed, b, nb, nw; - do { - assumed = old; - b = (assumed & mask) >> sh; - nb = (uint32_t)(b + val); /* mod 256 */ - nw = (assumed & ~mask) | ((nb & 0xFFU) << sh); - old = __XT_CAS_U32(w, assumed, nw); - } while (old != assumed); - return (uint8_t)((assumed & mask) >> sh); - } - - // ---------------- 16-bit: int16_t / uint16_t (CAS on 32-bit word) ----------- - GPUFUN int16_t atomicAdd_i16(GPUVOLATILE int16_t* addr, int16_t val){ - GPUVOLATILE uint32_t* w; - uint32_t sh, mask; - __xt_lane16(addr, &w, &sh, &mask); - uint32_t old = *w, assumed, b, nb, nw; - do { - assumed = old; - b = (assumed & mask) >> sh; - nb = (uint32_t)((uint16_t)b + (uint16_t)val); - nw = (assumed & ~mask) | ((nb & 0xFFFFU) << sh); - old = __XT_CAS_U32(w, assumed, nw); - } while (old != assumed); - return (int16_t)((assumed & mask) >> sh); - } - GPUFUN uint16_t atomicAdd_u16(GPUVOLATILE uint16_t* addr, uint16_t val){ - GPUVOLATILE uint32_t* w; - uint32_t sh, mask; - __xt_lane16(addr, &w, &sh, &mask); - uint32_t old = *w, assumed, b, nb, nw; - do { - assumed = old; - b = (assumed & mask) >> sh; - nb = (uint32_t)(b + val); - nw = (assumed & ~mask) | ((nb & 0xFFFFU) << sh); - old = __XT_CAS_U32(w, assumed, nw); - } while (old != assumed); - return (uint16_t)((assumed & mask) >> sh); - } - - // ---------------- 32-bit: int32_t / uint32_t (built-in) ----------- - GPUFUN int32_t atomicAdd_i32(GPUVOLATILE int32_t* addr, int32_t val){ - #ifdef XO_CONTEXT_CUDA - return atomicAdd(addr, val); - #else // XO_CONTEXT_CL - return atomic_add(addr, val); - #endif // XO_CONTEXT_CUDA / XO_CONTEXT_CL - } - GPUFUN uint32_t atomicAdd_u32(GPUVOLATILE uint32_t* addr, uint32_t val){ - #ifdef XO_CONTEXT_CUDA - return atomicAdd(addr, val); - #else // XO_CONTEXT_CL - return atomic_add(addr, val); - #endif // XO_CONTEXT_CUDA / XO_CONTEXT_CL - } - - // ---------------- 64-bit: int64_t / uint64_t (built-in or CAS on 64-bit word) ----------- - GPUFUN int64_t atomicAdd_i64(GPUVOLATILE int64_t* addr, int64_t val){ - uint64_t old, nw; - do { - old = *addr; - nw = old + val; - } while (__XT_CAS_U64((GPUVOLATILE uint64_t*)addr, old, nw) != old); - return old; - } - GPUFUN uint64_t atomicAdd_u64(GPUVOLATILE uint64_t* addr, uint64_t val){ - #ifdef XO_CONTEXT_CUDA - return atomicAdd(addr, val); - #else // XO_CONTEXT_CL - return atom_add(addr, val); - #endif // XO_CONTEXT_CUDA / XO_CONTEXT_CL - } - - // ---------------- 32-bit: float (built-in or CAS on 32-bit word) ----------- - GPUFUN float atomicAdd_f32(GPUVOLATILE float* addr, float val){ - #ifdef 
XO_CONTEXT_CUDA - return atomicAdd(addr, val); - #else // XO_CONTEXT_CL - uint32_t old, nw; - do { - old = __XT_AS_U32_FROM_F32(*addr); - nw = __XT_AS_U32_FROM_F32(__XT_AS_F32_FROM_U32(old) + val); - } while (__XT_CAS_U32((GPUVOLATILE uint32_t*)addr, old, nw) != old); - return __XT_AS_F32_FROM_U32(old); - #endif // XO_CONTEXT_CUDA / XO_CONTEXT_CL - } - - // ---------------- 64-bit: float (built-in or CAS on 64-bit word) ----------- - GPUFUN double atomicAdd_f64(GPUVOLATILE double* addr, double val){ - #if __CUDA_ARCH__ >= 600 - return atomicAdd(addr, val); - #else // XO_CONTEXT_CL || __CUDA_ARCH__ < 600 - uint64_t old, nw; - do { - old = __XT_AS_U64_FROM_F64(*addr); - nw = __XT_AS_U64_FROM_F64(__XT_AS_F64_FROM_U64(old) + val); - } while (__XT_CAS_U64((GPUVOLATILE uint64_t*)addr, old, nw) != old); - return __XT_AS_F64_FROM_U64(old); - #endif // __CUDA_ARCH__ >= 600 / XO_CONTEXT_CL - } -#endif // defined(XO_CONTEXT_CUDA) || defined(XO_CONTEXT_CL) - -// Define the overloaded function -// ------------------------------ -#ifdef XO_CONTEXT_CUDA - // NVRTC (CuPy RawModule) usually compiles under extern "C". - // In C, function overloading is not possible, but we can cheat by doing it in - // C++ (with a different name to avoid clashes with the built-in atomicAdd). - // This function will then be remapped to atomicAdd() via a macro in C. - #ifdef __cplusplus - extern "C++" { - - GPUFUN int8_t xt_atomicAdd(GPUVOLATILE int8_t* p, int8_t v) { return atomicAdd_i8 (p, v); } - GPUFUN uint8_t xt_atomicAdd(GPUVOLATILE uint8_t* p, uint8_t v) { return atomicAdd_u8 (p, v); } - GPUFUN int16_t xt_atomicAdd(GPUVOLATILE int16_t* p, int16_t v) { return atomicAdd_i16(p, v); } - GPUFUN uint16_t xt_atomicAdd(GPUVOLATILE uint16_t*p, uint16_t v) { return atomicAdd_u16(p, v); } - GPUFUN int64_t xt_atomicAdd(GPUVOLATILE int64_t*p, int64_t v) { return atomicAdd_i64(p, v); } - - // Existing type definitions: forward to CUDA built-ins - GPUFUN int32_t xt_atomicAdd(GPUVOLATILE int32_t* p, int32_t v) { return ::atomicAdd(p, v); } - GPUFUN uint32_t xt_atomicAdd(GPUVOLATILE uint32_t* p, uint32_t v) { return ::atomicAdd(p, v); } - GPUFUN uint64_t xt_atomicAdd(GPUVOLATILE uint64_t* p, uint64_t v) { return ::atomicAdd(p, v); } - GPUFUN float xt_atomicAdd(GPUVOLATILE float* p, float v) { return ::atomicAdd(p, v); } - #if __CUDA_ARCH__ >= 600 - GPUFUN double xt_atomicAdd(GPUVOLATILE double* p, double v) { return ::atomicAdd(p, v); } - #else - GPUFUN double xt_atomicAdd(GPUVOLATILE double* p, double v) { return atomicAdd_f64(p, v); } - #endif - - } - #endif // __cplusplus - - // ---------- Global remap of the public name on device code ---------- - // Define AFTER the wrappers so we don't macro-rewrite our own calls. 
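// --- Illustrative sketch, not from the diff above --------------------------------
// The deleted header above implements fetch-add for types without a native atomic
// add by looping on a compare-and-swap, as described in the "Info: a CAS function"
// note. The same idea in portable host-side C11 (stdatomic, no CUDA/OpenCL
// involved; `my_atomic_add_f64` is a made-up name used only for this sketch):
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

static double my_atomic_add_f64(_Atomic uint64_t* addr, double val)
{
    uint64_t old_bits = atomic_load(addr);
    for (;;) {
        double old_val;
        memcpy(&old_val, &old_bits, sizeof old_val);   // reinterpret bits as double
        double new_val = old_val + val;
        uint64_t new_bits;
        memcpy(&new_bits, &new_val, sizeof new_bits);
        // On failure, old_bits is refreshed with the current value and we retry.
        if (atomic_compare_exchange_weak(addr, &old_bits, new_bits))
            return old_val;                            // fetch-add semantics: return the old value
    }
}
// ----------------------------------------------------------------------------------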
- #ifdef atomicAdd - #undef atomicAdd - #endif - #define atomicAdd(ptr, val) xt_atomicAdd((ptr), (val)) -#endif /* XO_CONTEXT_CUDA */ - -#ifdef XO_CONTEXT_CL - #if !__has_attribute(overloadable) - #error "The current OpenCL compiler/architecture does not support __attribute__((overloadable))" - #endif - #define OCL_OVERLOAD __attribute__((overloadable)) - GPUFUN int8_t OCL_OVERLOAD atomicAdd(GPUVOLATILE int8_t* p, int8_t v) { return atomicAdd_i8 (p, v); } - GPUFUN uint8_t OCL_OVERLOAD atomicAdd(GPUVOLATILE uint8_t* p, uint8_t v) { return atomicAdd_u8 (p, v); } - GPUFUN int16_t OCL_OVERLOAD atomicAdd(GPUVOLATILE int16_t* p, int16_t v) { return atomicAdd_i16(p, v); } - GPUFUN uint16_t OCL_OVERLOAD atomicAdd(GPUVOLATILE uint16_t*p, uint16_t v) { return atomicAdd_u16(p, v); } - GPUFUN int64_t OCL_OVERLOAD atomicAdd(GPUVOLATILE int64_t*p, int64_t v) { return atomicAdd_i64(p, v); } - GPUFUN float OCL_OVERLOAD atomicAdd(GPUVOLATILE float* p, float v) { return atomicAdd_f32(p, v); } - GPUFUN double OCL_OVERLOAD atomicAdd(GPUVOLATILE double* p, double v) { return atomicAdd_f64(p, v); } - - // Existing type definitions: forward to OpenCL built-ins - GPUFUN int32_t OCL_OVERLOAD atomicAdd(GPUVOLATILE int32_t* p, int32_t v) { return atomic_add(p, v); } - GPUFUN uint32_t OCL_OVERLOAD atomicAdd(GPUVOLATILE uint32_t* p, uint32_t v) { return atomic_add(p, v); } - GPUFUN uint64_t OCL_OVERLOAD atomicAdd(GPUVOLATILE uint64_t* p, uint64_t v) { return atom_add(p, v); } -#endif // XO_CONTEXT_CL - -#endif //_ATOMICADD_H_ +#endif // XTRACK_ATOMICADD_H_DEPRECATED diff --git a/xtrack/headers/constants.h b/xtrack/headers/constants.h index 9159b38ff..d876cc4df 100644 --- a/xtrack/headers/constants.h +++ b/xtrack/headers/constants.h @@ -58,4 +58,20 @@ #define MASS_ELECTRON (9.1093837e-31) #endif /* !defined( MASS_ELECTRON ) */ +#if !defined( SQRT3 ) + #define SQRT3 1.732050807568877 +#endif /* !defined( SQRT3 ) */ + +#if !defined( SQRT2 ) + #define SQRT2 (1.414213562373095048801688724209698078569671875376948073176679738) +#endif /* !defined( SQRT2 ) */ + +#if !defined( TWO_OVER_SQRT_PI ) + #define TWO_OVER_SQRT_PI (1.128379167095512573896158903121545171688101258657997713688171443418) +#endif /* !defined( TWO_OVER_SQRT_PI ) */ + +#if !defined( ALPHA_EM ) + #define ALPHA_EM 0.0072973525693 +#endif /* !defined( ALPHA_EM ) */ + #endif /* XTRACK_CONSTANTS_H */ diff --git a/xtrack/headers/track.h b/xtrack/headers/track.h index d803bf46b..b55b4b6bc 100644 --- a/xtrack/headers/track.h +++ b/xtrack/headers/track.h @@ -1,7 +1,14 @@ +// copyright ############################### // +// This file is part of the Xtrack Package. // +// Copyright (c) CERN, 2025. // +// ######################################### // + #ifndef XTRACK_TRACK_H #define XTRACK_TRACK_H -#include +#include "xobjects/headers/common.h" +#include "xobjects/headers/atomicadd.h" +#include "xtrack/headers/constants.h" /* The particle tracking "decorators" for all the contexts. 
@@ -22,12 +29,6 @@ #define END_PER_PARTICLE_BLOCK \ } \ } - - #define VECTORIZE_OVER(INDEX_NAME, COUNT) \ - for (int INDEX_NAME = 0; INDEX_NAME < (COUNT); INDEX_NAME++) { - - #define END_VECTORIZE \ - } #endif // XO_CONTEXT_CPU_SERIAL #ifdef XO_CONTEXT_CPU_OPENMP @@ -48,14 +49,6 @@ } \ } \ } - - #define VECTORIZE_OVER(INDEX_NAME, COUNT) \ - _Pragma("omp parallel for") \ - for (int INDEX_NAME = 0; INDEX_NAME < (COUNT); INDEX_NAME++) { - - #define END_VECTORIZE \ - } - #endif // XO_CONTEXT_CPU_OPENMP @@ -67,14 +60,6 @@ #define END_PER_PARTICLE_BLOCK \ } - - #define VECTORIZE_OVER(INDEX_NAME, COUNT) { \ - int INDEX_NAME = blockDim.x * blockIdx.x + threadIdx.x; \ - if (INDEX_NAME < (COUNT)) { - - #define END_VECTORIZE \ - } \ - } #endif // XO_CONTEXT_CUDA @@ -86,54 +71,9 @@ #define END_PER_PARTICLE_BLOCK \ } - - #define VECTORIZE_OVER(INDEX_NAME, COUNT) \ - { \ - int INDEX_NAME = get_global_id(0); - - #define END_VECTORIZE \ - } #endif // XO_CONTEXT_CL -/* - Qualifier keywords for GPU and optimisation -*/ - -#ifdef XO_CONTEXT_CPU // for both serial and OpenMP - #define GPUKERN - #define GPUFUN static inline - #define GPUGLMEM - #define RESTRICT restrict -#endif - - -#ifdef XO_CONTEXT_CUDA - #define GPUKERN __global__ - #define GPUFUN __device__ - #define GPUGLMEM - #define RESTRICT -#endif // XO_CONTEXT_CUDA - - -#ifdef XO_CONTEXT_CL - #define GPUKERN __kernel - #define GPUFUN - #define GPUGLMEM __global - #define RESTRICT -#endif // XO_CONTEXT_CL - - -/* - Common maths-related macros -*/ - -#define POW2(X) ((X)*(X)) -#define POW3(X) ((X)*(X)*(X)) -#define POW4(X) ((X)*(X)*(X)*(X)) -#define NONZERO(X) ((X) != 0.0) - - #ifndef START_PER_PARTICLE_BLOCK #error "Unknown context, or the expected context (XO_CONTEXT_*) flag undefined. Try updating Xobjects?" #endif diff --git a/xtrack/internal_record.py b/xtrack/internal_record.py index 4aa1d78b6..fe4f1560f 100644 --- a/xtrack/internal_record.py +++ b/xtrack/internal_record.py @@ -33,8 +33,8 @@ ''' _RecordIndex_get_slot_source = r''' -#include -#include +#include "xobjects/headers/common.h" +#include "xobjects/headers/atomicadd.h" GPUFUN int64_t RecordIndex_get_slot(RecordIndex record_index){ diff --git a/xtrack/monitors/beam_position_monitor.h b/xtrack/monitors/beam_position_monitor.h index 81e50569e..f8821eb6c 100644 --- a/xtrack/monitors/beam_position_monitor.h +++ b/xtrack/monitors/beam_position_monitor.h @@ -9,7 +9,7 @@ #ifndef XTRACK_BEAM_POSITION_MONITOR_H #define XTRACK_BEAM_POSITION_MONITOR_H -#include +#include "xtrack/headers/track.h" GPUFUN diff --git a/xtrack/monitors/beam_position_monitor.py b/xtrack/monitors/beam_position_monitor.py index 65b3e8ba9..3780261ab 100644 --- a/xtrack/monitors/beam_position_monitor.py +++ b/xtrack/monitors/beam_position_monitor.py @@ -12,7 +12,6 @@ from ..base_element import BeamElement from ..beam_elements import Marker from ..internal_record import RecordIndex -from ..general import _pkg_root class BeamPositionMonitorRecord(xo.Struct): @@ -40,8 +39,7 @@ class BeamPositionMonitor(BeamElement): properties = [field.name for field in BeamPositionMonitorRecord._fields] _extra_c_sources = [ - _pkg_root.joinpath('headers/atomicadd.h'), - _pkg_root.joinpath('monitors/beam_position_monitor.h') + '#include "xtrack/monitors/beam_position_monitor.h"', ] def __init__(self, *, particle_id_range=None, particle_id_start=None, num_particles=None, diff --git a/xtrack/monitors/beam_profile_monitor.h b/xtrack/monitors/beam_profile_monitor.h index 37daff113..dd720c7e8 100644 --- a/xtrack/monitors/beam_profile_monitor.h +++ 
b/xtrack/monitors/beam_profile_monitor.h @@ -9,7 +9,7 @@ #ifndef XTRACK_BEAM_PROFILE_MONITOR_H #define XTRACK_BEAM_PROFILE_MONITOR_H -#include +#include "xtrack/headers/track.h" GPUFUN diff --git a/xtrack/monitors/beam_profile_monitor.py b/xtrack/monitors/beam_profile_monitor.py index 24c7853ed..d23727010 100644 --- a/xtrack/monitors/beam_profile_monitor.py +++ b/xtrack/monitors/beam_profile_monitor.py @@ -12,7 +12,6 @@ from ..base_element import BeamElement from ..beam_elements import Marker from ..internal_record import RecordIndex -from ..general import _pkg_root class BeamProfileMonitorRecord(xo.Struct): counts_x = xo.Float64[:] @@ -45,8 +44,7 @@ class BeamProfileMonitor(BeamElement): properties = [field.name for field in BeamProfileMonitorRecord._fields] _extra_c_sources = [ - _pkg_root.joinpath('headers/atomicadd.h'), - _pkg_root.joinpath('monitors/beam_profile_monitor.h') + '#include "xtrack/monitors/beam_profile_monitor.h"', ] diff --git a/xtrack/monitors/beam_size_monitor.h b/xtrack/monitors/beam_size_monitor.h index 66bfa23cf..cb4102bad 100644 --- a/xtrack/monitors/beam_size_monitor.h +++ b/xtrack/monitors/beam_size_monitor.h @@ -13,8 +13,8 @@ GPUFUN -void BeamSizeMonitor_track_local_particle(BeamSizeMonitorData el, LocalParticle* part0){ - +void BeamSizeMonitor_track_local_particle(BeamSizeMonitorData el, LocalParticle* part0) +{ // get parameters int64_t const start_at_turn = BeamSizeMonitorData_get_start_at_turn(el); int64_t particle_id_start = BeamSizeMonitorData_get_particle_id_start(el); @@ -44,19 +44,19 @@ void BeamSizeMonitor_track_local_particle(BeamSizeMonitorData el, LocalParticle* double x = LocalParticle_get_x(part); double y = LocalParticle_get_y(part); - /*gpuglmem*/ double* count = BeamSizeMonitorRecord_getp1_count(record, slot); + GPUGLMEM double* count = BeamSizeMonitorRecord_getp1_count(record, slot); atomicAdd(count, 1); - /*gpuglmem*/ double * x_sum = BeamSizeMonitorRecord_getp1_x_sum(record, slot); + GPUGLMEM double * x_sum = BeamSizeMonitorRecord_getp1_x_sum(record, slot); atomicAdd(x_sum, x); - /*gpuglmem*/ double * y_sum = BeamSizeMonitorRecord_getp1_y_sum(record, slot); + GPUGLMEM double * y_sum = BeamSizeMonitorRecord_getp1_y_sum(record, slot); atomicAdd(y_sum, y); - /*gpuglmem*/ double * x2_sum = BeamSizeMonitorRecord_getp1_x2_sum(record, slot); + GPUGLMEM double * x2_sum = BeamSizeMonitorRecord_getp1_x2_sum(record, slot); atomicAdd(x2_sum, x*x); - /*gpuglmem*/ double * y2_sum = BeamSizeMonitorRecord_getp1_y2_sum(record, slot); + GPUGLMEM double * y2_sum = BeamSizeMonitorRecord_getp1_y2_sum(record, slot); atomicAdd(y2_sum, y*y); } } diff --git a/xtrack/monitors/beam_size_monitor.py b/xtrack/monitors/beam_size_monitor.py index dff81bbb7..be66c4010 100644 --- a/xtrack/monitors/beam_size_monitor.py +++ b/xtrack/monitors/beam_size_monitor.py @@ -8,11 +8,10 @@ import numpy as np import xobjects as xo - from ..base_element import BeamElement from ..beam_elements import Marker from ..internal_record import RecordIndex -from ..general import _pkg_root + class BeamSizeMonitorRecord(xo.Struct): count = xo.Float64[:] @@ -40,8 +39,7 @@ class BeamSizeMonitor(BeamElement): properties = [field.name for field in BeamSizeMonitorRecord._fields] _extra_c_sources = [ - _pkg_root.joinpath('headers/atomicadd.h'), - _pkg_root.joinpath('monitors/beam_size_monitor.h') + '#include "xtrack/monitors/beam_size_monitor.h"', ] def __init__(self, *, particle_id_range=None, particle_id_start=None, num_particles=None, diff --git a/xtrack/monitors/particles_monitor.h 
b/xtrack/monitors/particles_monitor.h index d10faba96..5c889def4 100644 --- a/xtrack/monitors/particles_monitor.h +++ b/xtrack/monitors/particles_monitor.h @@ -48,8 +48,11 @@ void ParticlesMonitor_track_local_particle(ParticlesMonitorData el, } else if (n_repetitions > 1){ if (at_turn < start_at_turn){ - return; //only_for_context cuda opencl - break; //only_for_context cpu_serial cpu_openmp + #ifdef XO_CONTEXT_CPU + break; + #else + return; + #endif } int64_t const i_frame = (at_turn - start_at_turn) / repetition_period; if (i_frame < n_repetitions diff --git a/xtrack/multisetter/multisetter.h b/xtrack/multisetter/multisetter.h new file mode 100644 index 000000000..ce15d8bf0 --- /dev/null +++ b/xtrack/multisetter/multisetter.h @@ -0,0 +1,76 @@ +// copyright ############################### // +// This file is part of the Xtrack Package. // +// Copyright (c) CERN, 2025. // +// ######################################### // + +#ifndef XTRACK_MULTISETTER_H +#define XTRACK_MULTISETTER_H + +#include "xobjects/headers/common.h" + + +GPUKERN +void get_values_at_offsets_float64( + MultiSetterData data, + GPUGLMEM int8_t* buffer, + GPUGLMEM double* out){ + + int64_t num_offsets = MultiSetterData_len_offsets(data); + + VECTORIZE_OVER(ii, num_offsets); + int64_t offs = MultiSetterData_get_offsets(data, ii); + + double val = *((GPUGLMEM double*)(buffer + offs)); + out[ii] = val; + END_VECTORIZE; +} + +GPUKERN +void get_values_at_offsets_int64( + MultiSetterData data, + GPUGLMEM int8_t* buffer, + GPUGLMEM int64_t* out){ + + int64_t num_offsets = MultiSetterData_len_offsets(data); + + VECTORIZE_OVER(ii, num_offsets); + int64_t offs = MultiSetterData_get_offsets(data, ii); + + int64_t val = *((GPUGLMEM int64_t*)(buffer + offs)); + out[ii] = val; + END_VECTORIZE; +} + +GPUKERN +void set_values_at_offsets_float64( + MultiSetterData data, + GPUGLMEM int8_t* buffer, + GPUGLMEM double* input){ + + int64_t num_offsets = MultiSetterData_len_offsets(data); + + VECTORIZE_OVER(ii, num_offsets); + int64_t offs = MultiSetterData_get_offsets(data, ii); + + double val = input[ii]; + *((GPUGLMEM double*)(buffer + offs)) = val; + END_VECTORIZE; +} + +GPUKERN +void set_values_at_offsets_int64( + MultiSetterData data, + GPUGLMEM int8_t* buffer, + GPUGLMEM int64_t* input){ + + int64_t num_offsets = MultiSetterData_len_offsets(data); + + VECTORIZE_OVER(ii, num_offsets); + int64_t offs = MultiSetterData_get_offsets(data, ii); + + int64_t val = input[ii]; + *((GPUGLMEM int64_t*)(buffer + offs)) = val; + END_VECTORIZE; +} + +#endif /* XTRACK_MULTISETTER_H */ \ No newline at end of file diff --git a/xtrack/multisetter/multisetter.py b/xtrack/multisetter/multisetter.py index e963eece6..b3cdb1b1c 100644 --- a/xtrack/multisetter/multisetter.py +++ b/xtrack/multisetter/multisetter.py @@ -3,75 +3,6 @@ import xobjects as xo import xtrack as xt -source = """ - -/*gpukern*/ -void get_values_at_offsets_float64( - MultiSetterData data, - /*gpuglmem*/ int8_t* buffer, - /*gpuglmem*/ double* out){ - - int64_t num_offsets = MultiSetterData_len_offsets(data); - - for (int64_t ii = 0; ii < num_offsets; ii++) { //vectorize_over ii num_offsets - int64_t offs = MultiSetterData_get_offsets(data, ii); - - double val = *((/*gpuglmem*/ double*)(buffer + offs)); - out[ii] = val; - } //end_vectorize -} - -/*gpukern*/ -void get_values_at_offsets_int64( - MultiSetterData data, - /*gpuglmem*/ int8_t* buffer, - /*gpuglmem*/ int64_t* out){ - - int64_t num_offsets = MultiSetterData_len_offsets(data); - - for (int64_t ii = 0; ii < num_offsets; ii++) { 
//vectorize_over ii num_offsets - int64_t offs = MultiSetterData_get_offsets(data, ii); - - int64_t val = *((/*gpuglmem*/ int64_t*)(buffer + offs)); - out[ii] = val; - } //end_vectorize -} - -/*gpukern*/ -void set_values_at_offsets_float64( - MultiSetterData data, - /*gpuglmem*/ int8_t* buffer, - /*gpuglmem*/ double* input){ - - int64_t num_offsets = MultiSetterData_len_offsets(data); - - for (int64_t ii = 0; ii < num_offsets; ii++) { //vectorize_over ii num_offsets - int64_t offs = MultiSetterData_get_offsets(data, ii); - - double val = input[ii]; - *((/*gpuglmem*/ double*)(buffer + offs)) = val; - } //end_vectorize -} - -/*gpukern*/ -void set_values_at_offsets_int64( - MultiSetterData data, - /*gpuglmem*/ int8_t* buffer, - /*gpuglmem*/ int64_t* input){ - - int64_t num_offsets = MultiSetterData_len_offsets(data); - - for (int64_t ii = 0; ii < num_offsets; ii++) { //vectorize_over ii num_offsets - int64_t offs = MultiSetterData_get_offsets(data, ii); - - int64_t val = input[ii]; - *((/*gpuglmem*/ int64_t*)(buffer + offs)) = val; - } //end_vectorize -} - - - -""" class MultiSetter(xo.HybridClass): _xofields = { @@ -79,7 +10,7 @@ class MultiSetter(xo.HybridClass): } _extra_c_sources = [ - source, + '#include "xtrack/multisetter/multisetter.h"', ] _kernels = { diff --git a/xtrack/particles/local_particle_custom_api.h b/xtrack/particles/local_particle_custom_api.h new file mode 100644 index 000000000..19b9ce078 --- /dev/null +++ b/xtrack/particles/local_particle_custom_api.h @@ -0,0 +1,289 @@ +// copyright ############################### // +// This file is part of the Xtrack Package. // +// Copyright (c) CERN, 2025. // +// ######################################### // + +#ifndef XTRACK_LOCAL_PARTICLE_CUSTOM_API_H +#define XTRACK_LOCAL_PARTICLE_CUSTOM_API_H + +#include "xtrack/headers/track.h" + + +GPUFUN +double LocalParticle_get_energy0(LocalParticle* part) { + double const p0c = LocalParticle_get_p0c(part); + double const m0 = LocalParticle_get_mass0(part); + return sqrt(p0c * p0c + m0 * m0); +} + + +GPUFUN +void LocalParticle_update_ptau(LocalParticle* part, double new_ptau_value) { + double const beta0 = LocalParticle_get_beta0(part); + double const ptau = new_ptau_value; + double const irpp = sqrt(ptau * ptau + 2 * ptau / beta0 + 1); + double const new_rpp = 1. 
/ irpp; + double const new_rvv = irpp / (1 + beta0 * ptau); + + LocalParticle_set_delta(part, irpp - 1); + LocalParticle_set_rvv(part, new_rvv); + LocalParticle_set_ptau(part, ptau); + LocalParticle_set_rpp(part, new_rpp); +} + + +GPUFUN +void LocalParticle_update_delta(LocalParticle* part, double new_delta_value) { + double const beta0 = LocalParticle_get_beta0(part); + double const delta_beta0 = new_delta_value * beta0; + double const ptau_beta0 = sqrt(delta_beta0 * delta_beta0 + 2 * delta_beta0 * beta0 + 1) - 1; + double const one_plus_delta = 1 + new_delta_value; + double const rvv = (one_plus_delta) / (1 + ptau_beta0); + double const rpp = 1 / one_plus_delta; + double const ptau = ptau_beta0 / beta0; + + LocalParticle_set_delta(part, new_delta_value); + LocalParticle_set_rvv(part, rvv); + LocalParticle_set_rpp(part, rpp); + LocalParticle_set_ptau(part, ptau); +} + + +GPUFUN +double LocalParticle_get_pzeta(LocalParticle* part) { + double const ptau = LocalParticle_get_ptau(part); + double const beta0 = LocalParticle_get_beta0(part); + return ptau / beta0; +} + + +GPUFUN +void LocalParticle_update_pzeta(LocalParticle* part, double new_pzeta_value) { + double const beta0 = LocalParticle_get_beta0(part); + LocalParticle_update_ptau(part, beta0 * new_pzeta_value); +} + + +GPUFUN +void increment_at_element(LocalParticle* part0, int64_t const increment) { + START_PER_PARTICLE_BLOCK(part0, part); + LocalParticle_add_to_at_element(part, increment); + END_PER_PARTICLE_BLOCK; +} + + +GPUFUN +void increment_at_turn(LocalParticle* part0, int flag_reset_s){ + START_PER_PARTICLE_BLOCK(part0, part); + LocalParticle_add_to_at_turn(part, 1); + LocalParticle_set_at_element(part, 0); + if (flag_reset_s > 0) { + LocalParticle_set_s(part, 0.); + } + END_PER_PARTICLE_BLOCK; +} + + +GPUFUN +void increment_at_turn_backtrack( + LocalParticle* part0, + int flag_reset_s, + double const line_length, + int64_t const num_elements +) { + START_PER_PARTICLE_BLOCK(part0, part); + LocalParticle_add_to_at_turn(part, -1); + LocalParticle_set_at_element(part, num_elements); + if (flag_reset_s > 0) { + LocalParticle_set_s(part, line_length); + } + END_PER_PARTICLE_BLOCK; +} + + +/* check_is_active has different implementation on CPU and GPU */ +#if defined(XO_CONTEXT_CPU_SERIAL) + + GPUFUN + int64_t check_is_active(LocalParticle* part) { + int64_t ipart = 0; + while (ipart < part->_num_active_particles){ + #ifdef XSUITE_RESTORE_LOSS + ipart++; + #else + if (part->state[ipart] < 1) { + LocalParticle_exchange(part, ipart, part->_num_active_particles - 1); + part->_num_active_particles--; + part->_num_lost_particles++; + } else { + ipart++; + } + #endif + } + + if (part->_num_active_particles == 0){ + return 0; //All particles lost + } else { + return 1; //Some stable particles are still present + } + } + +#elif defined(XO_CONTEXT_CPU_OPENMP) + + GPUFUN + int64_t check_is_active(LocalParticle* part) { + #ifndef SKIP_SWAPS + int64_t ipart = part->ipart; + int64_t endpart = part->endpart; + int64_t left = ipart; + int64_t right = endpart - 1; + int64_t swap_made = 0; + int64_t has_alive = 0; + + if (left == right) + return part->state[left] > 0; + + while (left < right) { + if (part->state[left] > 0) { + left++; + has_alive = 1; + } + else if (part->state[right] <= 0) + right--; + else { + LocalParticle_exchange(part, left, right); + left++; + right--; + swap_made = 1; + } + } + return swap_made || has_alive; + #else + return 1; + #endif + } + + + GPUFUN + void count_reorganized_particles(LocalParticle* part) { + int64_t 
num_active = 0; + int64_t num_lost = 0; + + for (int64_t i = part->ipart; i < part->endpart; i++) { + if (part->state[i] <= -999999999) + break; + else if (part->state[i] > 0) + num_active++; + else + num_lost++; + } + + part->_num_active_particles = num_active; + part->_num_lost_particles = num_lost; + } + +#else // not XO_CONTEXT_CPU_SERIAL and not XO_CONTEXT_CPU_OPENMP + + GPUFUN + int64_t check_is_active(LocalParticle* part) { + return LocalParticle_get_state(part) > 0; + }; + +#endif + + +GPUFUN +void LocalParticle_add_to_energy(LocalParticle* part, double delta_energy, int pz_only ) +{ + double ptau = LocalParticle_get_ptau(part); + double const p0c = LocalParticle_get_p0c(part); + double const charge_ratio = LocalParticle_get_charge_ratio(part); + double const chi = LocalParticle_get_chi(part); + double const mass_ratio = charge_ratio / chi; + + ptau += delta_energy / p0c / mass_ratio; + + double const old_rpp = LocalParticle_get_rpp(part); + + LocalParticle_update_ptau(part, ptau); + + if (!pz_only) { + double const new_rpp = LocalParticle_get_rpp(part); + double const f = old_rpp / new_rpp; + LocalParticle_scale_px(part, f); + LocalParticle_scale_py(part, f); + } +} + + +GPUFUN +void LocalParticle_update_p0c(LocalParticle* part, double new_p0c_value) +{ + double const mass0 = LocalParticle_get_mass0(part); + double const old_p0c = LocalParticle_get_p0c(part); + double const old_delta = LocalParticle_get_delta(part); + double const old_beta0 = LocalParticle_get_beta0(part); + + double const ppc = old_p0c * old_delta + old_p0c; + double const new_delta = (ppc - new_p0c_value) / new_p0c_value; + + double const new_energy0 = sqrt(new_p0c_value * new_p0c_value + mass0 * mass0); + double const new_beta0 = new_p0c_value / new_energy0; + double const new_gamma0 = new_energy0 / mass0; + + LocalParticle_set_p0c(part, new_p0c_value); + LocalParticle_set_gamma0(part, new_gamma0); + LocalParticle_set_beta0(part, new_beta0); + + LocalParticle_update_delta(part, new_delta); + + LocalParticle_scale_px(part, old_p0c / new_p0c_value); + LocalParticle_scale_py(part, old_p0c / new_p0c_value); + + LocalParticle_scale_zeta(part, new_beta0 / old_beta0); + +} + +GPUFUN +void LocalParticle_kill_particle(LocalParticle* part, int64_t kill_state) { + LocalParticle_set_x(part, 1e30); + LocalParticle_set_px(part, 1e30); + LocalParticle_set_y(part, 1e30); + LocalParticle_set_py(part, 1e30); + LocalParticle_set_zeta(part, 1e30); + LocalParticle_update_delta(part, -1); // zero energy + LocalParticle_set_state(part, kill_state); +} + + +#ifdef XTRACK_GLOBAL_XY_LIMIT + + GPUFUN + void global_aperture_check(LocalParticle* part0) + { + if (LocalParticle_check_track_flag(part0, XS_FLAG_IGNORE_GLOBAL_APERTURE)) + { + return; + } + + START_PER_PARTICLE_BLOCK(part0, part); + double const x = LocalParticle_get_x(part); + double const y = LocalParticle_get_y(part); + + int64_t const is_alive = (int64_t)( + (x >= -XTRACK_GLOBAL_XY_LIMIT) && + (x <= XTRACK_GLOBAL_XY_LIMIT) && + (y >= -XTRACK_GLOBAL_XY_LIMIT) && + (y <= XTRACK_GLOBAL_XY_LIMIT) + ); + + // I assume that if I am in the function is because + if (!is_alive) { + LocalParticle_set_state(part, -1); + } + END_PER_PARTICLE_BLOCK; + } + +#endif + +#endif /* XTRACK_LOCAL_PARTICLE_CUSTOM_API_H */ diff --git a/xtrack/particles/particles.py b/xtrack/particles/particles.py index eca5b324a..b7a5438b9 100644 --- a/xtrack/particles/particles.py +++ b/xtrack/particles/particles.py @@ -23,69 +23,398 @@ LAST_INVALID_STATE = -999999999 +size_vars = ( + (xo.Int64, '_capacity'), + 
(xo.Int64, '_num_active_particles'), + (xo.Int64, '_num_lost_particles'), + (xo.Int64, 'start_tracking_at_element'), +) +# Capacity is always kept up to date +# the other two are placeholders to be used if needed +# i.e. on ContextCpu + +scalar_vars = ( + (xo.Float64, 'q0'), + (xo.Float64, 'mass0'), + (xo.Float64, 't_sim'), +) + +part_energy_vars = ( + (xo.Float64, 'ptau'), + (xo.Float64, 'delta'), + (xo.Float64, 'rpp'), + (xo.Float64, 'rvv'), +) + +per_particle_vars = ( + ( + (xo.Float64, 'p0c'), + (xo.Float64, 'gamma0'), + (xo.Float64, 'beta0'), + (xo.Float64, 's'), + (xo.Float64, 'zeta'), + (xo.Float64, 'x'), + (xo.Float64, 'y'), + (xo.Float64, 'px'), + (xo.Float64, 'py'), + ) + + part_energy_vars + + ( + (xo.Float64, 'chi'), + (xo.Float64, 'charge_ratio'), + (xo.Float64, 'weight'), + (xo.Float64, 'ax'), + (xo.Float64, 'ay'), + (xo.Float64, 'spin_x'), + (xo.Float64, 'spin_y'), + (xo.Float64, 'spin_z'), + (xo.Float64, 'anomalous_magnetic_moment'), + (xo.Int64, 'pdg_id'), + (xo.Int64, 'particle_id'), + (xo.Int64, 'at_element'), + (xo.Int64, 'at_turn'), + (xo.Int64, 'state'), + (xo.Int64, 'parent_particle_id'), + (xo.UInt32, '_rng_s1'), + (xo.UInt32, '_rng_s2'), + (xo.UInt32, '_rng_s3'), + (xo.UInt32, '_rng_s4') + ) +) -class Particles(xo.HybridClass): - _cname = 'ParticlesData' - size_vars = ( - (xo.Int64, '_capacity'), - (xo.Int64, '_num_active_particles'), - (xo.Int64, '_num_lost_particles'), - (xo.Int64, 'start_tracking_at_element'), +def gen_local_particle_api(): + src_lines = [ + '#include "xobjects/headers/common.h"', + '#include "xtrack/particles/rng_src/base_rng.h"', + '#include "xtrack/particles/rng_src/particles_rng.h"', + ] + for name, mass in mass__dict__.items(): + if name.endswith('_MASS_EV'): + src_lines.append(f'#define {name} {mass}') + + src_lines.append('typedef struct {') + + for tt, vv in size_vars + scalar_vars: + src_lines.append(' ' + tt._c_type + ' ' + vv + ';') + + for tt, vv in per_particle_vars: + src_lines.append(' GPUGLMEM ' + tt._c_type + '* ' + vv + ';') + + src_lines.append(' int64_t ipart;') + src_lines.append(' int64_t endpart;') + src_lines.append(' uint64_t track_flags;') + src_lines.append(' GPUGLMEM int8_t* io_buffer;') + src_lines.append('} LocalParticle;') + src_typedef = '\n'.join(src_lines) + + # Get io buffer + src_lines = [] + src_lines.append( + ''' + GPUFUN + GPUGLMEM int8_t* LocalParticle_get_io_buffer(LocalParticle* part){ + return part->io_buffer; + } + + ''' ) - # Capacity is always kept up to date - # the other two are placeholders to be used if needed - # i.e. 
on ContextCpu - - scalar_vars = ( - (xo.Float64, 'q0'), - (xo.Float64, 'mass0'), - (xo.Float64, 't_sim'), + + # Get track flag + src_lines.append( + ''' + GPUFUN + uint64_t LocalParticle_check_track_flag(LocalParticle* part, uint8_t index){ + return (part->track_flags >> index) & 1; + } + ''' ) - part_energy_vars = ( - (xo.Float64, 'ptau'), - (xo.Float64, 'delta'), - (xo.Float64, 'rpp'), - (xo.Float64, 'rvv'), + # Particles_to_LocalParticle + src_lines.append( + ''' + GPUFUN + void Particles_to_LocalParticle(ParticlesData source, + LocalParticle* dest, + int64_t id, + int64_t eid){''' + ) + for _, vv in size_vars + scalar_vars: + src_lines.append( + f' dest->{vv} = ParticlesData_get_' + vv + '(source);' + ) + + for _, vv in per_particle_vars: + src_lines.append( + f' dest->{vv} = ParticlesData_getp1_' + vv + '(source, 0);' + ) + + src_lines.append(' dest->ipart = id;') + src_lines.append(' dest->endpart = eid;') + src_lines.append('}') + src_particles_to_local = '\n'.join(src_lines) + + # LocalParticle_to_Particles + src_lines = [] + src_lines.append( + ''' + GPUFUN + void LocalParticle_to_Particles(LocalParticle* source, + ParticlesData dest, + int64_t id, + int64_t set_scalar){''' ) + src_lines.append('if (set_scalar){') + for _, vv in size_vars + scalar_vars: + src_lines.append( + ' ParticlesData_set_' + vv + '(dest,' + f' LocalParticle_get_{vv}(source));' + ) + src_lines.append('}') + + for _, vv in per_particle_vars: + src_lines.append( + ' ParticlesData_set_' + vv + '(dest, id, ' + f' LocalParticle_get_{vv}(source));' + ) + src_lines.append('}') + src_local_to_particles = '\n'.join(src_lines) + + # Adders + src_lines = [] + for tt, vv in per_particle_vars: + src_lines.append( + ''' + GPUFUN + void LocalParticle_add_to_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' + + '{' + ) + src_lines.append(f'#ifndef FREEZE_VAR_{vv}') + src_lines.append(f' part->{vv}[part->ipart] += value;') + src_lines.append('#endif') + src_lines.append('}\n') + src_adders = '\n'.join(src_lines) + + # Scalers + src_lines = [] + for tt, vv in per_particle_vars: + src_lines.append( + ''' + GPUFUN + void LocalParticle_scale_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' + + '{' + ) + src_lines.append(f'#ifndef FREEZE_VAR_{vv}') + src_lines.append(f' part->{vv}[part->ipart] *= value;') + src_lines.append('#endif') + src_lines.append('}\n') + src_scalers = '\n'.join(src_lines) + + # Setters + src_lines = [] + for tt, vv in per_particle_vars: + src_lines.append( + ''' + GPUFUN + void LocalParticle_set_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' + + '{' + ) + src_lines.append(f'#ifndef FREEZE_VAR_{vv}') + src_lines.append(f' part->{vv}[part->ipart] = value;') + src_lines.append('#endif') + src_lines.append('}') + src_setters = '\n'.join(src_lines) + + # Getters + src_lines = [] - per_particle_vars = ( - ( - (xo.Float64, 'p0c'), - (xo.Float64, 'gamma0'), - (xo.Float64, 'beta0'), - (xo.Float64, 's'), - (xo.Float64, 'zeta'), - (xo.Float64, 'x'), - (xo.Float64, 'y'), - (xo.Float64, 'px'), - (xo.Float64, 'py'), + for tt, vv in size_vars + scalar_vars: + src_lines.append('GPUFUN') + src_lines.append( + f'{tt._c_type} LocalParticle_get_' + vv + + '(LocalParticle* part)' + + '{' + ) + src_lines.append(f' return part->{vv};') + src_lines.append('}') + + for tt, vv in per_particle_vars: + src_lines.append('GPUFUN') + src_lines.append( + f'{tt._c_type} LocalParticle_get_' + vv + + '(LocalParticle* part)' + + '{' + ) + src_lines.append(f' return part->{vv}[part->ipart];') + src_lines.append('}') + + 
src_getters = '\n'.join(src_lines) + + # Angles + src_angles_lines = [] + for exact in ['', 'exact_']: + # xp (py as transverse) and vice versa + for xx, yy in [['x', 'y'], ['y', 'x']]: + # Getter + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'double LocalParticle_get_{exact}{xx}p(LocalParticle* part){{' + ) + src_angles_lines.append( + f' double const p{xx} = LocalParticle_get_p{xx}(part);' + ) + if exact == 'exact_': + src_angles_lines.append( + f' double const p{yy} = LocalParticle_get_p{yy}(part);' + ) + src_angles_lines.append( + ' double const one_plus_delta = 1. + LocalParticle_get_delta(part);' + ) + src_angles_lines.append( + ' double const rpp = 1./sqrt(one_plus_delta*one_plus_delta - px*px - py*py);' + ) + else: + src_angles_lines.append( + ' double const rpp = LocalParticle_get_rpp(part);' + ) + src_angles_lines.append( + ' // INFO: this is not the angle, but sin(angle)' + ) + src_angles_lines.append(f' return p{xx}*rpp;') + src_angles_lines.append('}') + src_angles_lines.append('') + + for xx, yy in [['x', 'y'], ['y', 'x']]: + # Setter + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void LocalParticle_set_{exact}{xx}p(LocalParticle* part, double {xx}p){{' + ) + src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') + src_angles_lines.append( + ' double rpp = LocalParticle_get_rpp(part);' + ) + if exact == 'exact_': + src_angles_lines.append( + f' // Careful! If {yy}p also changes, use LocalParticle_set_{exact}xp_yp!' + ) + src_angles_lines.append( + f' double const {yy}p = LocalParticle_get_{exact}{yy}p(part);' + ) + src_angles_lines.append(' rpp *= sqrt(1 + xp*xp + yp*yp);') + src_angles_lines.append( + f' // INFO: {xx}p is not the angle, but sin(angle)' + ) + src_angles_lines.append( + f' LocalParticle_set_p{xx}(part, {xx}p/rpp);' + ) + src_angles_lines.append('#endif') + src_angles_lines.append('}') + src_angles_lines.append('') + + for xx, yy in [['x', 'y'], ['y', 'x']]: + # Adder + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void LocalParticle_add_to_{exact}{xx}p(LocalParticle* part, double {xx}p){{' + ) + src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') + src_angles_lines.append( + f' LocalParticle_set_{exact}{xx}p(part, ' + + f'LocalParticle_get_{exact}{xx}p(part) + {xx}p);' + ) + src_angles_lines.append('#endif') + src_angles_lines.append('}') + src_angles_lines.append('') + # Scaler + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void LocalParticle_scale_{exact}{xx}p(LocalParticle* part, double value){{' + ) + src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') + src_angles_lines.append( + f' LocalParticle_set_{exact}{xx}p(part, ' + + f'LocalParticle_get_{exact}{xx}p(part) * value);' + ) + src_angles_lines.append('#endif') + src_angles_lines.append('}') + src_angles_lines.append('') + # Double setter, adder, scaler + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void LocalParticle_set_{exact}xp_yp(LocalParticle* part, double xp, double yp){{' + ) + src_angles_lines.append(' double rpp = LocalParticle_get_rpp(part);') + if exact == 'exact_': + src_angles_lines.append(' rpp *= sqrt(1 + xp*xp + yp*yp);') + for xx in ['x', 'y']: + src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') + src_angles_lines.append( + f' LocalParticle_set_p{xx}(part, {xx}p/rpp);' + ) + src_angles_lines.append('#endif') + src_angles_lines.append('}') + src_angles_lines.append('') + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void 
LocalParticle_add_to_{exact}xp_yp(LocalParticle* part, double xp, double yp){{' + ) + src_angles_lines.append( + f' LocalParticle_set_{exact}xp_yp(part, ' + + f'LocalParticle_get_{exact}xp(part) + xp, ' + + f'LocalParticle_get_{exact}yp(part) + yp);' + ) + src_angles_lines.append('}') + src_angles_lines.append('') + src_angles_lines.append('GPUFUN') + src_angles_lines.append( + f'void LocalParticle_scale_{exact}xp_yp(LocalParticle* part, double value_x, double value_y){{' ) - + part_energy_vars + - ( - (xo.Float64, 'chi'), - (xo.Float64, 'charge_ratio'), - (xo.Float64, 'weight'), - (xo.Float64, 'ax'), - (xo.Float64, 'ay'), - (xo.Float64, 'spin_x'), - (xo.Float64, 'spin_y'), - (xo.Float64, 'spin_z'), - (xo.Float64, 'anomalous_magnetic_moment'), - (xo.Int64, 'pdg_id'), - (xo.Int64, 'particle_id'), - (xo.Int64, 'at_element'), - (xo.Int64, 'at_turn'), - (xo.Int64, 'state'), - (xo.Int64, 'parent_particle_id'), - (xo.UInt32, '_rng_s1'), - (xo.UInt32, '_rng_s2'), - (xo.UInt32, '_rng_s3'), - (xo.UInt32, '_rng_s4') + src_angles_lines.append( + f' LocalParticle_set_{exact}xp_yp(part, ' + + f'LocalParticle_get_{exact}xp(part) * value_x, ' + + f'LocalParticle_get_{exact}yp(part) * value_y);' ) + src_angles_lines.append('}') + src_angles = '\n'.join(src_angles_lines) + + # Particle exchangers + src_exchange = ''' + GPUFUN + void LocalParticle_exchange(LocalParticle* part, int64_t i1, int64_t i2){ + ''' + for tt, vv in per_particle_vars: + src_exchange += '\n'.join( + [ + '\n {', + f' {tt._c_type} temp = part->{vv}[i2];', + f' part->{vv}[i2] = part->{vv}[i1];', + f' part->{vv}[i1] = temp;', + ' }'] + ) + src_exchange += '}\n' + + source = '\n\n'.join( + [src_typedef, src_adders, src_getters, + src_setters, src_scalers, src_exchange, + src_particles_to_local, src_local_to_particles, + src_angles] ) + source += """ + #include "xtrack/particles/local_particle_custom_api.h" + """ + return source + + +class Particles(xo.HybridClass): + _cname = 'ParticlesData' + + size_vars = size_vars + scalar_vars = scalar_vars + part_energy_vars = part_energy_vars + per_particle_vars = per_particle_vars + _xofields = { **{nn: tt for tt, nn in size_vars + scalar_vars}, **{nn: tt[:] for tt, nn in per_particle_vars}, @@ -95,9 +424,7 @@ class Particles(xo.HybridClass): if (not vv[1].startswith('_') and vv[1] != 't_sim')] + ['t_sim'] _extra_c_sources = [ - Path(__file__).parent.joinpath('rng_src', 'base_rng.h'), - Path(__file__).parent.joinpath('rng_src', 'particles_rng.h'), - '\n /*placeholder_for_local_particle_src*/ \n' + gen_local_particle_api() ] _rename = { @@ -1471,551 +1798,6 @@ def add_to_energy(self, delta_energy): def set_particle(self, index, set_scalar_vars=False, **kwargs): raise NotImplementedError('This functionality has been removed') - @classmethod - def gen_local_particle_api(cls, mode='no_local_copy'): - if mode != 'no_local_copy': - raise NotImplementedError - - src_lines = [''] - for name, mass in mass__dict__.items(): - if name.endswith('_MASS_EV'): - src_lines.append(f'#define {name} {mass}') - - src_lines.append('typedef struct {') - - for tt, vv in cls.size_vars + cls.scalar_vars: - src_lines.append(' ' + tt._c_type + ' ' + vv + ';') - - for tt, vv in cls.per_particle_vars: - src_lines.append(' /*gpuglmem*/ ' + tt._c_type + '* ' + vv + ';') - - src_lines.append(' int64_t ipart;') - src_lines.append(' int64_t endpart;') - src_lines.append(' uint64_t track_flags;') - src_lines.append(' /*gpuglmem*/ int8_t* io_buffer;') - src_lines.append('} LocalParticle;') - src_typedef = '\n'.join(src_lines) - - # Get 
io buffer - src_lines = [] - src_lines.append(''' - /*gpufun*/ - /*gpuglmem*/ int8_t* LocalParticle_get_io_buffer(LocalParticle* part){ - return part->io_buffer; - } - - ''') - - # Get track flag - src_lines.append(''' - /*gpufun*/ - uint64_t LocalParticle_check_track_flag(LocalParticle* part, uint8_t index){ - return (part->track_flags >> index) & 1; - } - ''') - - # Particles_to_LocalParticle - src_lines.append(''' - /*gpufun*/ - void Particles_to_LocalParticle(ParticlesData source, - LocalParticle* dest, - int64_t id, - int64_t eid){''') - for _, vv in cls.size_vars + cls.scalar_vars: - src_lines.append( - f' dest->{vv} = ParticlesData_get_' + vv + '(source);') - - for _, vv in cls.per_particle_vars: - src_lines.append( - f' dest->{vv} = ParticlesData_getp1_' + vv + '(source, 0);') - - src_lines.append(' dest->ipart = id;') - src_lines.append(' dest->endpart = eid;') - src_lines.append('}') - src_particles_to_local = '\n'.join(src_lines) - - # LocalParticle_to_Particles - src_lines = [] - src_lines.append(''' - /*gpufun*/ - void LocalParticle_to_Particles(LocalParticle* source, - ParticlesData dest, - int64_t id, - int64_t set_scalar){''') - src_lines.append('if (set_scalar){') - for _, vv in cls.size_vars + cls.scalar_vars: - src_lines.append( - ' ParticlesData_set_' + vv + '(dest,' - f' LocalParticle_get_{vv}(source));') - src_lines.append('}') - - for _, vv in cls.per_particle_vars: - src_lines.append( - ' ParticlesData_set_' + vv + '(dest, id, ' - f' LocalParticle_get_{vv}(source));') - src_lines.append('}') - src_local_to_particles = '\n'.join(src_lines) - - # Adders - src_lines = [] - for tt, vv in cls.per_particle_vars: - src_lines.append(''' - /*gpufun*/ - void LocalParticle_add_to_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' - + '{') - src_lines.append(f'#ifndef FREEZE_VAR_{vv}') - src_lines.append(f' part->{vv}[part->ipart] += value;') - src_lines.append('#endif') - src_lines.append('}\n') - src_adders = '\n'.join(src_lines) - - # Scalers - src_lines = [] - for tt, vv in cls.per_particle_vars: - src_lines.append(''' - /*gpufun*/ - void LocalParticle_scale_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' - + '{') - src_lines.append(f'#ifndef FREEZE_VAR_{vv}') - src_lines.append(f' part->{vv}[part->ipart] *= value;') - src_lines.append('#endif') - src_lines.append('}\n') - src_scalers = '\n'.join(src_lines) - - # Setters - src_lines = [] - for tt, vv in cls.per_particle_vars: - src_lines.append(''' - /*gpufun*/ - void LocalParticle_set_''' + vv + f'(LocalParticle* part, {tt._c_type} value)' - + '{') - src_lines.append(f'#ifndef FREEZE_VAR_{vv}') - src_lines.append(f' part->{vv}[part->ipart] = value;') - src_lines.append('#endif') - src_lines.append('}') - src_setters = '\n'.join(src_lines) - - # Getters - src_lines = [] - - for tt, vv in cls.size_vars + cls.scalar_vars: - src_lines.append('/*gpufun*/') - src_lines.append(f'{tt._c_type} LocalParticle_get_' + vv - + '(LocalParticle* part)' - + '{') - src_lines.append(f' return part->{vv};') - src_lines.append('}') - - for tt, vv in cls.per_particle_vars: - src_lines.append('/*gpufun*/') - src_lines.append(f'{tt._c_type} LocalParticle_get_' + vv - + '(LocalParticle* part)' - + '{') - src_lines.append(f' return part->{vv}[part->ipart];') - src_lines.append('}') - - src_getters = '\n'.join(src_lines) - - # Angles - src_angles_lines = [] - for exact in ['', 'exact_']: - # xp (py as transverse) and vice versa - for xx, yy in [['x', 'y'], ['y', 'x']]: - # Getter - src_angles_lines.append('/*gpufun*/') - 
src_angles_lines.append(f'double LocalParticle_get_{exact}{xx}p(LocalParticle* part){{') - src_angles_lines.append(f' double const p{xx} = LocalParticle_get_p{xx}(part);') - if exact == 'exact_': - src_angles_lines.append(f' double const p{yy} = LocalParticle_get_p{yy}(part);') - src_angles_lines.append(' double const one_plus_delta = 1. + LocalParticle_get_delta(part);') - src_angles_lines.append( - ' double const rpp = 1./sqrt(one_plus_delta*one_plus_delta - px*px - py*py);') - else: - src_angles_lines.append(' double const rpp = LocalParticle_get_rpp(part);') - src_angles_lines.append(' // INFO: this is not the angle, but sin(angle)') - src_angles_lines.append(f' return p{xx}*rpp;') - src_angles_lines.append('}') - src_angles_lines.append('') - - for xx, yy in [['x', 'y'], ['y', 'x']]: - # Setter - src_angles_lines.append('/*gpufun*/') - src_angles_lines.append(f'void LocalParticle_set_{exact}{xx}p(LocalParticle* part, double {xx}p){{') - src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') - src_angles_lines.append(' double rpp = LocalParticle_get_rpp(part);') - if exact == 'exact_': - src_angles_lines.append( - f' // Careful! If {yy}p also changes, use LocalParticle_set_{exact}xp_yp!') - src_angles_lines.append(f' double const {yy}p = LocalParticle_get_{exact}{yy}p(part);') - src_angles_lines.append(' rpp *= sqrt(1 + xp*xp + yp*yp);') - src_angles_lines.append(f' // INFO: {xx}p is not the angle, but sin(angle)') - src_angles_lines.append(f' LocalParticle_set_p{xx}(part, {xx}p/rpp);') - src_angles_lines.append('#endif') - src_angles_lines.append('}') - src_angles_lines.append('') - - for xx, yy in [['x', 'y'], ['y', 'x']]: - # Adder - src_angles_lines.append('/*gpufun*/') - src_angles_lines.append(f'void LocalParticle_add_to_{exact}{xx}p(LocalParticle* part, double {xx}p){{') - src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') - src_angles_lines.append(f' LocalParticle_set_{exact}{xx}p(part, ' - + f'LocalParticle_get_{exact}{xx}p(part) + {xx}p);') - src_angles_lines.append('#endif') - src_angles_lines.append('}') - src_angles_lines.append('') - # Scaler - src_angles_lines.append('/*gpufun*/') - src_angles_lines.append(f'void LocalParticle_scale_{exact}{xx}p(LocalParticle* part, double value){{') - src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') - src_angles_lines.append(f' LocalParticle_set_{exact}{xx}p(part, ' - + f'LocalParticle_get_{exact}{xx}p(part) * value);') - src_angles_lines.append('#endif') - src_angles_lines.append('}') - src_angles_lines.append('') - # Double setter, adder, scaler - src_angles_lines.append('/*gpufun*/') - src_angles_lines.append(f'void LocalParticle_set_{exact}xp_yp(LocalParticle* part, double xp, double yp){{') - src_angles_lines.append(' double rpp = LocalParticle_get_rpp(part);') - if exact == 'exact_': - src_angles_lines.append(' rpp *= sqrt(1 + xp*xp + yp*yp);') - for xx in ['x', 'y']: - src_angles_lines.append(f'#ifndef FREEZE_VAR_p{xx}') - src_angles_lines.append(f' LocalParticle_set_p{xx}(part, {xx}p/rpp);') - src_angles_lines.append('#endif') - src_angles_lines.append('}') - src_angles_lines.append('') - src_angles_lines.append('/*gpufun*/') - src_angles_lines.append( - f'void LocalParticle_add_to_{exact}xp_yp(LocalParticle* part, double xp, double yp){{') - src_angles_lines.append(f' LocalParticle_set_{exact}xp_yp(part, ' - + f'LocalParticle_get_{exact}xp(part) + xp, ' - + f'LocalParticle_get_{exact}yp(part) + yp);') - src_angles_lines.append('}') - src_angles_lines.append('') - src_angles_lines.append('/*gpufun*/') - 
src_angles_lines.append( - f'void LocalParticle_scale_{exact}xp_yp(LocalParticle* part, double value_x, double value_y){{') - src_angles_lines.append(f' LocalParticle_set_{exact}xp_yp(part, ' - + f'LocalParticle_get_{exact}xp(part) * value_x, ' - + f'LocalParticle_get_{exact}yp(part) * value_y);') - src_angles_lines.append('}') - src_angles = '\n'.join(src_angles_lines) - - # Particle exchangers - src_exchange = ''' - /*gpufun*/ - void LocalParticle_exchange(LocalParticle* part, int64_t i1, int64_t i2){ - ''' - for tt, vv in cls.per_particle_vars: - src_exchange += '\n'.join([ - '\n {', - f' {tt._c_type} temp = part->{vv}[i2];', - f' part->{vv}[i2] = part->{vv}[i1];', - f' part->{vv}[i1] = temp;', - ' }']) - src_exchange += '}\n' - - custom_source = ''' - /*gpufun*/ - double LocalParticle_get_energy0(LocalParticle* part){ - - double const p0c = LocalParticle_get_p0c(part); - double const m0 = LocalParticle_get_mass0(part); - - return sqrt( p0c * p0c + m0 * m0 ); - } - - /*gpufun*/ - void LocalParticle_update_ptau(LocalParticle* part, double new_ptau_value){ - - double const beta0 = LocalParticle_get_beta0(part); - - double const ptau = new_ptau_value; - - double const irpp = sqrt(ptau*ptau + 2*ptau/beta0 +1); - - double const new_rpp = 1./irpp; - LocalParticle_set_delta(part, irpp - 1.); - - double const new_rvv = irpp/(1 + beta0*ptau); - LocalParticle_set_rvv(part, new_rvv); - LocalParticle_set_ptau(part, ptau); - - LocalParticle_set_rpp(part, new_rpp ); - } - - /*gpufun*/ - void LocalParticle_update_delta(LocalParticle* part, double new_delta_value){ - double const beta0 = LocalParticle_get_beta0(part); - double const delta_beta0 = new_delta_value * beta0; - double const ptau_beta0 = sqrt( delta_beta0 * delta_beta0 + - 2. * delta_beta0 * beta0 + 1. ) - 1.; - - double const one_plus_delta = 1. + new_delta_value; - double const rvv = ( one_plus_delta ) / ( 1. + ptau_beta0 ); - double const rpp = 1. 
/ one_plus_delta; - double const ptau = ptau_beta0 / beta0; - - LocalParticle_set_delta(part, new_delta_value); - - LocalParticle_set_rvv(part, rvv ); - LocalParticle_set_rpp(part, rpp ); - LocalParticle_set_ptau(part, ptau ); - - } - - /*gpufun*/ - double LocalParticle_get_pzeta(LocalParticle* part){ - - double const ptau = LocalParticle_get_ptau(part); - double const beta0 = LocalParticle_get_beta0(part); - - return ptau/beta0; - - } - - /*gpufun*/ - void LocalParticle_update_pzeta(LocalParticle* part, double new_pzeta_value){ - - double const beta0 = LocalParticle_get_beta0(part); - LocalParticle_update_ptau(part, beta0*new_pzeta_value); - - } - - /*gpufun*/ - void increment_at_element(LocalParticle* part0, int64_t const increment){ - - //start_per_particle_block (part0->part) - LocalParticle_add_to_at_element(part, increment); - //end_per_particle_block - - } - - /*gpufun*/ - void increment_at_turn(LocalParticle* part0, int flag_reset_s){ - - //start_per_particle_block (part0->part) - LocalParticle_add_to_at_turn(part, 1); - LocalParticle_set_at_element(part, 0); - if (flag_reset_s>0){ - LocalParticle_set_s(part, 0.); - } - //end_per_particle_block - } - - /*gpufun*/ - void increment_at_turn_backtrack(LocalParticle* part0, int flag_reset_s, - double const line_length, - int64_t const num_elements){ - - //start_per_particle_block (part0->part) - LocalParticle_add_to_at_turn(part, -1); - LocalParticle_set_at_element(part, num_elements); - if (flag_reset_s>0){ - LocalParticle_set_s(part, line_length); - } - //end_per_particle_block - } - - // check_is_active has different implementation on CPU and GPU - - #define CPU_SERIAL_IMPLEM //only_for_context cpu_serial - #define CPU_OMP_IMPLEM //only_for_context cpu_openmp - - #ifdef CPU_SERIAL_IMPLEM - - /*gpufun*/ - int64_t check_is_active(LocalParticle* part) { - int64_t ipart=0; - while (ipart < part->_num_active_particles){ - #ifdef XSUITE_RESTORE_LOSS - ipart++; - #else - if (part->state[ipart]<1){ - LocalParticle_exchange( - part, ipart, part->_num_active_particles-1); - part->_num_active_particles--; - part->_num_lost_particles++; - } - else{ - ipart++; - } - #endif - } - - if (part->_num_active_particles==0){ - return 0;//All particles lost - } else { - return 1; //Some stable particles are still present - } - } - - #else // not CPU_SERIAL_IMPLEM - #ifdef CPU_OMP_IMPLEM - - /*gpufun*/ - int64_t check_is_active(LocalParticle* part) { - #ifndef SKIP_SWAPS - int64_t ipart = part->ipart; - int64_t endpart = part->endpart; - - int64_t left = ipart; - int64_t right = endpart - 1; - int64_t swap_made = 0; - int64_t has_alive = 0; - - if (left == right) return part->state[left] > 0; - - while (left < right) { - if (part->state[left] > 0) { - left++; - has_alive = 1; - } - else if (part->state[right] <= 0) right--; - else { - LocalParticle_exchange(part, left, right); - left++; - right--; - swap_made = 1; - } - } - - return swap_made || has_alive; - #else - return 1; - #endif - } - - /*gpufun*/ - void count_reorganized_particles(LocalParticle* part) { - int64_t num_active = 0; - int64_t num_lost = 0; - - for (int64_t i = part->ipart; i < part->endpart; i++) { - if (part->state[i] <= -999999999) break; - else if (part->state[i] > 0) num_active++; - else num_lost++; - } - - part->_num_active_particles = num_active; - part->_num_lost_particles = num_lost; - } - - #else // not CPU_SERIAL_IMPLEM and not CPU_OMP_IMPLEM - - /*gpufun*/ - int64_t check_is_active(LocalParticle* part) { - return LocalParticle_get_state(part)>0; - }; - - #endif // 
CPU_OMP_IMPLEM - #endif // CPU_SERIAL_IMPLEM - - #undef CPU_SERIAL_IMPLEM //only_for_context cpu_serial - #undef CPU_OMP_IMPLEM //only_for_context cpu_openmp - - - ''' - - source = '\n\n'.join([src_typedef, src_adders, src_getters, - src_setters, src_scalers, src_exchange, - src_particles_to_local, src_local_to_particles, - src_angles, custom_source]) - - source += """ - #ifdef XTRACK_GLOBAL_XY_LIMIT - - /*gpufun*/ - void global_aperture_check(LocalParticle* part0) { - if (LocalParticle_check_track_flag( - part0, XS_FLAG_IGNORE_GLOBAL_APERTURE)){ - return; - } - - //start_per_particle_block (part0->part) - double const x = LocalParticle_get_x(part); - double const y = LocalParticle_get_y(part); - - int64_t const is_alive = (int64_t)( - (x >= -XTRACK_GLOBAL_XY_LIMIT) && - (x <= XTRACK_GLOBAL_XY_LIMIT) && - (y >= -XTRACK_GLOBAL_XY_LIMIT) && - (y <= XTRACK_GLOBAL_XY_LIMIT) ); - - // I assume that if I am in the function is because - if (!is_alive){ - LocalParticle_set_state(part, -1); - } - //end_per_particle_block - } - - #endif - - /*gpufun*/ - void LocalParticle_add_to_energy(LocalParticle* part, double delta_energy, int pz_only ){ - double ptau = LocalParticle_get_ptau(part); - double const p0c = LocalParticle_get_p0c(part); - double const charge_ratio = LocalParticle_get_charge_ratio(part); - double const chi = LocalParticle_get_chi(part); - double const mass_ratio = charge_ratio / chi; - - ptau += delta_energy/p0c / mass_ratio; - - double const old_rpp = LocalParticle_get_rpp(part); - - LocalParticle_update_ptau(part, ptau); - - if (!pz_only) { - double const new_rpp = LocalParticle_get_rpp(part); - double const f = old_rpp / new_rpp; - LocalParticle_scale_px(part, f); - LocalParticle_scale_py(part, f); - } - } - - - /*gpufun*/ - void LocalParticle_update_p0c(LocalParticle* part, double new_p0c_value){ - - double const mass0 = LocalParticle_get_mass0(part); - double const old_p0c = LocalParticle_get_p0c(part); - double const old_delta = LocalParticle_get_delta(part); - double const old_beta0 = LocalParticle_get_beta0(part); - - double const ppc = old_p0c * old_delta + old_p0c; - double const new_delta = (ppc - new_p0c_value)/new_p0c_value; - - double const new_energy0 = sqrt(new_p0c_value*new_p0c_value + mass0 * mass0); - double const new_beta0 = new_p0c_value / new_energy0; - double const new_gamma0 = new_energy0 / mass0; - - LocalParticle_set_p0c(part, new_p0c_value); - LocalParticle_set_gamma0(part, new_gamma0); - LocalParticle_set_beta0(part, new_beta0); - - LocalParticle_update_delta(part, new_delta); - - LocalParticle_scale_px(part, old_p0c/new_p0c_value); - LocalParticle_scale_py(part, old_p0c/new_p0c_value); - - LocalParticle_scale_zeta(part, new_beta0/old_beta0); - - } - - /*gpufun*/ - void LocalParticle_kill_particle(LocalParticle* part, int64_t kill_state) { - LocalParticle_set_x(part, 1e30); - LocalParticle_set_px(part, 1e30); - LocalParticle_set_y(part, 1e30); - LocalParticle_set_py(part, 1e30); - LocalParticle_set_zeta(part, 1e30); - LocalParticle_update_delta(part, -1); // zero energy - LocalParticle_set_state(part, kill_state); - } - """ - return source - @classmethod def part_energy_varnames(cls): return [vv for tt, vv in cls.part_energy_vars] diff --git a/xtrack/tracker.py b/xtrack/tracker.py index 27128d045..ba38bd178 100644 --- a/xtrack/tracker.py +++ b/xtrack/tracker.py @@ -47,7 +47,6 @@ def __init__( track_kernel=None, particles_monitor_class=None, extra_headers=(), - local_particle_src=None, _prebuilding_kernels=False, ): @@ -62,16 +61,12 @@ def __init__( raise 
ValueError("`enable_pipeline_hold` is not implemented in " "non-collective mode") - if local_particle_src is None: - local_particle_src = xt.Particles.gen_local_particle_api() - if not particles_monitor_class: particles_monitor_class = self._get_default_monitor_class() self.line = line self.particles_monitor_class = particles_monitor_class self.extra_headers = extra_headers - self.local_particle_src = local_particle_src self._enable_pipeline_hold = enable_pipeline_hold self.use_prebuilt_kernels = use_prebuilt_kernels self.track_flags = TrackFlags() @@ -684,9 +679,7 @@ def _build_kernel( kernel_descriptions=kernels, extra_headers=self._config_to_headers() + headers, extra_classes=kernel_element_classes + extra_classes, - apply_to_source=[ - partial(_handle_per_particle_blocks, - local_particle_src=self.local_particle_src)], + apply_to_source=[_handle_per_particle_blocks], specialize=True, compile=compile, save_source_as=f'{module_name}.c' if module_name else None, @@ -1128,8 +1121,6 @@ def _track_no_collective( self.track_flags.XS_FLAG_BACKTRACK = True return self._track_no_collective(**kwargs) - self.local_particle_src = particles.gen_local_particle_api() - if freeze_longitudinal: kwargs = locals().copy() kwargs.pop('self') diff --git a/xtrack/twiss.py b/xtrack/twiss.py index 87c00c68e..1cd68663e 100644 --- a/xtrack/twiss.py +++ b/xtrack/twiss.py @@ -2840,7 +2840,6 @@ def _build_auxiliary_tracker_with_extra_markers(tracker, at_s, marker_prefix, io_buffer=tracker.io_buffer, line=auxline, particles_monitor_class=None, - local_particle_src=tracker.local_particle_src ) auxtracker.line.config = tracker.line.config.copy() auxtracker.line._extra_config = tracker.line._extra_config.copy()
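The monitor hunk above replaces the old `//only_for_context cuda opencl` / `//only_for_context cpu_serial cpu_openmp` comment annotations with ordinary preprocessor guards, and the new local_particle_custom_api.h selects its check_is_active implementation the same way. The following is a minimal sketch of that dispatch pattern, not part of the diff; it assumes the build defines XO_CONTEXT_CPU on CPU targets plus exactly one of XO_CONTEXT_CPU_SERIAL or XO_CONTEXT_CPU_OPENMP, while GPU builds define neither and fall through to the #else branch.

    #include <stdint.h>

    /* Illustrative helper only: reports which implementation flavour the
       preprocessor guards would select for the current build. */
    static int64_t context_flavour_example(void)
    {
    #if defined(XO_CONTEXT_CPU_SERIAL)
        return 0;   /* single-threaded CPU: lost particles compacted in place */
    #elif defined(XO_CONTEXT_CPU_OPENMP)
        return 1;   /* OpenMP CPU: each thread works on its [ipart, endpart) slice */
    #else
        return 2;   /* GPU contexts: one thread per particle, only the state is checked */
    #endif
    }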
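The new multisetter.h writes each kernel with the VECTORIZE_OVER / END_VECTORIZE pair and GPUGLMEM-qualified pointers instead of the old `//vectorize_over` comment markers. A hypothetical extra kernel written in the same style is sketched below; the name add_values_at_offsets_float64 is illustrative only and is not registered anywhere in the package.

    /* Hypothetical kernel following the multisetter.h pattern: it reads the
       double stored at each byte offset, adds the corresponding input value
       and writes the result back. */
    GPUKERN
    void add_values_at_offsets_float64(
        MultiSetterData data,
        GPUGLMEM int8_t* buffer,
        GPUGLMEM double* input){

        int64_t num_offsets = MultiSetterData_len_offsets(data);

        VECTORIZE_OVER(ii, num_offsets);
            int64_t offs = MultiSetterData_get_offsets(data, ii);

            double current = *((GPUGLMEM double*)(buffer + offs));
            *((GPUGLMEM double*)(buffer + offs)) = current + input[ii];
        END_VECTORIZE;
    }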
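The adders, scalers and setters emitted by gen_local_particle_api() are each wrapped in an #ifndef FREEZE_VAR_<name> guard, so individual per-particle fields can be frozen at compile time. For one concrete field the generator output reduces to the code below (expanded here only to make the templates above concrete, using x as the example); compiling with -DFREEZE_VAR_x turns the writers into no-ops while the getter keeps working.

    GPUFUN
    void LocalParticle_set_x(LocalParticle* part, double value)
    {
    #ifndef FREEZE_VAR_x
        part->x[part->ipart] = value;
    #endif
    }

    GPUFUN
    void LocalParticle_add_to_x(LocalParticle* part, double value)
    {
    #ifndef FREEZE_VAR_x
        part->x[part->ipart] += value;
    #endif
    }

    GPUFUN
    double LocalParticle_get_x(LocalParticle* part)
    {
        return part->x[part->ipart];
    }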