@@ -17,39 +17,6 @@ namespace psc
1717namespace marder
1818{
1919
20- namespace detail
21- {
22-
23- inline void find_limits (const Grid_t& grid, int p, Int3& lx, Int3& rx, Int3& ly,
24- Int3& ry, Int3& lz, Int3& rz)
25- {
26- Int3 l_cc = {0 , 0 , 0 }, r_cc = {0 , 0 , 0 };
27- Int3 l_nc = {0 , 0 , 0 }, r_nc = {0 , 0 , 0 };
28- for (int d = 0 ; d < 3 ; d++) {
29- if (grid.bc .fld_lo [d] == BND_FLD_CONDUCTING_WALL &&
30- grid.atBoundaryLo (p, d)) {
31- l_cc[d] = -1 ;
32- l_nc[d] = -1 ;
33- }
34- if (grid.bc .fld_hi [d] == BND_FLD_CONDUCTING_WALL &&
35- grid.atBoundaryHi (p, d)) {
36- r_cc[d] = -1 ;
37- r_nc[d] = 0 ;
38- }
39- }
40- // FIXME, for conducting wall the signs here need checking...
41- lx = -Int3{l_cc[0 ], l_nc[1 ], l_nc[2 ]} + grid.ibn ;
42- rx = Int3{r_cc[0 ], r_nc[1 ], r_nc[2 ]} + grid.ldims + grid.ibn ;
43-
44- ly = -Int3{l_nc[0 ], l_cc[1 ], l_nc[2 ]} + grid.ibn ;
45- ry = Int3{r_nc[0 ], r_cc[1 ], r_nc[2 ]} + grid.ldims + grid.ibn ;
46-
47- lz = -Int3{l_nc[0 ], l_nc[1 ], l_cc[2 ]} + grid.ibn ;
48- rz = Int3{r_nc[0 ], r_nc[1 ], r_cc[2 ]} + grid.ldims + grid.ibn ;
49- }
50-
51- } // namespace detail
52-
5320// ----------------------------------------------------------------------
5421// correct
5522//
@@ -70,53 +37,46 @@ inline void correct(const Grid_t& grid, E1& efield, const Int3& efield_ib,
7037 Real3 fac = .5f * real_t (grid.dt ) * diffusion * Real3 (grid.domain .dx_inv );
7138
7239 for (int p = 0 ; p < grid.n_patches (); p++) {
73- Int3 lx, rx, ly, ry, lz, rz;
74- detail::find_limits (grid, p, lx, rx, ly, ry, lz, rz);
75-
76- Int3 ls[3 ] = {lx, ly, lz};
77- Int3 rs[3 ] = {rx, ry, rz};
78-
7940 auto res = mf.view (_all, _all, _all, 0 , p);
8041 for (int d = 0 ; d < 3 ; d++) {
8142 if (grid.isInvar (d)) {
8243 continue ;
8344 }
8445
85- Int3 l = ls[d];
86- Int3 r = rs[d];
8746 auto e_comp = efield.view (_all, _all, _all, d, p);
8847
89- gt::gslice s1x = _s (l[0 ], r[0 ]);
90- gt::gslice s1y = _s (l[1 ], r[1 ]);
91- gt::gslice s1z = _s (l[2 ], r[2 ]);
92-
48+ Int3 l = grid.ibn ;
49+ Int3 r = grid.ldims + grid.ibn ;
50+ gt::gslice s1[3 ] = {_s (l[0 ], r[0 ]), _s (l[1 ], r[1 ]), _s (l[2 ], r[2 ])};
9351 gt::gslice s2[3 ] = {_s (l[0 ], r[0 ]), _s (l[1 ], r[1 ]), _s (l[2 ], r[2 ])};
94- s2[d] = _s (l[d] + 1 , r[d] + 1 );
52+ s2[d].start += 1 ;
53+ s2[d].stop += 1 ;
9554
96- e_comp.view (s1x, s1y, s1z) =
97- e_comp.view (s1x, s1y, s1z) +
98- (res.view (s2[0 ], s2[1 ], s2[2 ]) - res.view (s1x, s1y, s1z)) * fac[d];
55+ e_comp.view (s1[0 ], s1[1 ], s1[2 ]) =
56+ e_comp.view (s1[0 ], s1[1 ], s1[2 ]) +
57+ (res.view (s2[0 ], s2[1 ], s2[2 ]) - res.view (s1[0 ], s1[1 ], s1[2 ])) *
58+ fac[d];
9959 }
10060 }
10161}
10262
10363#ifdef USE_CUDA
10464
10565template <typename E1 , typename E2 >
106- inline void cuda_marder_correct_yz (E1 & efield, E2 & res, Float3 fac, Int3 ly ,
107- Int3 ry , Int3 lz , Int3 rz )
66+ inline void cuda_marder_correct_yz (E1 & efield, E2 & res, Float3 fac, Int3 l ,
67+ Int3 r , Int3 l , Int3 r )
10868{
10969 auto k_efield = efield.to_kernel ();
11070 auto k_res = res.to_kernel ();
11171 gt::launch<2 >(
11272 {k_efield.shape (1 ), k_efield.shape (2 )}, GT_LAMBDA (int iy, int iz) {
113- if ((iy >= ly [1 ] && iy < ry [1 ]) && (iz >= ly [2 ] && iz < ry [2 ])) {
73+ if ((iy >= l [1 ] && iy < r [1 ]) && (iz >= l [2 ] && iz < r [2 ])) {
11474 k_efield (0 , iy, iz, 1 ) =
11575 k_efield (0 , iy, iz, 1 ) +
11676 fac[1 ] * (k_res (0 , iy + 1 , iz) - k_res (0 , iy, iz));
11777 }
11878
119- if ((iy >= lz [1 ] && iy < rz [1 ]) && (iz >= lz [2 ] && iz < rz [2 ])) {
79+ if ((iy >= l [1 ] && iy < r [1 ]) && (iz >= l [2 ] && iz < r [2 ])) {
12080 k_efield (0 , iy, iz, 2 ) =
12181 k_efield (0 , iy, iz, 2 ) +
12282 fac[2 ] * (k_res (0 , iy, iz + 1 ) - k_res (0 , iy, iz));
@@ -126,30 +86,30 @@ inline void cuda_marder_correct_yz(E1& efield, E2& res, Float3 fac, Int3 ly,
12686}
12787
12888template <typename E1 , typename E2 >
129- inline void cuda_marder_correct_xyz (E1 & efield, E2 & res, Float3 fac, Int3 lx ,
130- Int3 rx, Int3 ly, Int3 ry, Int3 lz, Int3 rz )
89+ inline void cuda_marder_correct_xyz (E1 & efield, E2 & res, Float3 fac, Int3 l ,
90+ Int3 r )
13191{
13292 auto k_efield = efield.to_kernel ();
13393 auto k_res = res.to_kernel ();
13494 gt::launch<3 >(
13595 {k_efield.shape (0 ), k_efield.shape (1 ), k_efield.shape (2 )},
13696 GT_LAMBDA (int ix, int iy, int iz) {
137- if ((ix >= lx [0 ] && ix < rx [0 ]) && (iy >= lx [1 ] && iy < rx [1 ]) &&
138- (iz >= lx [2 ] && iz < rx [2 ])) {
97+ if ((ix >= l [0 ] && ix < r [0 ]) && (iy >= l [1 ] && iy < r [1 ]) &&
98+ (iz >= l [2 ] && iz < r [2 ])) {
13999 k_efield (ix, iy, iz, 0 ) =
140100 k_efield (ix, iy, iz, 0 ) +
141101 fac[0 ] * (k_res (ix, iy + 1 , iz) - k_res (ix, iy, iz));
142102 }
143103
144- if ((ix >= ly [0 ] && ix < ry [0 ]) && (iy >= ly [1 ] && iy < ry [1 ]) &&
145- (iz >= ly [2 ] && iz < ry [2 ])) {
104+ if ((ix >= l [0 ] && ix < r [0 ]) && (iy >= l [1 ] && iy < r [1 ]) &&
105+ (iz >= l [2 ] && iz < r [2 ])) {
146106 k_efield (ix, iy, iz, 1 ) =
147107 k_efield (ix, iy, iz, 1 ) +
148108 fac[1 ] * (k_res (ix, iy + 1 , iz) - k_res (ix, iy, iz));
149109 }
150110
151- if ((ix >= lz [0 ] && ix < rz [0 ]) && (iy >= lz [1 ] && iy < rz [1 ]) &&
152- (iz >= lz [2 ] && iz < rz [2 ])) {
111+ if ((ix >= l [0 ] && ix < r [0 ]) && (iy >= l [1 ] && iy < r [1 ]) &&
112+ (iz >= l [2 ] && iz < r [2 ])) {
153113 k_efield (ix, iy, iz, 2 ) =
154114 k_efield (ix, iy, iz, 2 ) +
155115 fac[2 ] * (k_res (ix, iy, iz + 1 ) - k_res (ix, iy, iz));
@@ -172,15 +132,15 @@ inline void correct(const Grid_t& grid, E1& efield, const Int3& efield_ib,
172132 assert (mf_ib == -grid.ibn );
173133 // OPT, do all patches in one kernel
174134 for (int p = 0 ; p < grid.n_patches (); p++) {
175- Int3 lx, rx, ly, ry, lz, rz ;
176- detail::find_limits (grid, p, lx, rx, ly, ry, lz, rz) ;
135+ Int3 l = grid. ibn ;
136+ Int3 r = grid. ibn + grid. ldims ;
177137
178138 auto p_efield = efield.view (_all, _all, _all, _all, p);
179139 auto p_res = mf.view (_all, _all, _all, 0 , p);
180140 if (grid.isInvar (0 )) {
181- cuda_marder_correct_yz (p_efield, p_res, fac, ly, ry, lz, rz );
141+ cuda_marder_correct_yz (p_efield, p_res, fac, l, r );
182142 } else {
183- cuda_marder_correct_xyz (p_efield, p_res, fac, lx, rx, ly, ry, lz, rz );
143+ cuda_marder_correct_xyz (p_efield, p_res, fac, l, r );
184144 }
185145 }
186146}
@@ -201,7 +161,7 @@ public:
201161 using Bnd = BND ;
202162 using real_t = typename storage_type::value_type;
203163
204- // FIXME: checkpointing won't properly restore state
164+ // FIXME: checkpointing won't properly restore state
205165
206166 MarderCommon (const Grid_t& grid, real_t diffusion, int loop, bool dump)
207167 : diffusion_{diffusion}, loop_{loop}, dump_{dump}
@@ -262,6 +222,38 @@ public:
262222 Int3 res_ib = -grid.ibn ;
263223 auto res = storage_type{psc::mflds::make_shape (grid, 1 , res_ib)};
264224 psc::mflds::interior (grid, res) = dive - rho;
225+
226+ // Gauss' law is ostensibly violated at some boundaries, where virtual
227+ // charges (i.e., charges that aren't associated with actual particles)
228+ // implicitly shape the electric field. To account for virtual charges,
229+ // simply set the error at those boundaries to 0.
230+ for (int p = 0 ; p < grid.n_patches (); p++) {
231+ for (int d = 0 ; d < 3 ; d++) {
232+ if ((grid.bc .fld_lo [d] == BND_FLD_CONDUCTING_WALL ||
233+ grid.bc .fld_lo [d] == BND_FLD_OPEN ) &&
234+ grid.atBoundaryLo (p, d)) {
235+
236+ gt::gslice slices[3 ] = {_s (grid.ibn [0 ], -grid.ibn [0 ]),
237+ _s (grid.ibn [1 ], -grid.ibn [1 ]),
238+ _s (grid.ibn [2 ], -grid.ibn [2 ])};
239+ slices[d].stop = slices[d].start + 1 ;
240+ res.view (slices[0 ], slices[1 ], slices[2 ], 0 , p) = 0.0 ;
241+ }
242+
243+ if ((grid.bc .fld_hi [d] == BND_FLD_CONDUCTING_WALL ||
244+ grid.bc .fld_hi [d] == BND_FLD_OPEN ) &&
245+ grid.atBoundaryHi (p, d)) {
246+ gt::gslice slices[3 ] = {_s (grid.ibn [0 ], -grid.ibn [0 ]),
247+ _s (grid.ibn [1 ], -grid.ibn [1 ]),
248+ _s (grid.ibn [2 ], -grid.ibn [2 ])};
249+ // Note that upper edges are in the ghost region.
250+ slices[d].start = slices[d].stop ;
251+ slices[d].stop += 1 ;
252+ res.view (slices[0 ], slices[1 ], slices[2 ], 0 , p) = 0.0 ;
253+ }
254+ }
255+ }
256+
265257 bnd_.fill_ghosts (grid, res, res_ib, 0 , 1 );
266258
267259 print_progress (grid, rho, dive, res);
0 commit comments