diff --git a/CMakeLists.txt b/CMakeLists.txt index 65e84b2eb..43c1c3cb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -572,6 +572,7 @@ macro (add_file DST SRC) endmacro() add_file(data/srgb.coeff ${CMAKE_BINARY_DIR}/ext_build/rgb2spec/srgb.coeff rgb2spec_opt_run) +add_file(data/vmf-hemisphere.data ${CMAKE_CURRENT_SOURCE_DIR}/src/integrators/vmf-hemisphere.data) file(COPY resources/data/ior DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/dist/data) diff --git a/docs/examples/10_inverse_rendering/invert_heightfield.py b/docs/examples/10_inverse_rendering/invert_heightfield.py new file mode 100644 index 000000000..2362a1832 --- /dev/null +++ b/docs/examples/10_inverse_rendering/invert_heightfield.py @@ -0,0 +1,111 @@ +import os +import time +import enoki as ek + +import mitsuba +mitsuba.set_variant('gpu_autodiff_rgb') + +from mitsuba.core import Thread, xml, UInt32, Float, Vector2f, Vector3f, Transform4f, ScalarTransform4f +from mitsuba.render import SurfaceInteraction3f +from mitsuba.python.util import traverse +from mitsuba.python.autodiff import render, write_bitmap, Adam + +# Convert flat array into a vector of arrays (will be included in next enoki release) +def ravel(buf, dim = 3): + idx = dim * UInt32.arange(ek.slices(buf) // dim) + if dim == 2: + return Vector2f(ek.gather(buf, idx), ek.gather(buf, idx + 1)) + elif dim == 3: + return Vector3f(ek.gather(buf, idx), ek.gather(buf, idx + 1), ek.gather(buf, idx + 2)) + +# Return contiguous flattened array (will be included in next enoki release) +def unravel(source, target, dim = 3): + idx = UInt32.arange(ek.slices(source)) + for i in range(dim): + ek.scatter(target, source[i], dim * idx + i) + +# Prepare output folder +output_path = "output/invert_heightfield/" +if not os.path.isdir(output_path): + os.makedirs(output_path) + +# Load example scene +scene_folder = '../../../resources/data/docs/examples/invert_heightfield/' +Thread.thread().file_resolver().append(scene_folder) +scene = xml.load_file(scene_folder + 
'scene.xml') + +params = traverse(scene) +positions_buf = params['grid_mesh.vertex_positions_buf'] +positions_initial = ravel(positions_buf) +normals_initial = ravel(params['grid_mesh.vertex_normals_buf']) +vertex_count = ek.slices(positions_initial) + +# Create a texture with the reference displacement map +disp_tex = xml.load_dict({ + "type" : "bitmap", + "filename" : "mitsuba_coin.jpg", + "to_uv" : ScalarTransform4f.scale([1, -1, 1]) # texture is upside-down +}).expand()[0] + +# Create a fake surface interaction with an entry per vertex on the mesh +mesh_si = SurfaceInteraction3f.zero(vertex_count) +mesh_si.uv = ravel(params['grid_mesh.vertex_texcoords_buf'], dim=2) + +# Evaluate the displacement map for the entire mesh +disp_tex_data_ref = disp_tex.eval_1(mesh_si) + +# Apply displacement to mesh vertex positions and update scene (e.g. OptiX BVH) +def apply_displacement(amplitude = 0.05): + new_positions = disp_tex.eval_1(mesh_si) * normals_initial * amplitude + positions_initial + unravel(new_positions, params['grid_mesh.vertex_positions_buf']) + params.set_dirty('grid_mesh.vertex_positions_buf') + params.update() + +# Apply displacement before generating reference image +apply_displacement() + +# Render a reference image (no derivatives used yet) +image_ref = render(scene, spp=32) +crop_size = scene.sensors()[0].film().crop_size() +write_bitmap(output_path + 'out_ref.exr', image_ref, crop_size) +print("Write " + output_path + "out_ref.exr") + +# Reset texture data to a constant +disp_tex_params = traverse(disp_tex) +disp_tex_params.keep(['data']) +disp_tex_params['data'] = ek.full(Float, 0.25, len(disp_tex_params['data'])) +disp_tex_params.update() + +# Construct an Adam optimizer that will adjust the texture parameters +opt = Adam(disp_tex_params, lr=0.002) + +time_a = time.time() + +iterations = 100 +for it in range(iterations): + # Perform a differentiable rendering of the scene + image = render(scene, + optimizer=opt, + spp=4, + unbiased=True, + 
pre_render_callback=apply_displacement) + + write_bitmap(output_path + 'out_%03i.exr' % it, image, crop_size) + + # Objective: MSE between 'image' and 'image_ref' + ob_val = ek.hsum(ek.sqr(image - image_ref)) / len(image) + + # Back-propagate errors to input parameters + ek.backward(ob_val) + + # Optimizer: take a gradient step -> update displacement map + opt.step() + + # Compare iterate against ground-truth value + err_ref = ek.hsum(ek.sqr(disp_tex_data_ref - disp_tex.eval_1(mesh_si))) + print('Iteration %03i: error=%g' % (it, err_ref[0]), end='\r') + +time_b = time.time() + +print() +print('%f ms per iteration' % (((time_b - time_a) * 1000) / iterations)) diff --git a/docs/examples/10_inverse_rendering/invert_pose.py b/docs/examples/10_inverse_rendering/invert_pose.py new file mode 100644 index 000000000..2c754e30c --- /dev/null +++ b/docs/examples/10_inverse_rendering/invert_pose.py @@ -0,0 +1,93 @@ +import os +import time +import enoki as ek + +import mitsuba +mitsuba.set_variant('gpu_autodiff_rgb') + +from mitsuba.core import xml, Thread, Transform4f, Bitmap, Float, Vector3f, UInt32 +from mitsuba.python.util import traverse +from mitsuba.python.autodiff import render, write_bitmap, Adam + +# Convert flat array into a vector of arrays (will be included in next enoki release) +def ravel(buf, dim = 3): + idx = dim * UInt32.arange(ek.slices(buf) // dim) + return Vector3f(ek.gather(buf, idx), ek.gather(buf, idx + 1), ek.gather(buf, idx + 2)) + +# Return contiguous flattened array (will be included in next enoki release) +def unravel(source, target, dim = 3): + idx = UInt32.arange(ek.slices(source)) + for i in range(dim): + ek.scatter(target, source[i], dim * idx + i) + +# Prepare output folder +output_path = "output/invert_pose/" +if not os.path.isdir(output_path): + os.makedirs(output_path) + +# Load example scene +scene_folder = '../../../resources/data/docs/examples/invert_pose/' +Thread.thread().file_resolver().append(scene_folder) +scene = 
xml.load_file(scene_folder + 'scene.xml') + +params = traverse(scene) +positions_buf = params['object.vertex_positions_buf'] +positions_initial = ravel(positions_buf) + +# Create differential parameter to be optimized +translate_ref = Vector3f(0.0) + +# Create a new ParameterMap (or dict) +params_optim = { + "translate" : translate_ref, +} + +# Construct an Adam optimizer that will adjust the translation parameters +opt = Adam(params_optim, lr=0.02) + +# Apply the transformation to mesh vertex position and update scene (e.g. Optix BVH) +def apply_transformation(): + trasfo = Transform4f.translate(params_optim["translate"]) + new_positions = trasfo.transform_point(positions_initial) + unravel(new_positions, params['object.vertex_positions_buf']) + params.set_dirty('object.vertex_positions_buf') + params.update() + +# Render a reference image (no derivatives used yet) +apply_transformation() +image_ref = render(scene, spp=32) +crop_size = scene.sensors()[0].film().crop_size() +write_bitmap(output_path + 'out_ref.exr', image_ref, crop_size) +print("Write " + output_path + "out_ref.exr") + +# Move object before starting the optimization process +params_optim["translate"] = Vector3f(0.5, 0.2, -0.2) + +time_a = time.time() + +iterations = 100 +for it in range(iterations): + # Perform a differentiable rendering of the scene + image = render(scene, + optimizer=opt, + spp=4, + unbiased=True, + pre_render_callback=apply_transformation) + + write_bitmap(output_path + 'out_%03i.exr' % it, image, crop_size) + + # Objective: MSE between 'image' and 'image_ref' + ob_val = ek.hsum(ek.sqr(image - image_ref)) / len(image) + + # Back-propagate errors to input parameters + ek.backward(ob_val) + + # Optimizer: take a gradient step -> update displacement map + opt.step() + + print('Iteration %03i: error=%g' % (it, ob_val[0]), end='\r') + +time_b = time.time() + +print() +print('%f ms per iteration' % (((time_b - time_a) * 1000) / iterations)) \ No newline at end of file diff --git 
a/docs/src/inverse_rendering/advanced.rst b/docs/src/inverse_rendering/advanced.rst index d7d8c0c58..3c7a402f7 100644 --- a/docs/src/inverse_rendering/advanced.rst +++ b/docs/src/inverse_rendering/advanced.rst @@ -126,3 +126,292 @@ The solution we found optimizes the objective well (i.e. the rendered image matches the target), but the reconstructed texture may not match our expectation. In such a case, it may be advisable to introduce further regularization (non-negativity, smoothness, etc.). + +.. note:: + + The full Python script of this tutorial can be found in the file: + :file:`docs/examples/10_inverse_rendering/invert_bunny.py`. + + +Pose optimization +------------------------ + +This advanced example demonstrates how to optimize the pose of an object so that it matches a +target reference image. One key aspect of this example is that it requires the differentiation of +mesh parameters, such as vertex positions. Computing derivatives for parameters that affect +visibility is a complex problem as it would normally make the integrands of the rendering equation +non-differentiable. For this reason, this example requires the use of the specialized +:ref:`pathreparam ` integrator, described in this +`article `_. + +The example scene can be found in ``resources/data/docs/examples/invert_pose/`` and contains a +simple object illuminated by a rectangular light source. To avoid discontinuities around the +area light, we use the :ref:`smootharea ` plugin. + +.. subfigstart:: +.. subfigure:: ../../../resources/data/docs/images/autodiff/invert_pose_ref_image.png + :caption: Reference image with the object at the target position. +.. subfigure :: ../../../resources/data/docs/images/autodiff/invert_pose_init_image.png + :caption: Initial image with the object at the wrong position. +.. 
subfigend:: + :label: fig-pose-autodiff + +First, we define two helper functions that we will use to transform the mesh +parameter buffers (flattened arrays) into ``VectorXf`` type (and the other way around). +Note that those functions will be natively supported by ``enoki`` in a future release. + +.. code-block:: python + + # Convert flat array into a vector of arrays (will be included in next enoki release) + def ravel(buf, dim = 3): + idx = dim * UInt32.arange(ek.slices(buf) // dim) + if dim == 2: + return Vector2f(ek.gather(buf, idx), ek.gather(buf, idx + 1)) + elif dim == 3: + return Vector3f(ek.gather(buf, idx), ek.gather(buf, idx + 1), ek.gather(buf, idx + 2)) + + # Return contiguous flattened array (will be included in next enoki release) + def unravel(source, target, dim = 3): + idx = UInt32.arange(ek.slices(source)) + for i in range(dim): + ek.scatter(target, source[i], dim * idx + i) + +Using those functions, we can get the initial vertex positions of the mesh after loading the scene: + +.. code-block:: python + + # Load example scene + scene_folder = '../../../resources/data/docs/examples/invert_pose/' + Thread.thread().file_resolver().append(scene_folder) + scene = xml.load_file(scene_folder + 'scene.xml') + + params = traverse(scene) + positions_buf = params['object.vertex_positions_buf'] + positions_initial = ravel(positions_buf) + +In this example, we are going to optimize a translation transform in order to align an object given +a reference image. To do so, we need to create a differential parameter that represents the +amount of translation along the 3 axes. After inserting this parameter in a Python dictionary +(similar structure as the :py:obj:`mitsuba.python.util.ParameterMap`), we construct an Adam +optimizer that will adjust those translation parameters during the optimization loop. + +.. 
code-block:: python + + # Create differential parameter to be optimized + translate_ref = Vector3f(0.0) + + # Create a new ParameterMap (or dict) + params_optim = { + "translate" : translate_ref, + } + + # Construct an Adam optimizer that will adjust the translation parameters + opt = Adam(params_optim, lr=0.02) + +We define a helper function that creates a translation transform from this parameter and +applies it to every vertex position of the mesh. This function will be called at every iteration +of the optimization loop to update the mesh (and the scene's data structure) after the parameter was +adjusted by the optimizer. + +.. code-block:: python + + # Apply the transformation to mesh vertex position and update scene (e.g. Optix BVH) + def apply_transformation(): + trasfo = Transform4f.translate(params_optim["translate"]) + new_positions = trasfo.transform_point(positions_initial) + unravel(new_positions, params['object.vertex_positions_buf']) + params.set_dirty('object.vertex_positions_buf') + params.update() + +For the sake of this example, we synthesize the reference image with the null transform: + +.. code-block:: python + + # Render a reference image (no derivatives used yet) + apply_transformation() + image_ref = render(scene, spp=32) + crop_size = scene.sensors()[0].film().crop_size() + write_bitmap(output_path + 'out_ref.exr', image_ref, crop_size) + print("Write " + output_path + "out_ref.exr") + +Starting with a translated object, we can now perform a similar optimization loop as in the +previous examples. The ``apply_transformation()`` function is passed to the ``render`` function +as a pre-render callback, which will be called before every render. + +.. 
code-block:: python + + # Move object before starting the optimization process + params_optim["translate"] = Vector3f(0.5, 0.2, -0.2) + + iterations = 100 + for it in range(iterations): + # Perform a differentiable rendering of the scene + image = render(scene, + optimizer=opt, + spp=4, + unbiased=True, + pre_render_callback=apply_transformation) + + write_bitmap(output_path + 'out_%03i.exr' % it, image, crop_size) + + # Objective: MSE between 'image' and 'image_ref' + ob_val = ek.hsum(ek.sqr(image - image_ref)) / len(image) + + # Back-propagate errors to input parameters + ek.backward(ob_val) + + # Optimizer: take a gradient step -> update displacement map + opt.step() + + print('Iteration %03i: error=%g' % (it, ob_val[0]), end='\r') + + +The following video shows the result of this optimization, with the object slowly aligning with the +reference image: + + +Heightfield optimization +------------------------ + +In this example, we will optimize for the heightfield values on a grid mesh. This will be done by +adjusting the pixel values of a displacement map applied on that mesh. We also need the +:ref:`pathreparam ` here as we will differentiate the vertex positions +of the grid. + +The example scene can be found in ``resources/data/docs/examples/invert_heightfield/`` and contains a +simple grid mesh illuminated by a rectangular light source. To avoid discontinuities around the +area light, we use the :ref:`smootharea ` plugin. + +.. subfigstart:: +.. subfigure:: ../../../resources/data/docs/images/autodiff/invert_heightfield_ref_image.png + :caption: Reference render of the heightfield. +.. subfigure :: ../../../resources/data/docs/examples/invert_heightfield/mitsuba_coin.jpg + :caption: Target displacement map. +.. subfigend:: + :label: fig-heightfield-autodiff + +As in the previous example, we can load the scene and read the initial grid mesh parameters +(vertex positions, normals and texture coordinates), which we will use later in the script. + +.. 
code-block:: python + + import enoki as ek + import mitsuba + mitsuba.set_variant('gpu_autodiff_rgb') + + from mitsuba.core import UInt32, Float, Thread, xml, Vector2f, Vector3f, Transform4f + from mitsuba.render import SurfaceInteraction3f + from mitsuba.python.util import traverse + from mitsuba.python.autodiff import render, write_bitmap, Adam + + # Load example scene + scene_folder = '../../../resources/data/docs/examples/invert_heightfield/' + Thread.thread().file_resolver().append(scene_folder) + scene = xml.load_file(scene_folder + 'scene.xml') + + params = traverse(scene) + positions_buf = params['grid_mesh.vertex_positions_buf'] + positions_initial = ravel(positions_buf) + normals_initial = ravel(params['grid_mesh.vertex_normals_buf']) + vertex_count = ek.slices(positions_initial) + +In this example, we implement displacement mapping directly in Python instead of using a C++ plugin. +This showcases the flexibility of the framework, and the ability to fully control the optimization +process. For instance, one could want to add constraints on the displacement values range, ... + +We first create a :ref:`Bitmap ` texture instance using +:py:func:`mitsuba.core.xml.load_dict`, which will load the displacement map image file from disk. +We also create a :py:class:`mitsuba.render.SurfaceInteraction3f` with one entry per vertex on the +mesh. By properly setting the texture coordinates on this surface interaction, we can now evaluate +the displacement map for the entire mesh in one line of code. + +.. 
code-block:: python + + # Create a texture with the reference displacement map + disp_tex = xml.load_dict({ + "type" : "bitmap", + "filename" : "mitsuba_coin.jpg" + }).expand()[0] + + # Create a fake surface interaction with an entry per vertex on the mesh + mesh_si = SurfaceInteraction3f.zero(vertex_count) + mesh_si.uv = ravel(params['grid_mesh.vertex_texcoords_buf'], dim=2) + + # Evaluate the displacement map for the entire mesh + disp_tex_data_ref = disp_tex.eval_1(mesh_si) + +Finally, we define a function that applies the displacement map onto the original mesh. This will +be called at every iteration of the optimization loop to update the mesh data every time the +displacement map is refined. + +.. code-block:: python + + # Apply displacement to mesh vertex positions and call update scene + def apply_displacement(amplitude = 0.05): + new_positions = disp_tex.eval_1(mesh_si) * normals_initial * amplitude + positions_initial + unravel(new_positions, params['grid_mesh.vertex_positions_buf']) + params.set_dirty('grid_mesh.vertex_positions_buf') + params.update() + +We can now generate a reference image. + +.. code-block:: python + + # Apply displacement before generating reference image + apply_displacement() + + # Render a reference image (no derivatives used yet) + image_ref = render(scene, spp=32) + crop_size = scene.sensors()[0].film().crop_size() + write_bitmap('out_ref.exr', image_ref, crop_size) + print("Write out_ref.exr") + +Before running the optimization loop, we need to change the displacement data to a constant value +(here ``0.25``). This can be done using the :py:func:`mitsuba.python.util.traverse` function +on the texture object directly. We can then create an optimizer that will adjust those texture +parameters during the optimization process. + +.. 
code-block:: python + + # Reset texture data to a constant + disp_tex_params = traverse(disp_tex) + disp_tex_params['data'] = ek.full(Float, 0.25, len(disp_tex_params['data'])) + disp_tex_params.update() + + # Construct an Adam optimizer that will adjust the texture parameters + disp_tex_params.keep(['data']) + opt = Adam(disp_tex_params, lr=0.002) + +The optimization loop is very similar to the previous example, with the exception that it needs to +manually apply the displacement mapping to the mesh at every iteration. + +.. code-block:: python + + iterations = 100 + for it in range(iterations): + # Apply displacement to mesh and update scene (e.g. OptiX BVH) + apply_displacement() + + # Perform a differentiable rendering of the scene + image = render(scene, optimizer=opt, spp=4) + write_bitmap('out_%03i.exr' % it, image, crop_size) + + # Objective: MSE between 'image' and 'image_ref' + ob_val = ek.hsum(ek.sqr(image - image_ref)) / len(image) + + # Back-propagate errors to input parameters + ek.backward(ob_val) + + # Optimizer: take a gradient step -> update displacement map + opt.step() + + # Compare iterate against ground-truth value + err_ref = ek.hsum(ek.sqr(disp_tex_data_ref - disp_tex.eval_1(mesh_si))) + print('Iteration %03i: error=%g' % (it, err_ref[0]), end='\r') + +Here we can see the result of the heightfield optimization: + +.. note:: + + The full Python script of this tutorial can be found in the file: + :file:`docs/examples/10_inverse_rendering/invert_heightfield.py`. diff --git a/docs/src/inverse_rendering/diff_render.rst b/docs/src/inverse_rendering/diff_render.rst index 6d8d5ee42..29220017b 100644 --- a/docs/src/inverse_rendering/diff_render.rst +++ b/docs/src/inverse_rendering/diff_render.rst @@ -322,7 +322,7 @@ overly large value. .. note:: The full Python script of this tutorial can be found in the file: - :file:`docs/examples/10_diff_render/invert_cbox.py`. + :file:`docs/examples/10_inverse_rendering/invert_cbox.py`. 
Forward-mode differentiation @@ -395,4 +395,4 @@ respect to albedo, the red color disappears. .. note:: The full Python script of this tutorial can be found in the file: - :file:`docs/examples/10_diff_render/forward_diff.py`. + :file:`docs/examples/10_inverse_rendering/forward_diff.py`. diff --git a/include/mitsuba/core/warp.h b/include/mitsuba/core/warp.h index 918e2e4ad..9f13369a7 100644 --- a/include/mitsuba/core/warp.h +++ b/include/mitsuba/core/warp.h @@ -554,9 +554,9 @@ MTS_INLINE Value square_to_beckmann_pdf(const Vector &m, // ======================================================================= /// Warp a uniformly distributed square sample to a von Mises Fisher distribution -template +template > MTS_INLINE Vector square_to_von_mises_fisher(const Point &sample, - const scalar_t &kappa) { + const KappaValue &kappa) { #if 0 // Approach 1: warping method based on standard disk mapping @@ -595,8 +595,8 @@ MTS_INLINE Vector square_to_von_mises_fisher(const Point &sa } /// Inverse of the mapping \ref von_mises_fisher_to_square -template -MTS_INLINE Point von_mises_fisher_to_square(const Vector &v, scalar_t kappa) { +template > +MTS_INLINE Point von_mises_fisher_to_square(const Vector &v, KappaValue kappa) { Value expm2k = exp(-2.f * kappa), t = exp((v.z() - 1.f) * kappa), sy = (expm2k - t) / (expm2k - 1.f), @@ -607,17 +607,15 @@ MTS_INLINE Point von_mises_fisher_to_square(const Vector &v, } /// Probability density of \ref square_to_von_mises_fisher() -template -MTS_INLINE Value square_to_von_mises_fisher_pdf(const Vector &v, scalar_t kappa) { +template > +MTS_INLINE Value square_to_von_mises_fisher_pdf(const Vector &v, KappaValue kappa) { /* Stable algorithm for evaluating the von Mises Fisher distribution https://www.mitsuba-renderer.org/~wenzel/files/vmf.pdf */ - assert(kappa >= 0); - if (unlikely(kappa == 0)) - return math::InvFourPi; - else - return exp(kappa * (v.z() - 1.f)) * (kappa * math::InvTwoPi) / + Value result = exp(kappa * (v.z() - 1.f)) * (kappa 
* math::InvTwoPi) / (1.f - exp(-2.f * kappa)); + masked(result, eq(kappa, 0.f)) = math::InvFourPi; + return result; } // ======================================================================= diff --git a/include/mitsuba/render/bsdf.h b/include/mitsuba/render/bsdf.h index e3c1672f8..f5350e2b0 100644 --- a/include/mitsuba/render/bsdf.h +++ b/include/mitsuba/render/bsdf.h @@ -220,6 +220,9 @@ template struct BSDFSample3 { /// Stores the component index that was sampled by \ref BSDF::sample() UInt32 sampled_component; + /// Roughness of the sampled material (used in \ref DiffPathIntegrator) + Float sampled_roughness; + //! @} // ============================================================= @@ -242,13 +245,13 @@ template struct BSDFSample3 { */ BSDFSample3(const Vector3f &wo) : wo(wo), pdf(0.f), eta(1.f), sampled_type(0), - sampled_component(uint32_t(-1)) { } + sampled_component(uint32_t(-1)), sampled_roughness(math::Infinity) { } //! @} // ============================================================= - ENOKI_STRUCT(BSDFSample3, wo, pdf, eta, sampled_type, sampled_component); + ENOKI_STRUCT(BSDFSample3, wo, pdf, eta, sampled_type, sampled_component, sampled_roughness); }; @@ -509,7 +512,7 @@ NAMESPACE_END(mitsuba) // ----------------------------------------------------------------------- ENOKI_STRUCT_SUPPORT(mitsuba::BSDFSample3, wo, pdf, eta, - sampled_type, sampled_component) + sampled_type, sampled_component, sampled_roughness) //! 
@} // ----------------------------------------------------------------------- diff --git a/include/mitsuba/render/interaction.h b/include/mitsuba/render/interaction.h index e9e5784c8..34c2b940c 100644 --- a/include/mitsuba/render/interaction.h +++ b/include/mitsuba/render/interaction.h @@ -244,6 +244,37 @@ struct SurfaceInteraction : Interaction { fmsub(a00, b1y, a01 * b0y) * inv_det); } + /// Fill with detailed information describing the intersection + void fill_surface_interaction(const Ray3f &ray, + const void *cache, // TODO should be Float* but can't because of ENOKI_STRUCT_SUPPORT (pointer to reference issue) + Mask active = true) { + ShapePtr target = select(neq(instance, nullptr), instance, shape); + + auto si = target->fill_surface_interaction(ray, (Float *) cache, + arange(slices(ray)), + *this, is_valid() && active); + + // Keep this->t == INF if interaction isn't valid + masked(t, is_valid()) = si.t; + p = si.p; + n = si.n; + uv = si.uv; + sh_frame.n = si.sh_frame.n; + sh_frame.s = si.sh_frame.s; + sh_frame.t = si.sh_frame.t; + dp_du = si.dp_du; + dp_dv = si.dp_dv; + } + + /** + * Calls the \ref Shape::differentiable_position method on the shape in order + * to re-compute \c p and \c n with respect to the shape parameters only. 
+ */ + void compute_differentiable_shape_position(Mask active) { + ShapePtr target = select(neq(instance, nullptr), instance, shape); + std::tie(p, n) = target->differentiable_position(*this, active); + } + /** * \brief Converts a Mueller matrix defined in a local frame to world space * diff --git a/include/mitsuba/render/kdtree.h b/include/mitsuba/render/kdtree.h index 4e62725ea..ad417cc47 100644 --- a/include/mitsuba/render/kdtree.h +++ b/include/mitsuba/render/kdtree.h @@ -2353,7 +2353,7 @@ class MTS_EXPORT_RENDER ShapeKDTree : public TShapeKDTree(); // Ask shape(s) to fill in the rest using the cache - si.shape->fill_surface_interaction(ray, cache + 2, si, active); + si.fill_surface_interaction(ray, (void *)(cache + 2), active); // Gram-schmidt orthogonalization to compute local shading frame si.sh_frame.s = normalize( diff --git a/include/mitsuba/render/mesh.h b/include/mitsuba/render/mesh.h index 3914866e7..33d73528f 100644 --- a/include/mitsuba/render/mesh.h +++ b/include/mitsuba/render/mesh.h @@ -157,10 +157,16 @@ class MTS_EXPORT_RENDER Mesh : public Shape { barycentric_coordinates(const SurfaceInteraction3f &si, Mask active = true) const; - virtual void fill_surface_interaction(const Ray3f &ray, - const Float *cache, - SurfaceInteraction3f &si, - Mask active = true) const override; + virtual SurfaceInteraction3f + fill_surface_interaction(const Ray3f &ray, + const Float *cache, + const UInt32 &cache_indices, + SurfaceInteraction3f si, + Mask active = true) const override; + + virtual std::pair + differentiable_position(const SurfaceInteraction3f &si, + Mask active = true) const override; virtual std::pair normal_derivative(const SurfaceInteraction3f &si, @@ -193,7 +199,7 @@ class MTS_EXPORT_RENDER Mesh : public Shape { * barycentric coordinates */ MTS_INLINE std::tuple - ray_intersect_triangle(const ScalarIndex &index, const Ray3f &ray, + ray_intersect_triangle(const UInt32 &index, const Ray3f &ray, identity_t active = true) const { auto fi = 
face_indices(index); @@ -356,10 +362,13 @@ NAMESPACE_END(mitsuba) //! @{ \name Enoki accessors for dynamic vectorization // ----------------------------------------------------------------------- -// // Enable usage of array pointers for our types -// ENOKI_CALL_SUPPORT_TEMPLATE_BEGIN(mitsuba::Mesh) -// ENOKI_CALL_SUPPORT_METHOD(fill_surface_interaction) -// ENOKI_CALL_SUPPORT_TEMPLATE_END(mitsuba::Mesh) +// Enable usage of array pointers for our types +ENOKI_CALL_SUPPORT_TEMPLATE_BEGIN(mitsuba::Mesh) + // ENOKI_CALL_SUPPORT_METHOD(fill_surface_interaction) + ENOKI_CALL_SUPPORT_METHOD(differentiable_position) + ENOKI_CALL_SUPPORT_GETTER_TYPE(faces, m_faces, uint8_t*) + ENOKI_CALL_SUPPORT_GETTER_TYPE(vertices, m_vertices, uint8_t*) +ENOKI_CALL_SUPPORT_TEMPLATE_END(mitsuba::Mesh) //! @} // ----------------------------------------------------------------------- diff --git a/include/mitsuba/render/optix/common.h b/include/mitsuba/render/optix/common.h index 74f4b24ec..02ec51025 100644 --- a/include/mitsuba/render/optix/common.h +++ b/include/mitsuba/render/optix/common.h @@ -31,6 +31,7 @@ struct OptixParams { bool *out_hit; OptixTraversableHandle handle; + bool fill_surface_interaction; }; #ifdef __CUDACC__ @@ -58,25 +59,28 @@ __device__ void write_output_params(OptixParams ¶ms, params.out_u[launch_index] = uv.x(); params.out_v[launch_index] = uv.y(); - params.out_ng_x[launch_index] = ng.x(); - params.out_ng_y[launch_index] = ng.y(); - params.out_ng_z[launch_index] = ng.z(); - - params.out_ns_x[launch_index] = ns.x(); - params.out_ns_y[launch_index] = ns.y(); - params.out_ns_z[launch_index] = ns.z(); - params.out_p_x[launch_index] = p.x(); params.out_p_y[launch_index] = p.y(); params.out_p_z[launch_index] = p.z(); - params.out_dp_du_x[launch_index] = dp_du.x(); - params.out_dp_du_y[launch_index] = dp_du.y(); - params.out_dp_du_z[launch_index] = dp_du.z(); + if (params.fill_surface_interaction) { + params.out_ng_x[launch_index] = ng.x(); + params.out_ng_y[launch_index] 
= ng.y(); + params.out_ng_z[launch_index] = ng.z(); + + params.out_ns_x[launch_index] = ns.x(); + params.out_ns_y[launch_index] = ns.y(); + params.out_ns_z[launch_index] = ns.z(); + + + params.out_dp_du_x[launch_index] = dp_du.x(); + params.out_dp_du_y[launch_index] = dp_du.y(); + params.out_dp_du_z[launch_index] = dp_du.z(); - params.out_dp_dv_x[launch_index] = dp_dv.x(); - params.out_dp_dv_y[launch_index] = dp_dv.y(); - params.out_dp_dv_z[launch_index] = dp_dv.z(); + params.out_dp_dv_x[launch_index] = dp_dv.x(); + params.out_dp_dv_y[launch_index] = dp_dv.y(); + params.out_dp_dv_z[launch_index] = dp_dv.z(); + } params.out_t[launch_index] = t; } diff --git a/include/mitsuba/render/scene.h b/include/mitsuba/render/scene.h index 94c179bb6..a007d1b4d 100644 --- a/include/mitsuba/render/scene.h +++ b/include/mitsuba/render/scene.h @@ -7,10 +7,27 @@ NAMESPACE_BEGIN(mitsuba) + +/** + * \brief Specifies the surface interaction computation mode when tracing rays + */ +enum class HitComputeMode : uint32_t { + /// Let the tracer engine (Embree, Optix) compute the surface interaction + Default = 0, + + /// Compute a differential surface interaction if shape parameters require gradients. This + /// mode will disable the computation by the tracer engine. 
+ Differentiable = 1, + + /// Only compute si.t, si.p, si.uv (barycentric), si.shape, and si.prim_index + Least = 2 +}; + + template class MTS_EXPORT_RENDER Scene : public Object { public: - MTS_IMPORT_TYPES(BSDF, Emitter, Film, Sampler, Shape, Sensor, Integrator, Medium, MediumPtr) + MTS_IMPORT_TYPES(BSDF, Emitter, EmitterPtr, Film, Sampler, Shape, ShapePtr, Sensor, Integrator, Medium, MediumPtr) /// Instantiate a scene from a \ref Properties object Scene(const Properties &props); @@ -35,6 +52,10 @@ class MTS_EXPORT_RENDER Scene : public Object { */ SurfaceInteraction3f ray_intersect(const Ray3f &ray, Mask active = true) const; + SurfaceInteraction3f ray_intersect(const Ray3f &ray, + HitComputeMode mode = HitComputeMode::Default, + Mask active = true) const; + /** * \brief Ray intersection using brute force search. Used in * unit tests to validate the kdtree-based ray tracer. @@ -68,6 +89,30 @@ class MTS_EXPORT_RENDER Scene : public Object { //! @{ \name Sampling interface // ============================================================= + /** + * \brief Sample the emitters of the scene + * + * Given an arbitrary reference point in the scene, this method samples + * an emitter. + * + * Ideally, the implementation should importance sample the product of + * the emission profile and the geometry term between the reference point + * and the position on the emitter. + * + * \param ref + * A reference point somewhere within the scene + * + * \param sample + * A uniformly distributed sample + * + * \return + * The sampled emitter and the sample probability. 
+ */ + std::pair + sample_emitter(const Interaction3f &ref, + const Float &sample, + Mask active = true) const; + /** * \brief Direct illumination sampling routine * @@ -179,7 +224,7 @@ class MTS_EXPORT_RENDER Scene : public Object { /// Trace a ray MTS_INLINE SurfaceInteraction3f ray_intersect_cpu(const Ray3f &ray, Mask active) const; - MTS_INLINE SurfaceInteraction3f ray_intersect_gpu(const Ray3f &ray, Mask active) const; + MTS_INLINE SurfaceInteraction3f ray_intersect_gpu(const Ray3f &ray, HitComputeMode mode, Mask active) const; MTS_INLINE SurfaceInteraction3f ray_intersect_naive_cpu(const Ray3f &ray, Mask active) const; /// Trace a shadow ray diff --git a/include/mitsuba/render/shape.h b/include/mitsuba/render/shape.h index 4d9cd29b5..f38645e8f 100644 --- a/include/mitsuba/render/shape.h +++ b/include/mitsuba/render/shape.h @@ -170,8 +170,20 @@ class MTS_EXPORT_RENDER Shape : public Object { * \param cache * Cached information about the previously computed intersection. */ - virtual void fill_surface_interaction(const Ray3f &ray, const Float *cache, - SurfaceInteraction3f &si, Mask active = true) const; + virtual SurfaceInteraction3f fill_surface_interaction(const Ray3f &ray, + const Float *cache, + const UInt32 &cache_indices, + SurfaceInteraction3f si, + Mask active = true) const; + + /** + * \brief Return a position and the corresponding geometric normal on this shape given + * a partially filled \c si (see \c HitComputeMode::Least ). + * The returned values are differentiable with respect to the shape parameters only. 
+ */ + virtual std::pair + differentiable_position(const SurfaceInteraction3f &si, + Mask active = true) const; /** * \brief Test for an intersection and return detailed information @@ -438,6 +450,7 @@ ENOKI_CALL_SUPPORT_TEMPLATE_BEGIN(mitsuba::Shape) ENOKI_CALL_SUPPORT_METHOD(eval_attribute) ENOKI_CALL_SUPPORT_METHOD(eval_attribute_1) ENOKI_CALL_SUPPORT_METHOD(eval_attribute_3) + ENOKI_CALL_SUPPORT_METHOD(differentiable_position) ENOKI_CALL_SUPPORT_GETTER_TYPE(emitter, m_emitter, const typename Class::Emitter *) ENOKI_CALL_SUPPORT_GETTER_TYPE(sensor, m_sensor, const typename Class::Sensor *) ENOKI_CALL_SUPPORT_GETTER_TYPE(bsdf, m_bsdf, const typename Class::BSDF *) diff --git a/resources/data b/resources/data index 0583e3b22..849934385 160000 --- a/resources/data +++ b/resources/data @@ -1 +1 @@ -Subproject commit 0583e3b22ea201d903481b1cd1b7980ac77a0571 +Subproject commit 8499343851ee4c9e1ea4ab54b0116d2024d85380 diff --git a/resources/mitsuba.conf.template b/resources/mitsuba.conf.template index 95870c2cf..233f1d111 100644 --- a/resources/mitsuba.conf.template +++ b/resources/mitsuba.conf.template @@ -71,7 +71,10 @@ "enabled": [ # The "scalar_rgb" variant *must* be included at the moment. 
"scalar_rgb", - "scalar_spectral" + "scalar_spectral", + "gpu_autodiff_spectral", + "gpu_autodiff_rgb", + "gpu_autodiff_mono" ], # If mitsuba is launched without any specific mode parameter, diff --git a/resources/ptx/optix_rt.ptx b/resources/ptx/optix_rt.ptx index d366ccd92..2d64fdc0b 100644 --- a/resources/ptx/optix_rt.ptx +++ b/resources/ptx/optix_rt.ptx @@ -17,7 +17,7 @@ .param .b64 vprintf_param_1 ) ; -.const .align 8 .b8 params[248]; +.const .align 8 .b8 params[256]; .global .align 1 .b8 $str[36] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 83, 84, 65, 67, 75, 95, 79, 86, 69, 82, 70, 76, 79, 87, 0}; .global .align 1 .b8 $str1[42] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 67, 69, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; .global .align 1 .b8 $str2[46] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; @@ -218,12 +218,12 @@ BB0_16: ) { - .reg .pred %p<13>; - .reg .b16 %rs<3>; - .reg .f32 %f<146>; + .reg .pred %p<15>; + .reg .b16 %rs<4>; + .reg .f32 %f<169>; .reg .b32 %r<22>; .reg .f64 %fd<5>; - .reg .b64 %rd<71>; + .reg .b64 %rd<74>; // inline asm @@ -247,61 +247,61 @@ BB0_16: setp.eq.s64 %p1, %rd1, 0; @%p1 bra BB1_2; - cvta.to.global.u64 %rd4, %rd1; - cvt.u64.u32 %rd5, %r1; - add.s64 %rd6, %rd4, %rd5; - mov.u16 %rs1, 1; - st.global.u8 [%rd6], %rs1; - bra.uni BB1_10; + cvta.to.global.u64 %rd5, %rd1; + cvt.u64.u32 %rd6, %r1; + add.s64 %rd7, %rd5, %rd6; + mov.u16 %rs2, 1; + st.global.u8 [%rd7], %rs2; + bra.uni BB1_14; BB1_2: // inline asm - call (%rd7), _optix_get_sbt_data_ptr_64, (); + call (%rd8), _optix_get_sbt_data_ptr_64, (); // inline asm - ld.u64 %rd3, [%rd7+8]; + ld.u64 %rd3, [%rd8+8]; // inline asm - call (%f16), _optix_get_world_ray_origin_x, (); + call (%f41), _optix_get_world_ray_origin_x, (); // 
inline asm // inline asm - call (%f17), _optix_get_world_ray_origin_y, (); + call (%f42), _optix_get_world_ray_origin_y, (); // inline asm // inline asm - call (%f18), _optix_get_world_ray_origin_z, (); + call (%f43), _optix_get_world_ray_origin_z, (); // inline asm // inline asm - call (%f19), _optix_get_world_ray_direction_x, (); + call (%f44), _optix_get_world_ray_direction_x, (); // inline asm // inline asm - call (%f20), _optix_get_world_ray_direction_y, (); + call (%f45), _optix_get_world_ray_direction_y, (); // inline asm // inline asm - call (%f21), _optix_get_world_ray_direction_z, (); + call (%f46), _optix_get_world_ray_direction_z, (); // inline asm // inline asm - call (%f22), _optix_get_ray_tmax, (); + call (%f47), _optix_get_ray_tmax, (); // inline asm - fma.rn.f32 %f2, %f22, %f19, %f16; - fma.rn.f32 %f3, %f22, %f20, %f17; - fma.rn.f32 %f4, %f22, %f21, %f18; - ld.v4.f32 {%f23, %f24, %f25, %f26}, [%rd3+208]; - ld.f32 %f30, [%rd3+160]; - fma.rn.f32 %f31, %f2, %f30, %f23; - ld.f32 %f32, [%rd3+164]; - fma.rn.f32 %f33, %f2, %f32, %f24; - ld.f32 %f34, [%rd3+168]; - fma.rn.f32 %f35, %f2, %f34, %f25; - ld.f32 %f36, [%rd3+176]; - fma.rn.f32 %f37, %f3, %f36, %f31; - ld.f32 %f38, [%rd3+180]; - fma.rn.f32 %f39, %f3, %f38, %f33; - ld.f32 %f40, [%rd3+184]; - fma.rn.f32 %f41, %f3, %f40, %f35; - ld.f32 %f42, [%rd3+192]; - fma.rn.f32 %f5, %f4, %f42, %f37; - ld.f32 %f43, [%rd3+196]; - fma.rn.f32 %f6, %f4, %f43, %f39; - ld.f32 %f44, [%rd3+200]; - fma.rn.f32 %f7, %f4, %f44, %f41; + fma.rn.f32 %f166, %f47, %f44, %f41; + fma.rn.f32 %f167, %f47, %f45, %f42; + fma.rn.f32 %f168, %f47, %f46, %f43; + ld.v4.f32 {%f48, %f49, %f50, %f51}, [%rd3+208]; + ld.f32 %f55, [%rd3+160]; + fma.rn.f32 %f56, %f166, %f55, %f48; + ld.f32 %f57, [%rd3+164]; + fma.rn.f32 %f58, %f166, %f57, %f49; + ld.f32 %f59, [%rd3+168]; + fma.rn.f32 %f60, %f166, %f59, %f50; + ld.f32 %f61, [%rd3+176]; + fma.rn.f32 %f62, %f167, %f61, %f56; + ld.f32 %f63, [%rd3+180]; + fma.rn.f32 %f64, %f167, %f63, %f58; + ld.f32 
%f65, [%rd3+184]; + fma.rn.f32 %f66, %f167, %f65, %f60; + ld.f32 %f67, [%rd3+192]; + fma.rn.f32 %f5, %f168, %f67, %f62; + ld.f32 %f68, [%rd3+196]; + fma.rn.f32 %f6, %f168, %f68, %f64; + ld.f32 %f69, [%rd3+200]; + fma.rn.f32 %f7, %f168, %f69, %f66; abs.f32 %f8, %f5; abs.f32 %f9, %f6; setp.eq.f32 %p2, %f8, 0f00000000; @@ -317,7 +317,7 @@ BB1_6: shr.s32 %r18, %r2, 31; and.b32 %r19, %r18, 1078530011; or.b32 %r20, %r19, %r3; - mov.b32 %f144, %r20; + mov.b32 %f155, %r20; bra.uni BB1_7; BB1_3: @@ -332,202 +332,216 @@ BB1_5: and.b32 %r15, %r14, 13483017; add.s32 %r16, %r15, 1061752795; or.b32 %r17, %r16, %r3; - mov.b32 %f144, %r17; + mov.b32 %f155, %r17; bra.uni BB1_7; BB1_4: - max.f32 %f45, %f9, %f8; - min.f32 %f46, %f9, %f8; - div.rn.f32 %f47, %f46, %f45; - mul.rn.f32 %f48, %f47, %f47; - mov.f32 %f49, 0fC0B59883; - mov.f32 %f50, 0fBF52C7EA; - fma.rn.f32 %f51, %f48, %f50, %f49; - mov.f32 %f52, 0fC0D21907; - fma.rn.f32 %f53, %f51, %f48, %f52; - mul.f32 %f54, %f48, %f53; - mul.f32 %f55, %f47, %f54; - add.f32 %f56, %f48, 0f41355DC0; - mov.f32 %f57, 0f41E6BD60; - fma.rn.f32 %f58, %f56, %f48, %f57; - mov.f32 %f59, 0f419D92C8; - fma.rn.f32 %f60, %f58, %f48, %f59; - rcp.rn.f32 %f61, %f60; - fma.rn.f32 %f62, %f55, %f61, %f47; - mov.f32 %f63, 0f3FC90FDB; - sub.f32 %f64, %f63, %f62; + max.f32 %f70, %f9, %f8; + min.f32 %f71, %f9, %f8; + div.rn.f32 %f72, %f71, %f70; + mul.rn.f32 %f73, %f72, %f72; + mov.f32 %f74, 0fC0B59883; + mov.f32 %f75, 0fBF52C7EA; + fma.rn.f32 %f76, %f73, %f75, %f74; + mov.f32 %f77, 0fC0D21907; + fma.rn.f32 %f78, %f76, %f73, %f77; + mul.f32 %f79, %f73, %f78; + mul.f32 %f80, %f72, %f79; + add.f32 %f81, %f73, 0f41355DC0; + mov.f32 %f82, 0f41E6BD60; + fma.rn.f32 %f83, %f81, %f73, %f82; + mov.f32 %f84, 0f419D92C8; + fma.rn.f32 %f85, %f83, %f73, %f84; + rcp.rn.f32 %f86, %f85; + fma.rn.f32 %f87, %f80, %f86, %f72; + mov.f32 %f88, 0f3FC90FDB; + sub.f32 %f89, %f88, %f87; setp.gt.f32 %p8, %f9, %f8; - selp.f32 %f65, %f64, %f62, %p8; - mov.f32 %f66, 0f40490FDB; - sub.f32 
%f67, %f66, %f65; + selp.f32 %f90, %f89, %f87, %p8; + mov.f32 %f91, 0f40490FDB; + sub.f32 %f92, %f91, %f90; setp.lt.s32 %p9, %r2, 0; - selp.f32 %f68, %f67, %f65, %p9; - mov.b32 %r12, %f68; + selp.f32 %f93, %f92, %f90, %p9; + mov.b32 %r12, %f93; or.b32 %r13, %r12, %r3; - mov.b32 %f69, %r13; - add.f32 %f70, %f8, %f9; - setp.gtu.f32 %p10, %f70, 0f7F800000; - selp.f32 %f144, %f70, %f69, %p10; + mov.b32 %f94, %r13; + add.f32 %f95, %f8, %f9; + setp.gtu.f32 %p10, %f95, 0f7F800000; + selp.f32 %f155, %f95, %f94, %p10; BB1_7: - setp.geu.f32 %p11, %f144, 0f00000000; + setp.geu.f32 %p11, %f155, 0f00000000; @%p11 bra BB1_9; - cvt.f64.f32 %fd1, %f144; + cvt.f64.f32 %fd1, %f155; add.f64 %fd2, %fd1, 0d401921FB54442D18; - cvt.rn.f32.f64 %f144, %fd2; + cvt.rn.f32.f64 %f155, %fd2; BB1_9: - ld.v2.f32 {%f71, %f72}, [%rd3+288]; - div.rn.f32 %f75, %f7, %f71; - cvt.f64.f32 %fd3, %f144; - div.rn.f64 %fd4, %fd3, 0d401921FB54442D18; - cvt.rn.f32.f64 %f76, %fd4; - ld.v4.f32 {%f77, %f78, %f79, %f80}, [%rd3+32]; - mul.f32 %f84, %f6, 0fC0C90FDB; - mul.f32 %f85, %f84, %f77; - mul.f32 %f86, %f84, %f78; - mul.f32 %f87, %f84, %f79; - ld.v4.f32 {%f88, %f89, %f90, %f91}, [%rd3+48]; - mul.f32 %f95, %f5, 0f40C90FDB; - fma.rn.f32 %f96, %f95, %f88, %f85; - fma.rn.f32 %f97, %f95, %f89, %f86; - fma.rn.f32 %f98, %f95, %f90, %f87; - ld.f32 %f99, [%rd3+64]; - mov.f32 %f100, 0f00000000; - fma.rn.f32 %f101, %f100, %f99, %f96; - ld.f32 %f102, [%rd3+68]; - fma.rn.f32 %f103, %f100, %f102, %f97; - ld.f32 %f104, [%rd3+72]; - fma.rn.f32 %f105, %f100, %f104, %f98; - mul.f32 %f106, %f77, 0f00000000; - mul.f32 %f107, %f78, 0f00000000; - mul.f32 %f108, %f79, 0f00000000; - fma.rn.f32 %f109, %f100, %f88, %f106; - fma.rn.f32 %f110, %f100, %f89, %f107; - fma.rn.f32 %f111, %f100, %f90, %f108; - fma.rn.f32 %f112, %f71, %f99, %f109; - fma.rn.f32 %f113, %f71, %f102, %f110; - fma.rn.f32 %f114, %f71, %f104, %f111; - mul.f32 %f115, %f114, %f103; - mul.f32 %f116, %f105, %f113; - sub.f32 %f117, %f115, %f116; - mul.f32 %f118, %f105, 
%f112; - mul.f32 %f119, %f101, %f114; - sub.f32 %f120, %f118, %f119; - mul.f32 %f121, %f101, %f113; - mul.f32 %f122, %f103, %f112; - sub.f32 %f123, %f121, %f122; - mul.f32 %f124, %f117, %f117; - fma.rn.f32 %f125, %f120, %f120, %f124; - fma.rn.f32 %f126, %f123, %f123, %f125; - sqrt.rn.f32 %f127, %f126; - div.rn.f32 %f128, %f117, %f127; - div.rn.f32 %f129, %f120, %f127; - div.rn.f32 %f130, %f123, %f127; - mul.f32 %f131, %f5, %f5; - fma.rn.f32 %f132, %f6, %f6, %f131; - sqrt.rn.f32 %f133, %f132; - sub.f32 %f134, %f72, %f133; - fma.rn.f32 %f135, %f128, %f134, %f2; - fma.rn.f32 %f136, %f129, %f134, %f3; - fma.rn.f32 %f137, %f130, %f134, %f4; - ld.u8 %rs2, [%rd3+296]; - setp.eq.s16 %p12, %rs2, 0; - neg.f32 %f138, %f128; - neg.f32 %f139, %f129; - neg.f32 %f140, %f130; - selp.f32 %f141, %f130, %f140, %p12; - selp.f32 %f142, %f129, %f139, %p12; - selp.f32 %f143, %f128, %f138, %p12; - ld.u64 %rd8, [%rd7]; + ld.f32 %f16, [%rd3+288]; + ld.const.u8 %rs1, [params+248]; + setp.eq.s16 %p12, %rs1, 0; + @%p12 bra BB1_11; + + mul.f32 %f97, %f6, 0fC0C90FDB; + ld.v4.f32 {%f98, %f99, %f100, %f101}, [%rd3+32]; + mul.f32 %f105, %f97, %f98; + mul.f32 %f106, %f97, %f99; + mul.f32 %f107, %f97, %f100; + ld.v4.f32 {%f108, %f109, %f110, %f111}, [%rd3+48]; + mul.f32 %f115, %f5, 0f40C90FDB; + fma.rn.f32 %f116, %f115, %f108, %f105; + fma.rn.f32 %f117, %f115, %f109, %f106; + fma.rn.f32 %f118, %f115, %f110, %f107; + ld.f32 %f119, [%rd3+64]; + mov.f32 %f120, 0f00000000; + fma.rn.f32 %f160, %f120, %f119, %f116; + ld.f32 %f121, [%rd3+68]; + fma.rn.f32 %f161, %f120, %f121, %f117; + ld.f32 %f122, [%rd3+72]; + fma.rn.f32 %f162, %f120, %f122, %f118; + mul.f32 %f123, %f98, 0f00000000; + mul.f32 %f124, %f99, 0f00000000; + mul.f32 %f125, %f100, 0f00000000; + fma.rn.f32 %f126, %f120, %f108, %f123; + fma.rn.f32 %f127, %f120, %f109, %f124; + fma.rn.f32 %f128, %f120, %f110, %f125; + fma.rn.f32 %f157, %f16, %f119, %f126; + fma.rn.f32 %f158, %f16, %f121, %f127; + fma.rn.f32 %f159, %f16, %f122, %f128; + mul.f32 
%f129, %f159, %f161; + mul.f32 %f130, %f162, %f158; + sub.f32 %f131, %f129, %f130; + mul.f32 %f132, %f162, %f157; + mul.f32 %f133, %f160, %f159; + sub.f32 %f134, %f132, %f133; + mul.f32 %f135, %f160, %f158; + mul.f32 %f136, %f161, %f157; + sub.f32 %f137, %f135, %f136; + mul.f32 %f138, %f131, %f131; + fma.rn.f32 %f139, %f134, %f134, %f138; + fma.rn.f32 %f140, %f137, %f137, %f139; + sqrt.rn.f32 %f141, %f140; + div.rn.f32 %f142, %f131, %f141; + div.rn.f32 %f143, %f134, %f141; + div.rn.f32 %f144, %f137, %f141; + mul.f32 %f145, %f5, %f5; + fma.rn.f32 %f146, %f6, %f6, %f145; + sqrt.rn.f32 %f147, %f146; + ld.f32 %f148, [%rd3+292]; + sub.f32 %f149, %f148, %f147; + fma.rn.f32 %f166, %f142, %f149, %f166; + fma.rn.f32 %f167, %f143, %f149, %f167; + fma.rn.f32 %f168, %f144, %f149, %f168; + ld.u8 %rs3, [%rd3+296]; + setp.eq.s16 %p13, %rs3, 0; + neg.f32 %f150, %f142; + neg.f32 %f151, %f143; + neg.f32 %f152, %f144; + selp.f32 %f163, %f142, %f150, %p13; + selp.f32 %f164, %f143, %f151, %p13; + selp.f32 %f165, %f144, %f152, %p13; + +BB1_11: + ld.u64 %rd9, [%rd8]; // inline asm call (%r21), _optix_read_primitive_idx, (); // inline asm - ld.const.u64 %rd9, [params+216]; - cvta.to.global.u64 %rd10, %rd9; - mul.wide.u32 %rd11, %r1, 8; - add.s64 %rd12, %rd10, %rd11; - st.global.u64 [%rd12], %rd8; - ld.const.u64 %rd13, [params+224]; - cvta.to.global.u64 %rd14, %rd13; - mul.wide.u32 %rd15, %r1, 4; - add.s64 %rd16, %rd14, %rd15; - st.global.u32 [%rd16], %r21; - ld.const.u64 %rd17, [params+80]; - cvta.to.global.u64 %rd18, %rd17; - add.s64 %rd19, %rd18, %rd15; - st.global.f32 [%rd19], %f76; - ld.const.u64 %rd20, [params+88]; - cvta.to.global.u64 %rd21, %rd20; - add.s64 %rd22, %rd21, %rd15; - st.global.f32 [%rd22], %f75; - ld.const.u64 %rd23, [params+96]; - cvta.to.global.u64 %rd24, %rd23; - add.s64 %rd25, %rd24, %rd15; - st.global.f32 [%rd25], %f143; - ld.const.u64 %rd26, [params+104]; - cvta.to.global.u64 %rd27, %rd26; - add.s64 %rd28, %rd27, %rd15; - st.global.f32 [%rd28], %f142; - 
ld.const.u64 %rd29, [params+112]; - cvta.to.global.u64 %rd30, %rd29; - add.s64 %rd31, %rd30, %rd15; - st.global.f32 [%rd31], %f141; - ld.const.u64 %rd32, [params+120]; - cvta.to.global.u64 %rd33, %rd32; - add.s64 %rd34, %rd33, %rd15; - st.global.f32 [%rd34], %f143; - ld.const.u64 %rd35, [params+128]; - cvta.to.global.u64 %rd36, %rd35; - add.s64 %rd37, %rd36, %rd15; - st.global.f32 [%rd37], %f142; - ld.const.u64 %rd38, [params+136]; - cvta.to.global.u64 %rd39, %rd38; - add.s64 %rd40, %rd39, %rd15; - st.global.f32 [%rd40], %f141; - ld.const.u64 %rd41, [params+144]; - cvta.to.global.u64 %rd42, %rd41; - add.s64 %rd43, %rd42, %rd15; - st.global.f32 [%rd43], %f135; - ld.const.u64 %rd44, [params+152]; - cvta.to.global.u64 %rd45, %rd44; - add.s64 %rd46, %rd45, %rd15; - st.global.f32 [%rd46], %f136; - ld.const.u64 %rd47, [params+160]; - cvta.to.global.u64 %rd48, %rd47; - add.s64 %rd49, %rd48, %rd15; - st.global.f32 [%rd49], %f137; - ld.const.u64 %rd50, [params+168]; - cvta.to.global.u64 %rd51, %rd50; - add.s64 %rd52, %rd51, %rd15; - st.global.f32 [%rd52], %f101; - ld.const.u64 %rd53, [params+176]; - cvta.to.global.u64 %rd54, %rd53; - add.s64 %rd55, %rd54, %rd15; - st.global.f32 [%rd55], %f103; - ld.const.u64 %rd56, [params+184]; - cvta.to.global.u64 %rd57, %rd56; - add.s64 %rd58, %rd57, %rd15; - st.global.f32 [%rd58], %f105; - ld.const.u64 %rd59, [params+192]; - cvta.to.global.u64 %rd60, %rd59; - add.s64 %rd61, %rd60, %rd15; - st.global.f32 [%rd61], %f112; - ld.const.u64 %rd62, [params+200]; - cvta.to.global.u64 %rd63, %rd62; - add.s64 %rd64, %rd63, %rd15; - st.global.f32 [%rd64], %f113; - ld.const.u64 %rd65, [params+208]; - cvta.to.global.u64 %rd66, %rd65; - add.s64 %rd67, %rd66, %rd15; - st.global.f32 [%rd67], %f114; - ld.const.u64 %rd68, [params+72]; - cvta.to.global.u64 %rd69, %rd68; - add.s64 %rd70, %rd69, %rd15; - st.global.f32 [%rd70], %f22; + ld.const.u64 %rd10, [params+216]; + cvta.to.global.u64 %rd11, %rd10; + cvt.u64.u32 %rd4, %r1; + mul.wide.u32 %rd12, %r1, 8; + 
add.s64 %rd13, %rd11, %rd12; + st.global.u64 [%rd13], %rd9; + ld.const.u64 %rd14, [params+224]; + cvta.to.global.u64 %rd15, %rd14; + mul.wide.u32 %rd16, %r1, 4; + add.s64 %rd17, %rd15, %rd16; + st.global.u32 [%rd17], %r21; + ld.const.u64 %rd18, [params+80]; + cvta.to.global.u64 %rd19, %rd18; + add.s64 %rd20, %rd19, %rd16; + cvt.f64.f32 %fd3, %f155; + div.rn.f64 %fd4, %fd3, 0d401921FB54442D18; + cvt.rn.f32.f64 %f153, %fd4; + st.global.f32 [%rd20], %f153; + ld.const.u64 %rd21, [params+88]; + cvta.to.global.u64 %rd22, %rd21; + add.s64 %rd23, %rd22, %rd16; + div.rn.f32 %f154, %f7, %f16; + st.global.f32 [%rd23], %f154; + ld.const.u64 %rd24, [params+144]; + cvta.to.global.u64 %rd25, %rd24; + add.s64 %rd26, %rd25, %rd16; + st.global.f32 [%rd26], %f166; + ld.const.u64 %rd27, [params+152]; + cvta.to.global.u64 %rd28, %rd27; + add.s64 %rd29, %rd28, %rd16; + st.global.f32 [%rd29], %f167; + ld.const.u64 %rd30, [params+160]; + cvta.to.global.u64 %rd31, %rd30; + add.s64 %rd32, %rd31, %rd16; + st.global.f32 [%rd32], %f168; + @%p12 bra BB1_13; + + ld.const.u64 %rd33, [params+96]; + cvta.to.global.u64 %rd34, %rd33; + shl.b64 %rd35, %rd4, 2; + add.s64 %rd36, %rd34, %rd35; + st.global.f32 [%rd36], %f163; + ld.const.u64 %rd37, [params+104]; + cvta.to.global.u64 %rd38, %rd37; + add.s64 %rd39, %rd38, %rd35; + st.global.f32 [%rd39], %f164; + ld.const.u64 %rd40, [params+112]; + cvta.to.global.u64 %rd41, %rd40; + add.s64 %rd42, %rd41, %rd35; + st.global.f32 [%rd42], %f165; + ld.const.u64 %rd43, [params+120]; + cvta.to.global.u64 %rd44, %rd43; + add.s64 %rd45, %rd44, %rd35; + st.global.f32 [%rd45], %f163; + ld.const.u64 %rd46, [params+128]; + cvta.to.global.u64 %rd47, %rd46; + add.s64 %rd48, %rd47, %rd35; + st.global.f32 [%rd48], %f164; + ld.const.u64 %rd49, [params+136]; + cvta.to.global.u64 %rd50, %rd49; + add.s64 %rd51, %rd50, %rd35; + st.global.f32 [%rd51], %f165; + ld.const.u64 %rd52, [params+168]; + cvta.to.global.u64 %rd53, %rd52; + add.s64 %rd54, %rd53, %rd35; + st.global.f32 
[%rd54], %f160; + ld.const.u64 %rd55, [params+176]; + cvta.to.global.u64 %rd56, %rd55; + add.s64 %rd57, %rd56, %rd35; + st.global.f32 [%rd57], %f161; + ld.const.u64 %rd58, [params+184]; + cvta.to.global.u64 %rd59, %rd58; + add.s64 %rd60, %rd59, %rd35; + st.global.f32 [%rd60], %f162; + ld.const.u64 %rd61, [params+192]; + cvta.to.global.u64 %rd62, %rd61; + add.s64 %rd63, %rd62, %rd35; + st.global.f32 [%rd63], %f157; + ld.const.u64 %rd64, [params+200]; + cvta.to.global.u64 %rd65, %rd64; + add.s64 %rd66, %rd65, %rd35; + st.global.f32 [%rd66], %f158; + ld.const.u64 %rd67, [params+208]; + cvta.to.global.u64 %rd68, %rd67; + add.s64 %rd69, %rd68, %rd35; + st.global.f32 [%rd69], %f159; -BB1_10: +BB1_13: + ld.const.u64 %rd70, [params+72]; + cvta.to.global.u64 %rd71, %rd70; + shl.b64 %rd72, %rd4, 2; + add.s64 %rd73, %rd71, %rd72; + st.global.f32 [%rd73], %f47; + +BB1_14: ret; } @@ -616,12 +630,12 @@ BB2_2: ) { - .reg .pred %p<13>; - .reg .b16 %rs<2>; - .reg .f32 %f<176>; + .reg .pred %p<15>; + .reg .b16 %rs<3>; + .reg .f32 %f<195>; .reg .b32 %r<22>; .reg .f64 %fd<3>; - .reg .b64 %rd<71>; + .reg .b64 %rd<74>; // inline asm @@ -645,72 +659,72 @@ BB2_2: setp.eq.s64 %p1, %rd1, 0; @%p1 bra BB3_2; - cvta.to.global.u64 %rd4, %rd1; - cvt.u64.u32 %rd5, %r1; - add.s64 %rd6, %rd4, %rd5; - mov.u16 %rs1, 1; - st.global.u8 [%rd6], %rs1; - bra.uni BB3_8; + cvta.to.global.u64 %rd5, %rd1; + cvt.u64.u32 %rd6, %r1; + add.s64 %rd7, %rd5, %rd6; + mov.u16 %rs2, 1; + st.global.u8 [%rd7], %rs2; + bra.uni BB3_12; BB3_2: // inline asm - call (%rd7), _optix_get_sbt_data_ptr_64, (); + call (%rd8), _optix_get_sbt_data_ptr_64, (); // inline asm - ld.u64 %rd3, [%rd7+8]; + ld.u64 %rd3, [%rd8+8]; // inline asm - call (%f17), _optix_get_world_ray_origin_x, (); + call (%f38), _optix_get_world_ray_origin_x, (); // inline asm // inline asm - call (%f18), _optix_get_world_ray_origin_y, (); + call (%f39), _optix_get_world_ray_origin_y, (); // inline asm // inline asm - call (%f19), _optix_get_world_ray_origin_z, 
(); + call (%f40), _optix_get_world_ray_origin_z, (); // inline asm // inline asm - call (%f20), _optix_get_world_ray_direction_x, (); + call (%f41), _optix_get_world_ray_direction_x, (); // inline asm // inline asm - call (%f21), _optix_get_world_ray_direction_y, (); + call (%f42), _optix_get_world_ray_direction_y, (); // inline asm // inline asm - call (%f22), _optix_get_world_ray_direction_z, (); + call (%f43), _optix_get_world_ray_direction_z, (); // inline asm - ld.v4.f32 {%f23, %f24, %f25, %f26}, [%rd3+208]; - ld.f32 %f30, [%rd3+160]; - fma.rn.f32 %f31, %f17, %f30, %f23; - ld.f32 %f32, [%rd3+164]; - fma.rn.f32 %f33, %f17, %f32, %f24; - ld.f32 %f34, [%rd3+168]; - fma.rn.f32 %f35, %f17, %f34, %f25; - ld.f32 %f36, [%rd3+176]; - fma.rn.f32 %f37, %f18, %f36, %f31; - ld.f32 %f38, [%rd3+180]; - fma.rn.f32 %f39, %f18, %f38, %f33; - ld.f32 %f40, [%rd3+184]; - fma.rn.f32 %f41, %f18, %f40, %f35; - ld.f32 %f42, [%rd3+192]; - fma.rn.f32 %f43, %f19, %f42, %f37; - ld.f32 %f44, [%rd3+196]; - fma.rn.f32 %f45, %f19, %f44, %f39; - ld.f32 %f46, [%rd3+200]; - fma.rn.f32 %f47, %f19, %f46, %f41; - ld.v4.f32 {%f48, %f49, %f50, %f51}, [%rd3+160]; - mul.f32 %f55, %f20, %f48; - mul.f32 %f56, %f20, %f49; - mul.f32 %f57, %f20, %f50; - fma.rn.f32 %f58, %f21, %f36, %f55; - fma.rn.f32 %f59, %f21, %f38, %f56; - fma.rn.f32 %f60, %f21, %f40, %f57; - fma.rn.f32 %f61, %f22, %f42, %f58; - fma.rn.f32 %f62, %f22, %f44, %f59; - fma.rn.f32 %f63, %f22, %f46, %f60; - neg.f32 %f64, %f47; - div.rn.f32 %f7, %f64, %f63; - fma.rn.f32 %f8, %f7, %f61, %f43; - fma.rn.f32 %f9, %f7, %f62, %f45; - mul.f32 %f65, %f8, %f8; - fma.rn.f32 %f66, %f9, %f9, %f65; - sqrt.rn.f32 %f10, %f66; + ld.v4.f32 {%f44, %f45, %f46, %f47}, [%rd3+208]; + ld.f32 %f51, [%rd3+160]; + fma.rn.f32 %f52, %f38, %f51, %f44; + ld.f32 %f53, [%rd3+164]; + fma.rn.f32 %f54, %f38, %f53, %f45; + ld.f32 %f55, [%rd3+168]; + fma.rn.f32 %f56, %f38, %f55, %f46; + ld.f32 %f57, [%rd3+176]; + fma.rn.f32 %f58, %f39, %f57, %f52; + ld.f32 %f59, [%rd3+180]; + 
fma.rn.f32 %f60, %f39, %f59, %f54; + ld.f32 %f61, [%rd3+184]; + fma.rn.f32 %f62, %f39, %f61, %f56; + ld.f32 %f63, [%rd3+192]; + fma.rn.f32 %f64, %f40, %f63, %f58; + ld.f32 %f65, [%rd3+196]; + fma.rn.f32 %f66, %f40, %f65, %f60; + ld.f32 %f67, [%rd3+200]; + fma.rn.f32 %f68, %f40, %f67, %f62; + ld.v4.f32 {%f69, %f70, %f71, %f72}, [%rd3+160]; + mul.f32 %f76, %f41, %f69; + mul.f32 %f77, %f41, %f70; + mul.f32 %f78, %f41, %f71; + fma.rn.f32 %f79, %f42, %f57, %f76; + fma.rn.f32 %f80, %f42, %f59, %f77; + fma.rn.f32 %f81, %f42, %f61, %f78; + fma.rn.f32 %f82, %f43, %f63, %f79; + fma.rn.f32 %f83, %f43, %f65, %f80; + fma.rn.f32 %f84, %f43, %f67, %f81; + neg.f32 %f85, %f68; + div.rn.f32 %f7, %f85, %f84; + fma.rn.f32 %f8, %f7, %f82, %f64; + fma.rn.f32 %f9, %f7, %f83, %f66; + mul.f32 %f86, %f8, %f8; + fma.rn.f32 %f87, %f9, %f9, %f86; + sqrt.rn.f32 %f10, %f87; abs.f32 %f11, %f8; abs.f32 %f12, %f9; setp.eq.f32 %p2, %f11, 0f00000000; @@ -726,7 +740,7 @@ BB3_6: shr.s32 %r18, %r2, 31; and.b32 %r19, %r18, 1078530011; or.b32 %r20, %r19, %r3; - mov.b32 %f175, %r20; + mov.b32 %f185, %r20; bra.uni BB3_7; BB3_3: @@ -741,194 +755,207 @@ BB3_5: and.b32 %r15, %r14, 13483017; add.s32 %r16, %r15, 1061752795; or.b32 %r17, %r16, %r3; - mov.b32 %f175, %r17; + mov.b32 %f185, %r17; bra.uni BB3_7; BB3_4: - max.f32 %f67, %f12, %f11; - min.f32 %f68, %f12, %f11; - div.rn.f32 %f69, %f68, %f67; - mul.rn.f32 %f70, %f69, %f69; - mov.f32 %f71, 0fC0B59883; - mov.f32 %f72, 0fBF52C7EA; - fma.rn.f32 %f73, %f70, %f72, %f71; - mov.f32 %f74, 0fC0D21907; - fma.rn.f32 %f75, %f73, %f70, %f74; - mul.f32 %f76, %f70, %f75; - mul.f32 %f77, %f69, %f76; - add.f32 %f78, %f70, 0f41355DC0; - mov.f32 %f79, 0f41E6BD60; - fma.rn.f32 %f80, %f78, %f70, %f79; - mov.f32 %f81, 0f419D92C8; - fma.rn.f32 %f82, %f80, %f70, %f81; - rcp.rn.f32 %f83, %f82; - fma.rn.f32 %f84, %f77, %f83, %f69; - mov.f32 %f85, 0f3FC90FDB; - sub.f32 %f86, %f85, %f84; + max.f32 %f88, %f12, %f11; + min.f32 %f89, %f12, %f11; + div.rn.f32 %f90, %f89, %f88; + 
mul.rn.f32 %f91, %f90, %f90; + mov.f32 %f92, 0fC0B59883; + mov.f32 %f93, 0fBF52C7EA; + fma.rn.f32 %f94, %f91, %f93, %f92; + mov.f32 %f95, 0fC0D21907; + fma.rn.f32 %f96, %f94, %f91, %f95; + mul.f32 %f97, %f91, %f96; + mul.f32 %f98, %f90, %f97; + add.f32 %f99, %f91, 0f41355DC0; + mov.f32 %f100, 0f41E6BD60; + fma.rn.f32 %f101, %f99, %f91, %f100; + mov.f32 %f102, 0f419D92C8; + fma.rn.f32 %f103, %f101, %f91, %f102; + rcp.rn.f32 %f104, %f103; + fma.rn.f32 %f105, %f98, %f104, %f90; + mov.f32 %f106, 0f3FC90FDB; + sub.f32 %f107, %f106, %f105; setp.gt.f32 %p8, %f12, %f11; - selp.f32 %f87, %f86, %f84, %p8; - mov.f32 %f88, 0f40490FDB; - sub.f32 %f89, %f88, %f87; + selp.f32 %f108, %f107, %f105, %p8; + mov.f32 %f109, 0f40490FDB; + sub.f32 %f110, %f109, %f108; setp.lt.s32 %p9, %r2, 0; - selp.f32 %f90, %f89, %f87, %p9; - mov.b32 %r12, %f90; + selp.f32 %f111, %f110, %f108, %p9; + mov.b32 %r12, %f111; or.b32 %r13, %r12, %r3; - mov.b32 %f91, %r13; - add.f32 %f92, %f11, %f12; - setp.gtu.f32 %p10, %f92, 0f7F800000; - selp.f32 %f175, %f92, %f91, %p10; + mov.b32 %f112, %r13; + add.f32 %f113, %f11, %f12; + setp.gtu.f32 %p10, %f113, 0f7F800000; + selp.f32 %f185, %f113, %f112, %p10; BB3_7: - rcp.rn.f32 %f93, %f10; - mul.f32 %f94, %f8, %f93; - setp.neu.f32 %p11, %f10, 0f00000000; - selp.f32 %f95, %f94, 0f3F800000, %p11; - mul.f32 %f96, %f9, %f93; - selp.f32 %f97, %f96, 0f00000000, %p11; - ld.v4.f32 {%f98, %f99, %f100, %f101}, [%rd3+32]; - cvt.f64.f32 %fd1, %f175; + fma.rn.f32 %f17, %f41, %f7, %f38; + fma.rn.f32 %f18, %f42, %f7, %f39; + fma.rn.f32 %f19, %f43, %f7, %f40; + ld.const.u8 %rs1, [params+248]; + setp.eq.s16 %p11, %rs1, 0; + @%p11 bra BB3_9; + + rcp.rn.f32 %f115, %f10; + setp.neu.f32 %p12, %f10, 0f00000000; + mul.f32 %f116, %f8, %f115; + selp.f32 %f117, %f116, 0f3F800000, %p12; + mul.f32 %f118, %f9, %f115; + selp.f32 %f119, %f118, 0f00000000, %p12; + ld.v4.f32 {%f120, %f121, %f122, %f123}, [%rd3+32]; + mul.f32 %f127, %f117, %f120; + mul.f32 %f128, %f117, %f121; + mul.f32 %f129, 
%f117, %f122; + ld.v4.f32 {%f130, %f131, %f132, %f133}, [%rd3+48]; + fma.rn.f32 %f137, %f119, %f130, %f127; + fma.rn.f32 %f138, %f119, %f131, %f128; + fma.rn.f32 %f139, %f119, %f132, %f129; + ld.f32 %f140, [%rd3+64]; + mov.f32 %f141, 0f00000000; + fma.rn.f32 %f191, %f141, %f140, %f137; + ld.f32 %f142, [%rd3+68]; + fma.rn.f32 %f190, %f141, %f142, %f138; + ld.f32 %f143, [%rd3+72]; + fma.rn.f32 %f189, %f141, %f143, %f139; + neg.f32 %f144, %f119; + mul.f32 %f145, %f120, %f144; + mul.f32 %f146, %f121, %f144; + mul.f32 %f147, %f122, %f144; + fma.rn.f32 %f148, %f117, %f130, %f145; + fma.rn.f32 %f149, %f117, %f131, %f146; + fma.rn.f32 %f150, %f117, %f132, %f147; + fma.rn.f32 %f194, %f141, %f140, %f148; + fma.rn.f32 %f193, %f141, %f142, %f149; + fma.rn.f32 %f192, %f141, %f143, %f150; + ld.v4.f32 {%f151, %f152, %f153, %f154}, [%rd3+96]; + mul.f32 %f158, %f151, 0f00000000; + mul.f32 %f159, %f152, 0f00000000; + mul.f32 %f160, %f153, 0f00000000; + ld.v4.f32 {%f161, %f162, %f163, %f164}, [%rd3+112]; + fma.rn.f32 %f168, %f141, %f161, %f158; + fma.rn.f32 %f169, %f141, %f162, %f159; + fma.rn.f32 %f170, %f141, %f163, %f160; + ld.f32 %f171, [%rd3+128]; + mov.f32 %f172, 0f3F800000; + fma.rn.f32 %f173, %f172, %f171, %f168; + ld.f32 %f174, [%rd3+132]; + fma.rn.f32 %f175, %f172, %f174, %f169; + ld.f32 %f176, [%rd3+136]; + fma.rn.f32 %f177, %f172, %f176, %f170; + mul.f32 %f178, %f173, %f173; + fma.rn.f32 %f179, %f175, %f175, %f178; + fma.rn.f32 %f180, %f177, %f177, %f179; + sqrt.rn.f32 %f181, %f180; + div.rn.f32 %f187, %f173, %f181; + div.rn.f32 %f186, %f175, %f181; + div.rn.f32 %f188, %f177, %f181; + +BB3_9: + cvt.f64.f32 %fd1, %f185; div.rn.f64 %fd2, %fd1, 0d401921FB54442D18; - cvt.rn.f32.f64 %f105, %fd2; - setp.lt.f32 %p12, %f105, 0f00000000; - add.f32 %f106, %f105, 0f3F800000; - selp.f32 %f107, %f106, %f105, %p12; - mul.f32 %f108, %f95, %f98; - mul.f32 %f109, %f95, %f99; - mul.f32 %f110, %f95, %f100; - ld.v4.f32 {%f111, %f112, %f113, %f114}, [%rd3+48]; - fma.rn.f32 %f118, %f97, %f111, 
%f108; - fma.rn.f32 %f119, %f97, %f112, %f109; - fma.rn.f32 %f120, %f97, %f113, %f110; - ld.f32 %f121, [%rd3+64]; - mov.f32 %f122, 0f00000000; - fma.rn.f32 %f123, %f122, %f121, %f118; - ld.f32 %f124, [%rd3+68]; - fma.rn.f32 %f125, %f122, %f124, %f119; - ld.f32 %f126, [%rd3+72]; - fma.rn.f32 %f127, %f122, %f126, %f120; - neg.f32 %f128, %f97; - mul.f32 %f129, %f98, %f128; - mul.f32 %f130, %f99, %f128; - mul.f32 %f131, %f100, %f128; - fma.rn.f32 %f132, %f95, %f111, %f129; - fma.rn.f32 %f133, %f95, %f112, %f130; - fma.rn.f32 %f134, %f95, %f113, %f131; - fma.rn.f32 %f135, %f122, %f121, %f132; - fma.rn.f32 %f136, %f122, %f124, %f133; - fma.rn.f32 %f137, %f122, %f126, %f134; - ld.v4.f32 {%f138, %f139, %f140, %f141}, [%rd3+96]; - mul.f32 %f145, %f138, 0f00000000; - mul.f32 %f146, %f139, 0f00000000; - mul.f32 %f147, %f140, 0f00000000; - ld.v4.f32 {%f148, %f149, %f150, %f151}, [%rd3+112]; - fma.rn.f32 %f155, %f122, %f148, %f145; - fma.rn.f32 %f156, %f122, %f149, %f146; - fma.rn.f32 %f157, %f122, %f150, %f147; - ld.f32 %f158, [%rd3+128]; - mov.f32 %f159, 0f3F800000; - fma.rn.f32 %f160, %f159, %f158, %f155; - ld.f32 %f161, [%rd3+132]; - fma.rn.f32 %f162, %f159, %f161, %f156; - ld.f32 %f163, [%rd3+136]; - fma.rn.f32 %f164, %f159, %f163, %f157; - mul.f32 %f165, %f160, %f160; - fma.rn.f32 %f166, %f162, %f162, %f165; - fma.rn.f32 %f167, %f164, %f164, %f166; - sqrt.rn.f32 %f168, %f167; - div.rn.f32 %f169, %f160, %f168; - div.rn.f32 %f170, %f162, %f168; - div.rn.f32 %f171, %f164, %f168; - fma.rn.f32 %f172, %f20, %f7, %f17; - fma.rn.f32 %f173, %f21, %f7, %f18; - fma.rn.f32 %f174, %f22, %f7, %f19; - ld.u64 %rd8, [%rd7]; + cvt.rn.f32.f64 %f182, %fd2; + ld.u64 %rd9, [%rd8]; // inline asm call (%r21), _optix_read_primitive_idx, (); // inline asm - ld.const.u64 %rd9, [params+216]; - cvta.to.global.u64 %rd10, %rd9; - mul.wide.u32 %rd11, %r1, 8; - add.s64 %rd12, %rd10, %rd11; - st.global.u64 [%rd12], %rd8; - ld.const.u64 %rd13, [params+224]; - cvta.to.global.u64 %rd14, %rd13; - mul.wide.u32 
%rd15, %r1, 4; - add.s64 %rd16, %rd14, %rd15; - st.global.u32 [%rd16], %r21; - ld.const.u64 %rd17, [params+80]; - cvta.to.global.u64 %rd18, %rd17; - add.s64 %rd19, %rd18, %rd15; - st.global.f32 [%rd19], %f10; - ld.const.u64 %rd20, [params+88]; - cvta.to.global.u64 %rd21, %rd20; - add.s64 %rd22, %rd21, %rd15; - st.global.f32 [%rd22], %f107; - ld.const.u64 %rd23, [params+96]; - cvta.to.global.u64 %rd24, %rd23; - add.s64 %rd25, %rd24, %rd15; - st.global.f32 [%rd25], %f169; - ld.const.u64 %rd26, [params+104]; - cvta.to.global.u64 %rd27, %rd26; - add.s64 %rd28, %rd27, %rd15; - st.global.f32 [%rd28], %f170; - ld.const.u64 %rd29, [params+112]; - cvta.to.global.u64 %rd30, %rd29; - add.s64 %rd31, %rd30, %rd15; - st.global.f32 [%rd31], %f171; - ld.const.u64 %rd32, [params+120]; - cvta.to.global.u64 %rd33, %rd32; - add.s64 %rd34, %rd33, %rd15; - st.global.f32 [%rd34], %f169; - ld.const.u64 %rd35, [params+128]; - cvta.to.global.u64 %rd36, %rd35; - add.s64 %rd37, %rd36, %rd15; - st.global.f32 [%rd37], %f170; - ld.const.u64 %rd38, [params+136]; - cvta.to.global.u64 %rd39, %rd38; - add.s64 %rd40, %rd39, %rd15; - st.global.f32 [%rd40], %f171; - ld.const.u64 %rd41, [params+144]; - cvta.to.global.u64 %rd42, %rd41; - add.s64 %rd43, %rd42, %rd15; - st.global.f32 [%rd43], %f172; - ld.const.u64 %rd44, [params+152]; - cvta.to.global.u64 %rd45, %rd44; - add.s64 %rd46, %rd45, %rd15; - st.global.f32 [%rd46], %f173; - ld.const.u64 %rd47, [params+160]; - cvta.to.global.u64 %rd48, %rd47; - add.s64 %rd49, %rd48, %rd15; - st.global.f32 [%rd49], %f174; - ld.const.u64 %rd50, [params+168]; - cvta.to.global.u64 %rd51, %rd50; - add.s64 %rd52, %rd51, %rd15; - st.global.f32 [%rd52], %f123; - ld.const.u64 %rd53, [params+176]; - cvta.to.global.u64 %rd54, %rd53; - add.s64 %rd55, %rd54, %rd15; - st.global.f32 [%rd55], %f125; - ld.const.u64 %rd56, [params+184]; - cvta.to.global.u64 %rd57, %rd56; - add.s64 %rd58, %rd57, %rd15; - st.global.f32 [%rd58], %f127; - ld.const.u64 %rd59, [params+192]; - 
cvta.to.global.u64 %rd60, %rd59; - add.s64 %rd61, %rd60, %rd15; - st.global.f32 [%rd61], %f135; - ld.const.u64 %rd62, [params+200]; - cvta.to.global.u64 %rd63, %rd62; - add.s64 %rd64, %rd63, %rd15; - st.global.f32 [%rd64], %f136; - ld.const.u64 %rd65, [params+208]; - cvta.to.global.u64 %rd66, %rd65; - add.s64 %rd67, %rd66, %rd15; - st.global.f32 [%rd67], %f137; - ld.const.u64 %rd68, [params+72]; - cvta.to.global.u64 %rd69, %rd68; - add.s64 %rd70, %rd69, %rd15; - st.global.f32 [%rd70], %f7; + ld.const.u64 %rd10, [params+216]; + cvta.to.global.u64 %rd11, %rd10; + cvt.u64.u32 %rd4, %r1; + mul.wide.u32 %rd12, %r1, 8; + add.s64 %rd13, %rd11, %rd12; + st.global.u64 [%rd13], %rd9; + ld.const.u64 %rd14, [params+224]; + cvta.to.global.u64 %rd15, %rd14; + mul.wide.u32 %rd16, %r1, 4; + add.s64 %rd17, %rd15, %rd16; + st.global.u32 [%rd17], %r21; + ld.const.u64 %rd18, [params+80]; + cvta.to.global.u64 %rd19, %rd18; + add.s64 %rd20, %rd19, %rd16; + st.global.f32 [%rd20], %f10; + ld.const.u64 %rd21, [params+88]; + cvta.to.global.u64 %rd22, %rd21; + add.s64 %rd23, %rd22, %rd16; + add.f32 %f183, %f182, 0f3F800000; + setp.lt.f32 %p13, %f182, 0f00000000; + selp.f32 %f184, %f183, %f182, %p13; + st.global.f32 [%rd23], %f184; + ld.const.u64 %rd24, [params+144]; + cvta.to.global.u64 %rd25, %rd24; + add.s64 %rd26, %rd25, %rd16; + st.global.f32 [%rd26], %f17; + ld.const.u64 %rd27, [params+152]; + cvta.to.global.u64 %rd28, %rd27; + add.s64 %rd29, %rd28, %rd16; + st.global.f32 [%rd29], %f18; + ld.const.u64 %rd30, [params+160]; + cvta.to.global.u64 %rd31, %rd30; + add.s64 %rd32, %rd31, %rd16; + st.global.f32 [%rd32], %f19; + @%p11 bra BB3_11; + + ld.const.u64 %rd33, [params+96]; + cvta.to.global.u64 %rd34, %rd33; + shl.b64 %rd35, %rd4, 2; + add.s64 %rd36, %rd34, %rd35; + st.global.f32 [%rd36], %f187; + ld.const.u64 %rd37, [params+104]; + cvta.to.global.u64 %rd38, %rd37; + add.s64 %rd39, %rd38, %rd35; + st.global.f32 [%rd39], %f186; + ld.const.u64 %rd40, [params+112]; + cvta.to.global.u64 
%rd41, %rd40; + add.s64 %rd42, %rd41, %rd35; + st.global.f32 [%rd42], %f188; + ld.const.u64 %rd43, [params+120]; + cvta.to.global.u64 %rd44, %rd43; + add.s64 %rd45, %rd44, %rd35; + st.global.f32 [%rd45], %f187; + ld.const.u64 %rd46, [params+128]; + cvta.to.global.u64 %rd47, %rd46; + add.s64 %rd48, %rd47, %rd35; + st.global.f32 [%rd48], %f186; + ld.const.u64 %rd49, [params+136]; + cvta.to.global.u64 %rd50, %rd49; + add.s64 %rd51, %rd50, %rd35; + st.global.f32 [%rd51], %f188; + ld.const.u64 %rd52, [params+168]; + cvta.to.global.u64 %rd53, %rd52; + add.s64 %rd54, %rd53, %rd35; + st.global.f32 [%rd54], %f191; + ld.const.u64 %rd55, [params+176]; + cvta.to.global.u64 %rd56, %rd55; + add.s64 %rd57, %rd56, %rd35; + st.global.f32 [%rd57], %f190; + ld.const.u64 %rd58, [params+184]; + cvta.to.global.u64 %rd59, %rd58; + add.s64 %rd60, %rd59, %rd35; + st.global.f32 [%rd60], %f189; + ld.const.u64 %rd61, [params+192]; + cvta.to.global.u64 %rd62, %rd61; + add.s64 %rd63, %rd62, %rd35; + st.global.f32 [%rd63], %f194; + ld.const.u64 %rd64, [params+200]; + cvta.to.global.u64 %rd65, %rd64; + add.s64 %rd66, %rd65, %rd35; + st.global.f32 [%rd66], %f193; + ld.const.u64 %rd67, [params+208]; + cvta.to.global.u64 %rd68, %rd67; + add.s64 %rd69, %rd68, %rd35; + st.global.f32 [%rd69], %f192; + +BB3_11: + ld.const.u64 %rd70, [params+72]; + cvta.to.global.u64 %rd71, %rd70; + shl.b64 %rd72, %rd4, 2; + add.s64 %rd73, %rd71, %rd72; + st.global.f32 [%rd73], %f7; -BB3_8: +BB3_12: ret; } @@ -937,11 +964,11 @@ BB3_8: ) { - .reg .pred %p<5>; - .reg .b16 %rs<2>; - .reg .f32 %f<177>; + .reg .pred %p<7>; + .reg .b16 %rs<3>; + .reg .f32 %f<193>; .reg .b32 %r<17>; - .reg .b64 %rd<102>; + .reg .b64 %rd<105>; // inline asm @@ -965,318 +992,329 @@ BB3_8: setp.eq.s64 %p1, %rd1, 0; @%p1 bra BB4_2; - cvta.to.global.u64 %rd12, %rd1; - cvt.u64.u32 %rd13, %r1; - add.s64 %rd14, %rd12, %rd13; - mov.u16 %rs1, 1; - st.global.u8 [%rd14], %rs1; - bra.uni BB4_9; + cvta.to.global.u64 %rd13, %rd1; + cvt.u64.u32 %rd14, %r1; + 
add.s64 %rd15, %rd13, %rd14; + mov.u16 %rs2, 1; + st.global.u8 [%rd15], %rs2; + bra.uni BB4_12; BB4_2: // inline asm - call (%rd15), _optix_get_sbt_data_ptr_64, (); + call (%rd16), _optix_get_sbt_data_ptr_64, (); // inline asm - ld.u64 %rd3, [%rd15+8]; + ld.u64 %rd3, [%rd16+8]; // inline asm - call (%f175, %f176), _optix_get_triangle_barycentrics, (); + call (%f191, %f192), _optix_get_triangle_barycentrics, (); // inline asm - ld.u64 %rd16, [%rd3]; + mov.f32 %f78, 0f3F800000; + sub.f32 %f79, %f78, %f191; + sub.f32 %f3, %f79, %f192; + ld.u64 %rd17, [%rd3]; // inline asm call (%r10), _optix_read_primitive_idx, (); // inline asm - mul.wide.u32 %rd17, %r10, 3; - shl.b64 %rd18, %rd17, 2; - add.s64 %rd19, %rd16, %rd18; - ld.u32 %r11, [%rd19]; + mul.wide.u32 %rd18, %r10, 3; + shl.b64 %rd19, %rd18, 2; + add.s64 %rd20, %rd17, %rd19; + ld.u32 %r11, [%rd20]; cvt.u64.u32 %rd4, %r11; mul.wide.u32 %rd5, %r11, 3; - ld.u64 %rd20, [%rd3+8]; - shl.b64 %rd21, %rd5, 2; - add.s64 %rd22, %rd20, %rd21; - ld.u32 %r12, [%rd19+4]; + ld.u64 %rd21, [%rd3+8]; + shl.b64 %rd22, %rd5, 2; + add.s64 %rd23, %rd21, %rd22; + ld.u32 %r12, [%rd20+4]; cvt.u64.u32 %rd6, %r12; mul.wide.u32 %rd7, %r12, 3; - shl.b64 %rd23, %rd7, 2; - add.s64 %rd24, %rd20, %rd23; - ld.u32 %r13, [%rd19+8]; + shl.b64 %rd24, %rd7, 2; + add.s64 %rd25, %rd21, %rd24; + ld.u32 %r13, [%rd20+8]; cvt.u64.u32 %rd8, %r13; mul.wide.u32 %rd9, %r13, 3; - shl.b64 %rd25, %rd9, 2; - add.s64 %rd26, %rd20, %rd25; - ld.f32 %f58, [%rd24]; - ld.f32 %f59, [%rd22]; - sub.f32 %f1, %f58, %f59; - ld.f32 %f60, [%rd24+4]; - ld.f32 %f61, [%rd22+4]; - sub.f32 %f2, %f60, %f61; - ld.f32 %f62, [%rd24+8]; - ld.f32 %f63, [%rd22+8]; - sub.f32 %f3, %f62, %f63; - mov.f32 %f64, 0f3F800000; - sub.f32 %f65, %f64, %f175; - sub.f32 %f6, %f65, %f176; - ld.f32 %f66, [%rd26]; - sub.f32 %f7, %f66, %f59; - ld.f32 %f67, [%rd26+4]; - sub.f32 %f8, %f67, %f61; - ld.f32 %f68, [%rd26+8]; - sub.f32 %f9, %f68, %f63; - mul.f32 %f69, %f58, %f175; - mul.f32 %f70, %f60, %f175; - mul.f32 
%f71, %f62, %f175; - fma.rn.f32 %f72, %f59, %f6, %f69; - fma.rn.f32 %f73, %f61, %f6, %f70; - fma.rn.f32 %f74, %f63, %f6, %f71; - fma.rn.f32 %f10, %f66, %f176, %f72; - fma.rn.f32 %f11, %f67, %f176, %f73; - fma.rn.f32 %f12, %f68, %f176, %f74; - mul.f32 %f75, %f2, %f9; - mul.f32 %f76, %f3, %f8; - sub.f32 %f77, %f75, %f76; - mul.f32 %f78, %f3, %f7; - mul.f32 %f79, %f1, %f9; - sub.f32 %f80, %f78, %f79; - mul.f32 %f81, %f1, %f8; - mul.f32 %f82, %f2, %f7; - sub.f32 %f83, %f81, %f82; - mul.f32 %f84, %f77, %f77; - fma.rn.f32 %f85, %f80, %f80, %f84; - fma.rn.f32 %f86, %f83, %f83, %f85; - sqrt.rn.f32 %f87, %f86; - div.rn.f32 %f13, %f77, %f87; - div.rn.f32 %f14, %f80, %f87; - div.rn.f32 %f15, %f83, %f87; - mov.b32 %r14, %f15; + shl.b64 %rd26, %rd9, 2; + add.s64 %rd27, %rd21, %rd26; + ld.f32 %f4, [%rd23]; + ld.f32 %f5, [%rd23+4]; + ld.f32 %f6, [%rd23+8]; + ld.f32 %f7, [%rd25]; + mul.f32 %f80, %f7, %f191; + ld.f32 %f8, [%rd25+4]; + mul.f32 %f81, %f8, %f191; + ld.f32 %f9, [%rd25+8]; + mul.f32 %f82, %f9, %f191; + fma.rn.f32 %f83, %f4, %f3, %f80; + fma.rn.f32 %f84, %f5, %f3, %f81; + fma.rn.f32 %f85, %f6, %f3, %f82; + ld.f32 %f10, [%rd27]; + ld.f32 %f11, [%rd27+4]; + ld.f32 %f12, [%rd27+8]; + fma.rn.f32 %f13, %f10, %f192, %f83; + fma.rn.f32 %f14, %f11, %f192, %f84; + fma.rn.f32 %f15, %f12, %f192, %f85; + ld.const.u8 %rs1, [params+248]; + setp.eq.s16 %p2, %rs1, 0; + @%p2 bra BB4_9; + + sub.f32 %f16, %f7, %f4; + sub.f32 %f17, %f12, %f6; + sub.f32 %f18, %f8, %f5; + mul.f32 %f86, %f18, %f17; + sub.f32 %f19, %f11, %f5; + sub.f32 %f20, %f9, %f6; + mul.f32 %f87, %f20, %f19; + sub.f32 %f88, %f86, %f87; + sub.f32 %f21, %f10, %f4; + mul.f32 %f89, %f20, %f21; + mul.f32 %f90, %f16, %f17; + sub.f32 %f91, %f89, %f90; + mul.f32 %f92, %f16, %f19; + mul.f32 %f93, %f18, %f21; + sub.f32 %f94, %f92, %f93; + mul.f32 %f95, %f88, %f88; + fma.rn.f32 %f96, %f91, %f91, %f95; + fma.rn.f32 %f97, %f94, %f94, %f96; + sqrt.rn.f32 %f98, %f97; + div.rn.f32 %f188, %f88, %f98; + div.rn.f32 %f189, %f91, %f98; + 
div.rn.f32 %f190, %f94, %f98; + mov.b32 %r14, %f190; and.b32 %r15, %r14, -2147483648; or.b32 %r16, %r15, 1065353216; - mov.b32 %f16, %r16; - add.f32 %f88, %f15, %f16; - mov.f32 %f89, 0fBF800000; - div.rn.f32 %f90, %f89, %f88; - mul.f32 %f91, %f13, %f14; - mul.f32 %f169, %f91, %f90; - mul.f32 %f92, %f13, %f13; - mul.f32 %f93, %f92, %f90; - fma.rn.f32 %f172, %f93, %f16, 0f3F800000; - mul.f32 %f94, %f14, %f14; - fma.rn.f32 %f170, %f94, %f90, %f16; + mov.b32 %f25, %r16; + add.f32 %f99, %f190, %f25; + mov.f32 %f100, 0fBF800000; + div.rn.f32 %f101, %f100, %f99; + mul.f32 %f102, %f188, %f189; + mul.f32 %f179, %f102, %f101; + mul.f32 %f103, %f188, %f188; + mul.f32 %f104, %f103, %f101; + fma.rn.f32 %f182, %f104, %f25, 0f3F800000; + mul.f32 %f105, %f189, %f189; + fma.rn.f32 %f180, %f105, %f101, %f25; ld.u64 %rd10, [%rd3+16]; - setp.eq.s64 %p2, %rd10, 0; - @%p2 bra BB4_4; - - add.s64 %rd28, %rd10, %rd21; - add.s64 %rd30, %rd10, %rd23; - add.s64 %rd32, %rd10, %rd25; - ld.f32 %f95, [%rd28]; - ld.f32 %f96, [%rd28+4]; - ld.f32 %f97, [%rd28+8]; - ld.f32 %f98, [%rd30]; - mul.f32 %f99, %f98, %f175; - ld.f32 %f100, [%rd30+4]; - mul.f32 %f101, %f100, %f175; - ld.f32 %f102, [%rd30+8]; - mul.f32 %f103, %f102, %f175; - fma.rn.f32 %f104, %f95, %f6, %f99; - fma.rn.f32 %f105, %f96, %f6, %f101; - fma.rn.f32 %f106, %f97, %f6, %f103; - ld.f32 %f107, [%rd32]; - ld.f32 %f108, [%rd32+4]; - ld.f32 %f109, [%rd32+8]; - fma.rn.f32 %f110, %f107, %f176, %f104; - fma.rn.f32 %f111, %f108, %f176, %f105; - fma.rn.f32 %f112, %f109, %f176, %f106; - mul.f32 %f113, %f110, %f110; - fma.rn.f32 %f114, %f111, %f111, %f113; - fma.rn.f32 %f115, %f112, %f112, %f114; - sqrt.rn.f32 %f116, %f115; - div.rn.f32 %f166, %f110, %f116; - div.rn.f32 %f167, %f111, %f116; - div.rn.f32 %f168, %f112, %f116; - bra.uni BB4_5; - -BB4_4: - mov.f32 %f166, %f13; - mov.f32 %f167, %f14; - mov.f32 %f168, %f15; + setp.eq.s64 %p3, %rd10, 0; + @%p3 bra BB4_5; + + add.s64 %rd29, %rd10, %rd22; + add.s64 %rd31, %rd10, %rd24; + add.s64 %rd33, 
%rd10, %rd26; + ld.f32 %f106, [%rd29]; + ld.f32 %f107, [%rd29+4]; + ld.f32 %f108, [%rd29+8]; + ld.f32 %f109, [%rd31]; + mul.f32 %f110, %f109, %f191; + ld.f32 %f111, [%rd31+4]; + mul.f32 %f112, %f111, %f191; + ld.f32 %f113, [%rd31+8]; + mul.f32 %f114, %f113, %f191; + fma.rn.f32 %f115, %f106, %f3, %f110; + fma.rn.f32 %f116, %f107, %f3, %f112; + fma.rn.f32 %f117, %f108, %f3, %f114; + ld.f32 %f118, [%rd33]; + ld.f32 %f119, [%rd33+4]; + ld.f32 %f120, [%rd33+8]; + fma.rn.f32 %f121, %f118, %f192, %f115; + fma.rn.f32 %f122, %f119, %f192, %f116; + fma.rn.f32 %f123, %f120, %f192, %f117; + mul.f32 %f124, %f121, %f121; + fma.rn.f32 %f125, %f122, %f122, %f124; + fma.rn.f32 %f126, %f123, %f123, %f125; + sqrt.rn.f32 %f127, %f126; + div.rn.f32 %f185, %f121, %f127; + div.rn.f32 %f186, %f122, %f127; + div.rn.f32 %f187, %f123, %f127; + bra.uni BB4_6; BB4_5: - mul.f32 %f173, %f169, %f16; - mul.f32 %f117, %f13, %f16; - neg.f32 %f174, %f117; - neg.f32 %f171, %f14; + mov.f32 %f185, %f188; + mov.f32 %f186, %f189; + mov.f32 %f187, %f190; + +BB4_6: + mul.f32 %f183, %f179, %f25; + mul.f32 %f128, %f188, %f25; + neg.f32 %f184, %f128; + neg.f32 %f181, %f189; ld.u64 %rd11, [%rd3+24]; - setp.eq.s64 %p3, %rd11, 0; - @%p3 bra BB4_8; - - shl.b64 %rd33, %rd4, 3; - add.s64 %rd34, %rd11, %rd33; - shl.b64 %rd35, %rd6, 3; - add.s64 %rd36, %rd11, %rd35; - shl.b64 %rd37, %rd8, 3; - add.s64 %rd38, %rd11, %rd37; - ld.f32 %f118, [%rd34]; - ld.f32 %f119, [%rd34+4]; - ld.f32 %f120, [%rd36]; - mul.f32 %f121, %f120, %f175; - ld.f32 %f122, [%rd36+4]; - mul.f32 %f123, %f122, %f175; - fma.rn.f32 %f124, %f118, %f6, %f121; - fma.rn.f32 %f125, %f119, %f6, %f123; - ld.f32 %f126, [%rd38]; - ld.f32 %f127, [%rd38+4]; - fma.rn.f32 %f175, %f126, %f176, %f124; - fma.rn.f32 %f176, %f127, %f176, %f125; - sub.f32 %f37, %f120, %f118; - sub.f32 %f38, %f122, %f119; - sub.f32 %f39, %f126, %f118; - sub.f32 %f40, %f127, %f119; - mul.f32 %f128, %f37, %f40; - mul.f32 %f129, %f38, %f39; - sub.f32 %f41, %f128, %f129; - setp.eq.f32 %p4, 
%f41, 0f00000000; - @%p4 bra BB4_8; - - rcp.rn.f32 %f130, %f41; - mul.f32 %f131, %f38, %f7; - mul.f32 %f132, %f40, %f1; - sub.f32 %f133, %f132, %f131; - mul.f32 %f134, %f38, %f8; - mul.f32 %f135, %f40, %f2; - sub.f32 %f136, %f135, %f134; - mul.f32 %f137, %f38, %f9; - mul.f32 %f138, %f40, %f3; - sub.f32 %f139, %f138, %f137; - mul.f32 %f172, %f133, %f130; - mul.f32 %f173, %f136, %f130; - mul.f32 %f174, %f139, %f130; - mul.f32 %f140, %f1, %f39; - mul.f32 %f141, %f2, %f39; - mul.f32 %f142, %f3, %f39; - mul.f32 %f143, %f37, %f7; - sub.f32 %f144, %f143, %f140; - mul.f32 %f145, %f37, %f8; - sub.f32 %f146, %f145, %f141; - mul.f32 %f147, %f37, %f9; - sub.f32 %f148, %f147, %f142; - mul.f32 %f169, %f144, %f130; - mul.f32 %f170, %f146, %f130; - mul.f32 %f171, %f148, %f130; - -BB4_8: - // inline asm - call (%f149), _optix_get_world_ray_origin_x, (); - // inline asm - // inline asm - call (%f150), _optix_get_world_ray_origin_y, (); - // inline asm - // inline asm - call (%f151), _optix_get_world_ray_origin_z, (); - // inline asm - // inline asm - call (%f152), _optix_get_world_ray_direction_x, (); - // inline asm - // inline asm - call (%f153), _optix_get_world_ray_direction_y, (); - // inline asm - // inline asm - call (%f154), _optix_get_world_ray_direction_z, (); - // inline asm - sub.f32 %f155, %f10, %f149; - sub.f32 %f156, %f11, %f150; - sub.f32 %f157, %f12, %f151; - mul.f32 %f158, %f155, %f155; - fma.rn.f32 %f159, %f156, %f156, %f158; - fma.rn.f32 %f160, %f157, %f157, %f159; - mul.f32 %f161, %f152, %f152; - fma.rn.f32 %f162, %f153, %f153, %f161; - fma.rn.f32 %f163, %f154, %f154, %f162; - div.rn.f32 %f164, %f160, %f163; - sqrt.rn.f32 %f165, %f164; - ld.u64 %rd39, [%rd15]; - ld.const.u64 %rd40, [params+216]; - cvta.to.global.u64 %rd41, %rd40; - mul.wide.u32 %rd42, %r1, 8; - add.s64 %rd43, %rd41, %rd42; - st.global.u64 [%rd43], %rd39; - ld.const.u64 %rd44, [params+224]; - cvta.to.global.u64 %rd45, %rd44; - mul.wide.u32 %rd46, %r1, 4; - add.s64 %rd47, %rd45, %rd46; - 
st.global.u32 [%rd47], %r10; - ld.const.u64 %rd48, [params+80]; - cvta.to.global.u64 %rd49, %rd48; - add.s64 %rd50, %rd49, %rd46; - st.global.f32 [%rd50], %f175; - ld.const.u64 %rd51, [params+88]; - cvta.to.global.u64 %rd52, %rd51; - add.s64 %rd53, %rd52, %rd46; - st.global.f32 [%rd53], %f176; - ld.const.u64 %rd54, [params+96]; - cvta.to.global.u64 %rd55, %rd54; - add.s64 %rd56, %rd55, %rd46; - st.global.f32 [%rd56], %f13; - ld.const.u64 %rd57, [params+104]; - cvta.to.global.u64 %rd58, %rd57; - add.s64 %rd59, %rd58, %rd46; - st.global.f32 [%rd59], %f14; - ld.const.u64 %rd60, [params+112]; - cvta.to.global.u64 %rd61, %rd60; - add.s64 %rd62, %rd61, %rd46; - st.global.f32 [%rd62], %f15; - ld.const.u64 %rd63, [params+120]; - cvta.to.global.u64 %rd64, %rd63; - add.s64 %rd65, %rd64, %rd46; - st.global.f32 [%rd65], %f166; - ld.const.u64 %rd66, [params+128]; - cvta.to.global.u64 %rd67, %rd66; - add.s64 %rd68, %rd67, %rd46; - st.global.f32 [%rd68], %f167; - ld.const.u64 %rd69, [params+136]; - cvta.to.global.u64 %rd70, %rd69; - add.s64 %rd71, %rd70, %rd46; - st.global.f32 [%rd71], %f168; - ld.const.u64 %rd72, [params+144]; - cvta.to.global.u64 %rd73, %rd72; - add.s64 %rd74, %rd73, %rd46; - st.global.f32 [%rd74], %f10; - ld.const.u64 %rd75, [params+152]; - cvta.to.global.u64 %rd76, %rd75; - add.s64 %rd77, %rd76, %rd46; - st.global.f32 [%rd77], %f11; - ld.const.u64 %rd78, [params+160]; - cvta.to.global.u64 %rd79, %rd78; - add.s64 %rd80, %rd79, %rd46; - st.global.f32 [%rd80], %f12; - ld.const.u64 %rd81, [params+168]; - cvta.to.global.u64 %rd82, %rd81; - add.s64 %rd83, %rd82, %rd46; - st.global.f32 [%rd83], %f172; - ld.const.u64 %rd84, [params+176]; - cvta.to.global.u64 %rd85, %rd84; - add.s64 %rd86, %rd85, %rd46; - st.global.f32 [%rd86], %f173; - ld.const.u64 %rd87, [params+184]; - cvta.to.global.u64 %rd88, %rd87; - add.s64 %rd89, %rd88, %rd46; - st.global.f32 [%rd89], %f174; - ld.const.u64 %rd90, [params+192]; - cvta.to.global.u64 %rd91, %rd90; - add.s64 %rd92, %rd91, %rd46; - 
st.global.f32 [%rd92], %f169; - ld.const.u64 %rd93, [params+200]; - cvta.to.global.u64 %rd94, %rd93; - add.s64 %rd95, %rd94, %rd46; - st.global.f32 [%rd95], %f170; - ld.const.u64 %rd96, [params+208]; - cvta.to.global.u64 %rd97, %rd96; - add.s64 %rd98, %rd97, %rd46; - st.global.f32 [%rd98], %f171; - ld.const.u64 %rd99, [params+72]; - cvta.to.global.u64 %rd100, %rd99; - add.s64 %rd101, %rd100, %rd46; - st.global.f32 [%rd101], %f165; + setp.eq.s64 %p4, %rd11, 0; + @%p4 bra BB4_9; + + shl.b64 %rd34, %rd4, 3; + add.s64 %rd35, %rd11, %rd34; + shl.b64 %rd36, %rd6, 3; + add.s64 %rd37, %rd11, %rd36; + shl.b64 %rd38, %rd8, 3; + add.s64 %rd39, %rd11, %rd38; + ld.f32 %f129, [%rd35]; + ld.f32 %f130, [%rd35+4]; + ld.f32 %f131, [%rd37]; + mul.f32 %f132, %f131, %f191; + ld.f32 %f133, [%rd37+4]; + mul.f32 %f134, %f133, %f191; + fma.rn.f32 %f135, %f129, %f3, %f132; + fma.rn.f32 %f136, %f130, %f3, %f134; + ld.f32 %f137, [%rd39]; + ld.f32 %f138, [%rd39+4]; + fma.rn.f32 %f191, %f137, %f192, %f135; + fma.rn.f32 %f192, %f138, %f192, %f136; + sub.f32 %f49, %f131, %f129; + sub.f32 %f50, %f133, %f130; + sub.f32 %f51, %f137, %f129; + sub.f32 %f52, %f138, %f130; + mul.f32 %f139, %f49, %f52; + mul.f32 %f140, %f50, %f51; + sub.f32 %f53, %f139, %f140; + setp.eq.f32 %p5, %f53, 0f00000000; + @%p5 bra BB4_9; + + rcp.rn.f32 %f141, %f53; + mul.f32 %f142, %f50, %f21; + mul.f32 %f143, %f52, %f16; + sub.f32 %f144, %f143, %f142; + mul.f32 %f145, %f50, %f19; + mul.f32 %f146, %f52, %f18; + sub.f32 %f147, %f146, %f145; + mul.f32 %f148, %f50, %f17; + mul.f32 %f149, %f52, %f20; + sub.f32 %f150, %f149, %f148; + mul.f32 %f182, %f144, %f141; + mul.f32 %f183, %f147, %f141; + mul.f32 %f184, %f150, %f141; + mul.f32 %f151, %f16, %f51; + mul.f32 %f152, %f18, %f51; + mul.f32 %f153, %f20, %f51; + mul.f32 %f154, %f49, %f21; + sub.f32 %f155, %f154, %f151; + mul.f32 %f156, %f49, %f19; + sub.f32 %f157, %f156, %f152; + mul.f32 %f158, %f49, %f17; + sub.f32 %f159, %f158, %f153; + mul.f32 %f179, %f155, %f141; + mul.f32 %f180, 
%f157, %f141; + mul.f32 %f181, %f159, %f141; BB4_9: + // inline asm + call (%f160), _optix_get_world_ray_origin_x, (); + // inline asm + // inline asm + call (%f161), _optix_get_world_ray_origin_y, (); + // inline asm + // inline asm + call (%f162), _optix_get_world_ray_origin_z, (); + // inline asm + // inline asm + call (%f163), _optix_get_world_ray_direction_x, (); + // inline asm + // inline asm + call (%f164), _optix_get_world_ray_direction_y, (); + // inline asm + // inline asm + call (%f165), _optix_get_world_ray_direction_z, (); + // inline asm + sub.f32 %f166, %f13, %f160; + sub.f32 %f167, %f14, %f161; + sub.f32 %f168, %f15, %f162; + mul.f32 %f169, %f166, %f166; + fma.rn.f32 %f170, %f167, %f167, %f169; + fma.rn.f32 %f171, %f168, %f168, %f170; + mul.f32 %f172, %f163, %f163; + fma.rn.f32 %f173, %f164, %f164, %f172; + fma.rn.f32 %f174, %f165, %f165, %f173; + div.rn.f32 %f175, %f171, %f174; + sqrt.rn.f32 %f74, %f175; + ld.u64 %rd40, [%rd16]; + ld.const.u64 %rd41, [params+216]; + cvta.to.global.u64 %rd42, %rd41; + cvt.u64.u32 %rd12, %r1; + mul.wide.u32 %rd43, %r1, 8; + add.s64 %rd44, %rd42, %rd43; + st.global.u64 [%rd44], %rd40; + ld.const.u64 %rd45, [params+224]; + cvta.to.global.u64 %rd46, %rd45; + mul.wide.u32 %rd47, %r1, 4; + add.s64 %rd48, %rd46, %rd47; + st.global.u32 [%rd48], %r10; + ld.const.u64 %rd49, [params+80]; + cvta.to.global.u64 %rd50, %rd49; + add.s64 %rd51, %rd50, %rd47; + st.global.f32 [%rd51], %f191; + ld.const.u64 %rd52, [params+88]; + cvta.to.global.u64 %rd53, %rd52; + add.s64 %rd54, %rd53, %rd47; + st.global.f32 [%rd54], %f192; + ld.const.u64 %rd55, [params+144]; + cvta.to.global.u64 %rd56, %rd55; + add.s64 %rd57, %rd56, %rd47; + st.global.f32 [%rd57], %f13; + ld.const.u64 %rd58, [params+152]; + cvta.to.global.u64 %rd59, %rd58; + add.s64 %rd60, %rd59, %rd47; + st.global.f32 [%rd60], %f14; + ld.const.u64 %rd61, [params+160]; + cvta.to.global.u64 %rd62, %rd61; + add.s64 %rd63, %rd62, %rd47; + st.global.f32 [%rd63], %f15; + @%p2 bra BB4_11; + 
+ ld.const.u64 %rd64, [params+96]; + cvta.to.global.u64 %rd65, %rd64; + shl.b64 %rd66, %rd12, 2; + add.s64 %rd67, %rd65, %rd66; + st.global.f32 [%rd67], %f188; + ld.const.u64 %rd68, [params+104]; + cvta.to.global.u64 %rd69, %rd68; + add.s64 %rd70, %rd69, %rd66; + st.global.f32 [%rd70], %f189; + ld.const.u64 %rd71, [params+112]; + cvta.to.global.u64 %rd72, %rd71; + add.s64 %rd73, %rd72, %rd66; + st.global.f32 [%rd73], %f190; + ld.const.u64 %rd74, [params+120]; + cvta.to.global.u64 %rd75, %rd74; + add.s64 %rd76, %rd75, %rd66; + st.global.f32 [%rd76], %f185; + ld.const.u64 %rd77, [params+128]; + cvta.to.global.u64 %rd78, %rd77; + add.s64 %rd79, %rd78, %rd66; + st.global.f32 [%rd79], %f186; + ld.const.u64 %rd80, [params+136]; + cvta.to.global.u64 %rd81, %rd80; + add.s64 %rd82, %rd81, %rd66; + st.global.f32 [%rd82], %f187; + ld.const.u64 %rd83, [params+168]; + cvta.to.global.u64 %rd84, %rd83; + add.s64 %rd85, %rd84, %rd66; + st.global.f32 [%rd85], %f182; + ld.const.u64 %rd86, [params+176]; + cvta.to.global.u64 %rd87, %rd86; + add.s64 %rd88, %rd87, %rd66; + st.global.f32 [%rd88], %f183; + ld.const.u64 %rd89, [params+184]; + cvta.to.global.u64 %rd90, %rd89; + add.s64 %rd91, %rd90, %rd66; + st.global.f32 [%rd91], %f184; + ld.const.u64 %rd92, [params+192]; + cvta.to.global.u64 %rd93, %rd92; + add.s64 %rd94, %rd93, %rd66; + st.global.f32 [%rd94], %f179; + ld.const.u64 %rd95, [params+200]; + cvta.to.global.u64 %rd96, %rd95; + add.s64 %rd97, %rd96, %rd66; + st.global.f32 [%rd97], %f180; + ld.const.u64 %rd98, [params+208]; + cvta.to.global.u64 %rd99, %rd98; + add.s64 %rd100, %rd99, %rd66; + st.global.f32 [%rd100], %f181; + +BB4_11: + ld.const.u64 %rd101, [params+72]; + cvta.to.global.u64 %rd102, %rd101; + shl.b64 %rd103, %rd12, 2; + add.s64 %rd104, %rd102, %rd103; + st.global.f32 [%rd104], %f74; + +BB4_12: ret; } @@ -1368,11 +1406,11 @@ BB5_3: ) { - .reg .pred %p<2>; - .reg .b16 %rs<2>; - .reg .f32 %f<78>; + .reg .pred %p<3>; + .reg .b16 %rs<3>; + .reg .f32 %f<81>; .reg .b32 
%r<10>; - .reg .b64 %rd<70>; + .reg .b64 %rd<72>; // inline asm @@ -1401,72 +1439,72 @@ BB5_3: add.s64 %rd4, %rd3, %rd2; mov.u16 %rs1, 1; st.global.u8 [%rd4], %rs1; - bra.uni BB6_3; + bra.uni BB6_5; BB6_2: // inline asm call (%rd5), _optix_get_sbt_data_ptr_64, (); // inline asm ld.u64 %rd6, [%rd5+8]; - ld.v4.f32 {%f7, %f8, %f9, %f10}, [%rd6+160]; - ld.v4.f32 {%f14, %f15, %f16, %f17}, [%rd6+176]; - ld.v4.f32 {%f21, %f22, %f23, %f24}, [%rd6+192]; - // inline asm - call (%f1), _optix_get_world_ray_origin_x, (); - // inline asm - // inline asm - call (%f2), _optix_get_world_ray_origin_y, (); - // inline asm - // inline asm - call (%f3), _optix_get_world_ray_origin_z, (); - // inline asm - // inline asm - call (%f4), _optix_get_world_ray_direction_x, (); - // inline asm - // inline asm - call (%f5), _optix_get_world_ray_direction_y, (); - // inline asm - // inline asm - call (%f6), _optix_get_world_ray_direction_z, (); - // inline asm - ld.v4.f32 {%f28, %f29, %f30, %f31}, [%rd6+80]; - ld.f32 %f35, [%rd6+32]; - fma.rn.f32 %f36, %f1, %f35, %f28; - ld.f32 %f37, [%rd6+36]; - fma.rn.f32 %f38, %f1, %f37, %f29; - ld.f32 %f39, [%rd6+40]; - fma.rn.f32 %f40, %f1, %f39, %f30; - ld.f32 %f41, [%rd6+48]; - fma.rn.f32 %f42, %f2, %f41, %f36; - ld.f32 %f43, [%rd6+52]; - fma.rn.f32 %f44, %f2, %f43, %f38; - ld.f32 %f45, [%rd6+56]; - fma.rn.f32 %f46, %f2, %f45, %f40; - ld.f32 %f47, [%rd6+64]; - fma.rn.f32 %f48, %f3, %f47, %f42; - ld.f32 %f49, [%rd6+68]; - fma.rn.f32 %f50, %f3, %f49, %f44; - ld.f32 %f51, [%rd6+72]; - fma.rn.f32 %f52, %f3, %f51, %f46; - ld.v4.f32 {%f53, %f54, %f55, %f56}, [%rd6+32]; - mul.f32 %f60, %f4, %f53; - mul.f32 %f61, %f4, %f54; - mul.f32 %f62, %f4, %f55; - fma.rn.f32 %f63, %f5, %f41, %f60; - fma.rn.f32 %f64, %f5, %f43, %f61; - fma.rn.f32 %f65, %f5, %f45, %f62; - fma.rn.f32 %f66, %f6, %f47, %f63; - fma.rn.f32 %f67, %f6, %f49, %f64; - fma.rn.f32 %f68, %f6, %f51, %f65; - neg.f32 %f69, %f52; - div.rn.f32 %f70, %f69, %f68; - fma.rn.f32 %f71, %f4, %f70, %f1; - fma.rn.f32 
%f72, %f5, %f70, %f2; - fma.rn.f32 %f73, %f6, %f70, %f3; - fma.rn.f32 %f74, %f70, %f66, %f48; - fma.rn.f32 %f75, %f70, %f67, %f50; - fma.rn.f32 %f76, %f74, 0f3F000000, 0f3F000000; - fma.rn.f32 %f77, %f75, 0f3F000000, 0f3F000000; + ld.v4.f32 {%f20, %f21, %f22, %f23}, [%rd6+160]; + ld.v4.f32 {%f24, %f25, %f26, %f27}, [%rd6+176]; + ld.v4.f32 {%f28, %f29, %f30, %f31}, [%rd6+192]; + // inline asm + call (%f14), _optix_get_world_ray_origin_x, (); + // inline asm + // inline asm + call (%f15), _optix_get_world_ray_origin_y, (); + // inline asm + // inline asm + call (%f16), _optix_get_world_ray_origin_z, (); + // inline asm + // inline asm + call (%f17), _optix_get_world_ray_direction_x, (); + // inline asm + // inline asm + call (%f18), _optix_get_world_ray_direction_y, (); + // inline asm + // inline asm + call (%f19), _optix_get_world_ray_direction_z, (); + // inline asm + ld.v4.f32 {%f32, %f33, %f34, %f35}, [%rd6+80]; + ld.f32 %f39, [%rd6+32]; + fma.rn.f32 %f40, %f14, %f39, %f32; + ld.f32 %f41, [%rd6+36]; + fma.rn.f32 %f42, %f14, %f41, %f33; + ld.f32 %f43, [%rd6+40]; + fma.rn.f32 %f44, %f14, %f43, %f34; + ld.f32 %f45, [%rd6+48]; + fma.rn.f32 %f46, %f15, %f45, %f40; + ld.f32 %f47, [%rd6+52]; + fma.rn.f32 %f48, %f15, %f47, %f42; + ld.f32 %f49, [%rd6+56]; + fma.rn.f32 %f50, %f15, %f49, %f44; + ld.f32 %f51, [%rd6+64]; + fma.rn.f32 %f52, %f16, %f51, %f46; + ld.f32 %f53, [%rd6+68]; + fma.rn.f32 %f54, %f16, %f53, %f48; + ld.f32 %f55, [%rd6+72]; + fma.rn.f32 %f56, %f16, %f55, %f50; + ld.v4.f32 {%f57, %f58, %f59, %f60}, [%rd6+32]; + mul.f32 %f64, %f17, %f57; + mul.f32 %f65, %f17, %f58; + mul.f32 %f66, %f17, %f59; + fma.rn.f32 %f67, %f18, %f45, %f64; + fma.rn.f32 %f68, %f18, %f47, %f65; + fma.rn.f32 %f69, %f18, %f49, %f66; + fma.rn.f32 %f70, %f19, %f51, %f67; + fma.rn.f32 %f71, %f19, %f53, %f68; + fma.rn.f32 %f72, %f19, %f55, %f69; + neg.f32 %f73, %f56; + div.rn.f32 %f13, %f73, %f72; + fma.rn.f32 %f74, %f17, %f13, %f14; + fma.rn.f32 %f75, %f18, %f13, %f15; + fma.rn.f32 %f76, 
%f19, %f13, %f16; + fma.rn.f32 %f77, %f13, %f70, %f52; + fma.rn.f32 %f78, %f13, %f71, %f54; + fma.rn.f32 %f79, %f77, 0f3F000000, 0f3F000000; + fma.rn.f32 %f80, %f78, 0f3F000000, 0f3F000000; ld.u64 %rd7, [%rd5]; // inline asm call (%r9), _optix_read_primitive_idx, (); @@ -1484,77 +1522,83 @@ BB6_2: ld.const.u64 %rd16, [params+80]; cvta.to.global.u64 %rd17, %rd16; add.s64 %rd18, %rd17, %rd14; - st.global.f32 [%rd18], %f76; + st.global.f32 [%rd18], %f79; ld.const.u64 %rd19, [params+88]; cvta.to.global.u64 %rd20, %rd19; add.s64 %rd21, %rd20, %rd14; - st.global.f32 [%rd21], %f77; - ld.const.u64 %rd22, [params+96]; + st.global.f32 [%rd21], %f80; + ld.const.u64 %rd22, [params+144]; cvta.to.global.u64 %rd23, %rd22; add.s64 %rd24, %rd23, %rd14; - st.global.f32 [%rd24], %f7; - ld.const.u64 %rd25, [params+104]; + st.global.f32 [%rd24], %f74; + ld.const.u64 %rd25, [params+152]; cvta.to.global.u64 %rd26, %rd25; add.s64 %rd27, %rd26, %rd14; - st.global.f32 [%rd27], %f8; - ld.const.u64 %rd28, [params+112]; + st.global.f32 [%rd27], %f75; + ld.const.u64 %rd28, [params+160]; cvta.to.global.u64 %rd29, %rd28; add.s64 %rd30, %rd29, %rd14; - st.global.f32 [%rd30], %f9; - ld.const.u64 %rd31, [params+120]; + st.global.f32 [%rd30], %f76; + ld.const.u8 %rs2, [params+248]; + setp.eq.s16 %p2, %rs2, 0; + @%p2 bra BB6_4; + + ld.const.u64 %rd31, [params+96]; cvta.to.global.u64 %rd32, %rd31; - add.s64 %rd33, %rd32, %rd14; - st.global.f32 [%rd33], %f7; - ld.const.u64 %rd34, [params+128]; - cvta.to.global.u64 %rd35, %rd34; - add.s64 %rd36, %rd35, %rd14; - st.global.f32 [%rd36], %f8; - ld.const.u64 %rd37, [params+136]; - cvta.to.global.u64 %rd38, %rd37; - add.s64 %rd39, %rd38, %rd14; - st.global.f32 [%rd39], %f9; - ld.const.u64 %rd40, [params+144]; - cvta.to.global.u64 %rd41, %rd40; - add.s64 %rd42, %rd41, %rd14; - st.global.f32 [%rd42], %f71; - ld.const.u64 %rd43, [params+152]; - cvta.to.global.u64 %rd44, %rd43; - add.s64 %rd45, %rd44, %rd14; - st.global.f32 [%rd45], %f72; - ld.const.u64 %rd46, 
[params+160]; - cvta.to.global.u64 %rd47, %rd46; - add.s64 %rd48, %rd47, %rd14; - st.global.f32 [%rd48], %f73; - ld.const.u64 %rd49, [params+168]; - cvta.to.global.u64 %rd50, %rd49; - add.s64 %rd51, %rd50, %rd14; - st.global.f32 [%rd51], %f14; - ld.const.u64 %rd52, [params+176]; - cvta.to.global.u64 %rd53, %rd52; - add.s64 %rd54, %rd53, %rd14; - st.global.f32 [%rd54], %f15; - ld.const.u64 %rd55, [params+184]; - cvta.to.global.u64 %rd56, %rd55; - add.s64 %rd57, %rd56, %rd14; - st.global.f32 [%rd57], %f16; - ld.const.u64 %rd58, [params+192]; - cvta.to.global.u64 %rd59, %rd58; - add.s64 %rd60, %rd59, %rd14; - st.global.f32 [%rd60], %f21; - ld.const.u64 %rd61, [params+200]; - cvta.to.global.u64 %rd62, %rd61; - add.s64 %rd63, %rd62, %rd14; - st.global.f32 [%rd63], %f22; - ld.const.u64 %rd64, [params+208]; - cvta.to.global.u64 %rd65, %rd64; - add.s64 %rd66, %rd65, %rd14; - st.global.f32 [%rd66], %f23; - ld.const.u64 %rd67, [params+72]; - cvta.to.global.u64 %rd68, %rd67; - add.s64 %rd69, %rd68, %rd14; - st.global.f32 [%rd69], %f70; + add.s64 %rd34, %rd32, %rd14; + st.global.f32 [%rd34], %f20; + ld.const.u64 %rd35, [params+104]; + cvta.to.global.u64 %rd36, %rd35; + add.s64 %rd37, %rd36, %rd14; + st.global.f32 [%rd37], %f21; + ld.const.u64 %rd38, [params+112]; + cvta.to.global.u64 %rd39, %rd38; + add.s64 %rd40, %rd39, %rd14; + st.global.f32 [%rd40], %f22; + ld.const.u64 %rd41, [params+120]; + cvta.to.global.u64 %rd42, %rd41; + add.s64 %rd43, %rd42, %rd14; + st.global.f32 [%rd43], %f20; + ld.const.u64 %rd44, [params+128]; + cvta.to.global.u64 %rd45, %rd44; + add.s64 %rd46, %rd45, %rd14; + st.global.f32 [%rd46], %f21; + ld.const.u64 %rd47, [params+136]; + cvta.to.global.u64 %rd48, %rd47; + add.s64 %rd49, %rd48, %rd14; + st.global.f32 [%rd49], %f22; + ld.const.u64 %rd50, [params+168]; + cvta.to.global.u64 %rd51, %rd50; + add.s64 %rd52, %rd51, %rd14; + st.global.f32 [%rd52], %f24; + ld.const.u64 %rd53, [params+176]; + cvta.to.global.u64 %rd54, %rd53; + add.s64 %rd55, %rd54, 
%rd14; + st.global.f32 [%rd55], %f25; + ld.const.u64 %rd56, [params+184]; + cvta.to.global.u64 %rd57, %rd56; + add.s64 %rd58, %rd57, %rd14; + st.global.f32 [%rd58], %f26; + ld.const.u64 %rd59, [params+192]; + cvta.to.global.u64 %rd60, %rd59; + add.s64 %rd61, %rd60, %rd14; + st.global.f32 [%rd61], %f28; + ld.const.u64 %rd62, [params+200]; + cvta.to.global.u64 %rd63, %rd62; + add.s64 %rd64, %rd63, %rd14; + st.global.f32 [%rd64], %f29; + ld.const.u64 %rd65, [params+208]; + cvta.to.global.u64 %rd66, %rd65; + add.s64 %rd67, %rd66, %rd14; + st.global.f32 [%rd67], %f30; + +BB6_4: + ld.const.u64 %rd68, [params+72]; + cvta.to.global.u64 %rd69, %rd68; + add.s64 %rd71, %rd69, %rd14; + st.global.f32 [%rd71], %f13; -BB6_3: +BB6_5: ret; } @@ -1684,12 +1728,12 @@ BB7_6: ) { - .reg .pred %p<16>; - .reg .b16 %rs<3>; - .reg .f32 %f<178>; + .reg .pred %p<18>; + .reg .b16 %rs<4>; + .reg .f32 %f<203>; .reg .b32 %r<22>; .reg .f64 %fd<7>; - .reg .b64 %rd<71>; + .reg .b64 %rd<74>; // inline asm @@ -1713,103 +1757,103 @@ BB7_6: setp.eq.s64 %p1, %rd1, 0; @%p1 bra BB8_2; - cvta.to.global.u64 %rd4, %rd1; - cvt.u64.u32 %rd5, %r1; - add.s64 %rd6, %rd4, %rd5; - mov.u16 %rs1, 1; - st.global.u8 [%rd6], %rs1; - bra.uni BB8_10; + cvta.to.global.u64 %rd5, %rd1; + cvt.u64.u32 %rd6, %r1; + add.s64 %rd7, %rd5, %rd6; + mov.u16 %rs2, 1; + st.global.u8 [%rd7], %rs2; + bra.uni BB8_14; BB8_2: // inline asm - call (%rd7), _optix_get_sbt_data_ptr_64, (); - // inline asm - ld.u64 %rd3, [%rd7+8]; - // inline asm - call (%f21), _optix_get_world_ray_origin_x, (); - // inline asm - // inline asm - call (%f22), _optix_get_world_ray_origin_y, (); - // inline asm - // inline asm - call (%f23), _optix_get_world_ray_origin_z, (); - // inline asm - // inline asm - call (%f24), _optix_get_world_ray_direction_x, (); - // inline asm - // inline asm - call (%f25), _optix_get_world_ray_direction_y, (); - // inline asm - // inline asm - call (%f26), _optix_get_world_ray_direction_z, (); - // inline asm - // inline asm - call 
(%f27), _optix_get_ray_tmax, (); - // inline asm - fma.rn.f32 %f28, %f27, %f24, %f21; - fma.rn.f32 %f29, %f27, %f25, %f22; - fma.rn.f32 %f30, %f27, %f26, %f23; - ld.f32 %f31, [%rd3+288]; - sub.f32 %f32, %f28, %f31; - ld.f32 %f33, [%rd3+292]; - sub.f32 %f34, %f29, %f33; - ld.f32 %f35, [%rd3+296]; - sub.f32 %f36, %f30, %f35; - mul.f32 %f37, %f32, %f32; - fma.rn.f32 %f38, %f34, %f34, %f37; - fma.rn.f32 %f39, %f36, %f36, %f38; - sqrt.rn.f32 %f40, %f39; - div.rn.f32 %f2, %f32, %f40; - div.rn.f32 %f3, %f34, %f40; - div.rn.f32 %f4, %f36, %f40; - ld.f32 %f41, [%rd3+304]; - fma.rn.f32 %f5, %f41, %f2, %f31; - fma.rn.f32 %f6, %f41, %f3, %f33; - fma.rn.f32 %f7, %f41, %f4, %f35; - ld.v4.f32 {%f42, %f43, %f44, %f45}, [%rd3+208]; - ld.f32 %f49, [%rd3+160]; - fma.rn.f32 %f50, %f5, %f49, %f42; - ld.f32 %f51, [%rd3+164]; - fma.rn.f32 %f52, %f5, %f51, %f43; - ld.f32 %f53, [%rd3+168]; - fma.rn.f32 %f54, %f5, %f53, %f44; - ld.f32 %f55, [%rd3+176]; - fma.rn.f32 %f56, %f6, %f55, %f50; - ld.f32 %f57, [%rd3+180]; - fma.rn.f32 %f58, %f6, %f57, %f52; - ld.f32 %f59, [%rd3+184]; - fma.rn.f32 %f60, %f6, %f59, %f54; - ld.f32 %f61, [%rd3+192]; - fma.rn.f32 %f8, %f7, %f61, %f56; - ld.f32 %f62, [%rd3+196]; - fma.rn.f32 %f9, %f7, %f62, %f58; - ld.f32 %f63, [%rd3+200]; - fma.rn.f32 %f10, %f7, %f63, %f60; - mul.f32 %f64, %f9, %f9; - fma.rn.f32 %f11, %f8, %f8, %f64; - abs.f32 %f65, %f10; - mov.f32 %f66, 0f3F800000; - sub.f32 %f67, %f66, %f65; - mul.f32 %f68, %f67, 0f3F000000; - sqrt.rn.f32 %f69, %f68; - setp.gt.f32 %p2, %f65, 0f3F11EB85; - selp.f32 %f70, %f69, %f65, %p2; - mul.f32 %f71, %f70, %f70; - mov.f32 %f72, 0f3C94D2E9; - mov.f32 %f73, 0f3D53F941; - fma.rn.f32 %f74, %f73, %f71, %f72; - mov.f32 %f75, 0f3D3F841F; - fma.rn.f32 %f76, %f74, %f71, %f75; - mov.f32 %f77, 0f3D994929; - fma.rn.f32 %f78, %f76, %f71, %f77; - mov.f32 %f79, 0f3E2AAB94; - fma.rn.f32 %f80, %f78, %f71, %f79; - mul.f32 %f81, %f71, %f80; - fma.rn.f32 %f82, %f81, %f70, %f70; - add.f32 %f83, %f82, %f82; - mov.f32 %f84, 0f3FC90FDB; - 
sub.f32 %f85, %f84, %f82; - selp.f32 %f12, %f83, %f85, %p2; + call (%rd8), _optix_get_sbt_data_ptr_64, (); + // inline asm + ld.u64 %rd3, [%rd8+8]; + // inline asm + call (%f42), _optix_get_world_ray_origin_x, (); + // inline asm + // inline asm + call (%f43), _optix_get_world_ray_origin_y, (); + // inline asm + // inline asm + call (%f44), _optix_get_world_ray_origin_z, (); + // inline asm + // inline asm + call (%f45), _optix_get_world_ray_direction_x, (); + // inline asm + // inline asm + call (%f46), _optix_get_world_ray_direction_y, (); + // inline asm + // inline asm + call (%f47), _optix_get_world_ray_direction_z, (); + // inline asm + // inline asm + call (%f48), _optix_get_ray_tmax, (); + // inline asm + fma.rn.f32 %f49, %f48, %f45, %f42; + fma.rn.f32 %f50, %f48, %f46, %f43; + fma.rn.f32 %f51, %f48, %f47, %f44; + ld.f32 %f52, [%rd3+288]; + sub.f32 %f53, %f49, %f52; + ld.f32 %f54, [%rd3+292]; + sub.f32 %f55, %f50, %f54; + ld.f32 %f56, [%rd3+296]; + sub.f32 %f57, %f51, %f56; + mul.f32 %f58, %f53, %f53; + fma.rn.f32 %f59, %f55, %f55, %f58; + fma.rn.f32 %f60, %f57, %f57, %f59; + sqrt.rn.f32 %f61, %f60; + div.rn.f32 %f202, %f53, %f61; + div.rn.f32 %f201, %f55, %f61; + div.rn.f32 %f200, %f57, %f61; + ld.f32 %f62, [%rd3+304]; + fma.rn.f32 %f5, %f62, %f202, %f52; + fma.rn.f32 %f6, %f62, %f201, %f54; + fma.rn.f32 %f7, %f62, %f200, %f56; + ld.v4.f32 {%f63, %f64, %f65, %f66}, [%rd3+208]; + ld.f32 %f70, [%rd3+160]; + fma.rn.f32 %f71, %f5, %f70, %f63; + ld.f32 %f72, [%rd3+164]; + fma.rn.f32 %f73, %f5, %f72, %f64; + ld.f32 %f74, [%rd3+168]; + fma.rn.f32 %f75, %f5, %f74, %f65; + ld.f32 %f76, [%rd3+176]; + fma.rn.f32 %f77, %f6, %f76, %f71; + ld.f32 %f78, [%rd3+180]; + fma.rn.f32 %f79, %f6, %f78, %f73; + ld.f32 %f80, [%rd3+184]; + fma.rn.f32 %f81, %f6, %f80, %f75; + ld.f32 %f82, [%rd3+192]; + fma.rn.f32 %f8, %f7, %f82, %f77; + ld.f32 %f83, [%rd3+196]; + fma.rn.f32 %f9, %f7, %f83, %f79; + ld.f32 %f84, [%rd3+200]; + fma.rn.f32 %f10, %f7, %f84, %f81; + mul.f32 %f85, %f9, %f9; 
+ fma.rn.f32 %f11, %f8, %f8, %f85; + abs.f32 %f86, %f10; + mov.f32 %f87, 0f3F800000; + sub.f32 %f88, %f87, %f86; + mul.f32 %f89, %f88, 0f3F000000; + sqrt.rn.f32 %f90, %f89; + setp.gt.f32 %p2, %f86, 0f3F11EB85; + selp.f32 %f91, %f90, %f86, %p2; + mul.f32 %f92, %f91, %f91; + mov.f32 %f93, 0f3C94D2E9; + mov.f32 %f94, 0f3D53F941; + fma.rn.f32 %f95, %f94, %f92, %f93; + mov.f32 %f96, 0f3D3F841F; + fma.rn.f32 %f97, %f95, %f92, %f96; + mov.f32 %f98, 0f3D994929; + fma.rn.f32 %f99, %f97, %f92, %f98; + mov.f32 %f100, 0f3E2AAB94; + fma.rn.f32 %f101, %f99, %f92, %f100; + mul.f32 %f102, %f92, %f101; + fma.rn.f32 %f103, %f102, %f91, %f91; + add.f32 %f104, %f103, %f103; + mov.f32 %f105, 0f3FC90FDB; + sub.f32 %f106, %f105, %f103; + selp.f32 %f12, %f104, %f106, %p2; abs.f32 %f13, %f8; abs.f32 %f14, %f9; setp.eq.f32 %p3, %f13, 0f00000000; @@ -1825,7 +1869,7 @@ BB8_6: shr.s32 %r18, %r2, 31; and.b32 %r19, %r18, 1078530011; or.b32 %r20, %r19, %r3; - mov.b32 %f176, %r20; + mov.b32 %f189, %r20; bra.uni BB8_7; BB8_3: @@ -1840,199 +1884,212 @@ BB8_5: and.b32 %r15, %r14, 13483017; add.s32 %r16, %r15, 1061752795; or.b32 %r17, %r16, %r3; - mov.b32 %f176, %r17; + mov.b32 %f189, %r17; bra.uni BB8_7; BB8_4: - max.f32 %f86, %f14, %f13; - min.f32 %f87, %f14, %f13; - div.rn.f32 %f88, %f87, %f86; - mul.rn.f32 %f89, %f88, %f88; - mov.f32 %f90, 0fC0B59883; - mov.f32 %f91, 0fBF52C7EA; - fma.rn.f32 %f92, %f89, %f91, %f90; - mov.f32 %f93, 0fC0D21907; - fma.rn.f32 %f94, %f92, %f89, %f93; - mul.f32 %f95, %f89, %f94; - mul.f32 %f96, %f88, %f95; - add.f32 %f97, %f89, 0f41355DC0; - mov.f32 %f98, 0f41E6BD60; - fma.rn.f32 %f99, %f97, %f89, %f98; - mov.f32 %f100, 0f419D92C8; - fma.rn.f32 %f101, %f99, %f89, %f100; - rcp.rn.f32 %f102, %f101; - fma.rn.f32 %f103, %f96, %f102, %f88; - sub.f32 %f105, %f84, %f103; + max.f32 %f107, %f14, %f13; + min.f32 %f108, %f14, %f13; + div.rn.f32 %f109, %f108, %f107; + mul.rn.f32 %f110, %f109, %f109; + mov.f32 %f111, 0fC0B59883; + mov.f32 %f112, 0fBF52C7EA; + fma.rn.f32 %f113, 
%f110, %f112, %f111; + mov.f32 %f114, 0fC0D21907; + fma.rn.f32 %f115, %f113, %f110, %f114; + mul.f32 %f116, %f110, %f115; + mul.f32 %f117, %f109, %f116; + add.f32 %f118, %f110, 0f41355DC0; + mov.f32 %f119, 0f41E6BD60; + fma.rn.f32 %f120, %f118, %f110, %f119; + mov.f32 %f121, 0f419D92C8; + fma.rn.f32 %f122, %f120, %f110, %f121; + rcp.rn.f32 %f123, %f122; + fma.rn.f32 %f124, %f117, %f123, %f109; + sub.f32 %f126, %f105, %f124; setp.gt.f32 %p9, %f14, %f13; - selp.f32 %f106, %f105, %f103, %p9; - mov.f32 %f107, 0f40490FDB; - sub.f32 %f108, %f107, %f106; + selp.f32 %f127, %f126, %f124, %p9; + mov.f32 %f128, 0f40490FDB; + sub.f32 %f129, %f128, %f127; setp.lt.s32 %p10, %r2, 0; - selp.f32 %f109, %f108, %f106, %p10; - mov.b32 %r12, %f109; + selp.f32 %f130, %f129, %f127, %p10; + mov.b32 %r12, %f130; or.b32 %r13, %r12, %r3; - mov.b32 %f110, %r13; - add.f32 %f111, %f13, %f14; - setp.gtu.f32 %p11, %f111, 0f7F800000; - selp.f32 %f176, %f111, %f110, %p11; + mov.b32 %f131, %r13; + add.f32 %f132, %f13, %f14; + setp.gtu.f32 %p11, %f132, 0f7F800000; + selp.f32 %f189, %f132, %f131, %p11; BB8_7: - setp.geu.f32 %p12, %f176, 0f00000000; + setp.geu.f32 %p12, %f189, 0f00000000; @%p12 bra BB8_9; - cvt.f64.f32 %fd1, %f176; + cvt.f64.f32 %fd1, %f189; add.f64 %fd2, %fd1, 0d401921FB54442D18; - cvt.rn.f32.f64 %f176, %fd2; + cvt.rn.f32.f64 %f189, %fd2; BB8_9: - cvt.f64.f32 %fd3, %f176; - div.rn.f64 %fd4, %fd3, 0d401921FB54442D18; - cvt.rn.f32.f64 %f112, %fd4; - setp.lt.f32 %p13, %f10, 0f00000000; - mov.f32 %f113, 0f40490FDB; - sub.f32 %f114, %f113, %f12; - selp.f32 %f115, %f114, %f12, %p13; - cvt.f64.f32 %fd5, %f115; - div.rn.f64 %fd6, %fd5, 0d400921FB54442D18; - cvt.rn.f32.f64 %f116, %fd6; - sqrt.rn.f32 %f117, %f11; - rcp.rn.f32 %f118, %f117; - mul.f32 %f119, %f8, %f118; - mul.f32 %f120, %f9, %f118; - mul.f32 %f121, %f10, %f119; - mul.f32 %f122, %f10, %f120; - neg.f32 %f123, %f117; - setp.eq.f32 %p14, %f117, 0f00000000; - selp.f32 %f124, 0f00000000, %f123, %p14; - selp.f32 %f125, 0f00000000, 
%f122, %p14; - selp.f32 %f126, 0f3F800000, %f121, %p14; - ld.v4.f32 {%f127, %f128, %f129, %f130}, [%rd3+32]; + ld.const.u8 %rs1, [params+248]; + setp.eq.s16 %p13, %rs1, 0; + @%p13 bra BB8_11; + neg.f32 %f134, %f9; - mul.f32 %f135, %f127, %f134; - mul.f32 %f136, %f128, %f134; - mul.f32 %f137, %f129, %f134; - ld.v4.f32 {%f138, %f139, %f140, %f141}, [%rd3+48]; - fma.rn.f32 %f145, %f8, %f138, %f135; - fma.rn.f32 %f146, %f8, %f139, %f136; - fma.rn.f32 %f147, %f8, %f140, %f137; - ld.f32 %f148, [%rd3+64]; - mov.f32 %f149, 0f00000000; - fma.rn.f32 %f150, %f149, %f148, %f145; - ld.f32 %f151, [%rd3+68]; - fma.rn.f32 %f152, %f149, %f151, %f146; - ld.f32 %f153, [%rd3+72]; - fma.rn.f32 %f154, %f149, %f153, %f147; - mul.f32 %f155, %f150, 0f40C90FDB; - mul.f32 %f156, %f152, 0f40C90FDB; - mul.f32 %f157, %f154, 0f40C90FDB; - mul.f32 %f158, %f126, %f127; - mul.f32 %f159, %f126, %f128; - mul.f32 %f160, %f126, %f129; - fma.rn.f32 %f161, %f125, %f138, %f158; - fma.rn.f32 %f162, %f125, %f139, %f159; - fma.rn.f32 %f163, %f125, %f140, %f160; - fma.rn.f32 %f164, %f124, %f148, %f161; - fma.rn.f32 %f165, %f124, %f151, %f162; - fma.rn.f32 %f166, %f124, %f153, %f163; - mul.f32 %f167, %f164, 0f40490FDB; - mul.f32 %f168, %f165, 0f40490FDB; - mul.f32 %f169, %f166, 0f40490FDB; - ld.u8 %rs2, [%rd3+308]; - setp.eq.s16 %p15, %rs2, 0; - neg.f32 %f170, %f2; - selp.f32 %f171, %f2, %f170, %p15; - neg.f32 %f172, %f3; - selp.f32 %f173, %f3, %f172, %p15; - neg.f32 %f174, %f4; - selp.f32 %f175, %f4, %f174, %p15; - ld.u64 %rd8, [%rd7]; + sqrt.rn.f32 %f135, %f11; + rcp.rn.f32 %f136, %f135; + mul.f32 %f137, %f8, %f136; + mul.f32 %f138, %f9, %f136; + mul.f32 %f139, %f10, %f137; + mul.f32 %f140, %f10, %f138; + neg.f32 %f141, %f135; + setp.eq.f32 %p14, %f135, 0f00000000; + selp.f32 %f142, 0f3F800000, %f139, %p14; + selp.f32 %f143, 0f00000000, %f140, %p14; + selp.f32 %f144, 0f00000000, %f141, %p14; + ld.v4.f32 {%f145, %f146, %f147, %f148}, [%rd3+32]; + mul.f32 %f152, %f145, %f134; + mul.f32 %f153, %f146, %f134; + 
mul.f32 %f154, %f147, %f134; + ld.v4.f32 {%f155, %f156, %f157, %f158}, [%rd3+48]; + fma.rn.f32 %f162, %f8, %f155, %f152; + fma.rn.f32 %f163, %f8, %f156, %f153; + fma.rn.f32 %f164, %f8, %f157, %f154; + ld.f32 %f165, [%rd3+64]; + mov.f32 %f166, 0f00000000; + fma.rn.f32 %f167, %f166, %f165, %f162; + ld.f32 %f168, [%rd3+68]; + fma.rn.f32 %f169, %f166, %f168, %f163; + ld.f32 %f170, [%rd3+72]; + fma.rn.f32 %f171, %f166, %f170, %f164; + mul.f32 %f194, %f167, 0f40C90FDB; + mul.f32 %f195, %f169, 0f40C90FDB; + mul.f32 %f196, %f171, 0f40C90FDB; + mul.f32 %f172, %f142, %f145; + mul.f32 %f173, %f142, %f146; + mul.f32 %f174, %f142, %f147; + fma.rn.f32 %f175, %f143, %f155, %f172; + fma.rn.f32 %f176, %f143, %f156, %f173; + fma.rn.f32 %f177, %f143, %f157, %f174; + fma.rn.f32 %f178, %f144, %f165, %f175; + fma.rn.f32 %f179, %f144, %f168, %f176; + fma.rn.f32 %f180, %f144, %f170, %f177; + mul.f32 %f191, %f178, 0f40490FDB; + mul.f32 %f192, %f179, 0f40490FDB; + mul.f32 %f193, %f180, 0f40490FDB; + ld.u8 %rs3, [%rd3+308]; + setp.eq.s16 %p15, %rs3, 0; + neg.f32 %f181, %f202; + selp.f32 %f202, %f202, %f181, %p15; + neg.f32 %f182, %f201; + selp.f32 %f201, %f201, %f182, %p15; + neg.f32 %f183, %f200; + selp.f32 %f200, %f200, %f183, %p15; + +BB8_11: + ld.u64 %rd9, [%rd8]; // inline asm call (%r21), _optix_read_primitive_idx, (); // inline asm - ld.const.u64 %rd9, [params+216]; - cvta.to.global.u64 %rd10, %rd9; - mul.wide.u32 %rd11, %r1, 8; - add.s64 %rd12, %rd10, %rd11; - st.global.u64 [%rd12], %rd8; - ld.const.u64 %rd13, [params+224]; - cvta.to.global.u64 %rd14, %rd13; - mul.wide.u32 %rd15, %r1, 4; - add.s64 %rd16, %rd14, %rd15; - st.global.u32 [%rd16], %r21; - ld.const.u64 %rd17, [params+80]; - cvta.to.global.u64 %rd18, %rd17; - add.s64 %rd19, %rd18, %rd15; - st.global.f32 [%rd19], %f112; - ld.const.u64 %rd20, [params+88]; - cvta.to.global.u64 %rd21, %rd20; - add.s64 %rd22, %rd21, %rd15; - st.global.f32 [%rd22], %f116; - ld.const.u64 %rd23, [params+96]; - cvta.to.global.u64 %rd24, %rd23; - 
add.s64 %rd25, %rd24, %rd15; - st.global.f32 [%rd25], %f171; - ld.const.u64 %rd26, [params+104]; - cvta.to.global.u64 %rd27, %rd26; - add.s64 %rd28, %rd27, %rd15; - st.global.f32 [%rd28], %f173; - ld.const.u64 %rd29, [params+112]; - cvta.to.global.u64 %rd30, %rd29; - add.s64 %rd31, %rd30, %rd15; - st.global.f32 [%rd31], %f175; - ld.const.u64 %rd32, [params+120]; - cvta.to.global.u64 %rd33, %rd32; - add.s64 %rd34, %rd33, %rd15; - st.global.f32 [%rd34], %f171; - ld.const.u64 %rd35, [params+128]; - cvta.to.global.u64 %rd36, %rd35; - add.s64 %rd37, %rd36, %rd15; - st.global.f32 [%rd37], %f173; - ld.const.u64 %rd38, [params+136]; - cvta.to.global.u64 %rd39, %rd38; - add.s64 %rd40, %rd39, %rd15; - st.global.f32 [%rd40], %f175; - ld.const.u64 %rd41, [params+144]; - cvta.to.global.u64 %rd42, %rd41; - add.s64 %rd43, %rd42, %rd15; - st.global.f32 [%rd43], %f5; - ld.const.u64 %rd44, [params+152]; - cvta.to.global.u64 %rd45, %rd44; - add.s64 %rd46, %rd45, %rd15; - st.global.f32 [%rd46], %f6; - ld.const.u64 %rd47, [params+160]; - cvta.to.global.u64 %rd48, %rd47; - add.s64 %rd49, %rd48, %rd15; - st.global.f32 [%rd49], %f7; - ld.const.u64 %rd50, [params+168]; - cvta.to.global.u64 %rd51, %rd50; - add.s64 %rd52, %rd51, %rd15; - st.global.f32 [%rd52], %f155; - ld.const.u64 %rd53, [params+176]; - cvta.to.global.u64 %rd54, %rd53; - add.s64 %rd55, %rd54, %rd15; - st.global.f32 [%rd55], %f156; - ld.const.u64 %rd56, [params+184]; - cvta.to.global.u64 %rd57, %rd56; - add.s64 %rd58, %rd57, %rd15; - st.global.f32 [%rd58], %f157; - ld.const.u64 %rd59, [params+192]; - cvta.to.global.u64 %rd60, %rd59; - add.s64 %rd61, %rd60, %rd15; - st.global.f32 [%rd61], %f167; - ld.const.u64 %rd62, [params+200]; - cvta.to.global.u64 %rd63, %rd62; - add.s64 %rd64, %rd63, %rd15; - st.global.f32 [%rd64], %f168; - ld.const.u64 %rd65, [params+208]; - cvta.to.global.u64 %rd66, %rd65; - add.s64 %rd67, %rd66, %rd15; - st.global.f32 [%rd67], %f169; - ld.const.u64 %rd68, [params+72]; - cvta.to.global.u64 %rd69, 
%rd68; - add.s64 %rd70, %rd69, %rd15; - st.global.f32 [%rd70], %f27; + ld.const.u64 %rd10, [params+216]; + cvta.to.global.u64 %rd11, %rd10; + cvt.u64.u32 %rd4, %r1; + mul.wide.u32 %rd12, %r1, 8; + add.s64 %rd13, %rd11, %rd12; + st.global.u64 [%rd13], %rd9; + ld.const.u64 %rd14, [params+224]; + cvta.to.global.u64 %rd15, %rd14; + mul.wide.u32 %rd16, %r1, 4; + add.s64 %rd17, %rd15, %rd16; + st.global.u32 [%rd17], %r21; + ld.const.u64 %rd18, [params+80]; + cvta.to.global.u64 %rd19, %rd18; + add.s64 %rd20, %rd19, %rd16; + cvt.f64.f32 %fd3, %f189; + div.rn.f64 %fd4, %fd3, 0d401921FB54442D18; + cvt.rn.f32.f64 %f184, %fd4; + st.global.f32 [%rd20], %f184; + ld.const.u64 %rd21, [params+88]; + cvta.to.global.u64 %rd22, %rd21; + add.s64 %rd23, %rd22, %rd16; + mov.f32 %f185, 0f40490FDB; + sub.f32 %f186, %f185, %f12; + setp.lt.f32 %p16, %f10, 0f00000000; + selp.f32 %f187, %f186, %f12, %p16; + cvt.f64.f32 %fd5, %f187; + div.rn.f64 %fd6, %fd5, 0d400921FB54442D18; + cvt.rn.f32.f64 %f188, %fd6; + st.global.f32 [%rd23], %f188; + ld.const.u64 %rd24, [params+144]; + cvta.to.global.u64 %rd25, %rd24; + add.s64 %rd26, %rd25, %rd16; + st.global.f32 [%rd26], %f5; + ld.const.u64 %rd27, [params+152]; + cvta.to.global.u64 %rd28, %rd27; + add.s64 %rd29, %rd28, %rd16; + st.global.f32 [%rd29], %f6; + ld.const.u64 %rd30, [params+160]; + cvta.to.global.u64 %rd31, %rd30; + add.s64 %rd32, %rd31, %rd16; + st.global.f32 [%rd32], %f7; + @%p13 bra BB8_13; + + ld.const.u64 %rd33, [params+96]; + cvta.to.global.u64 %rd34, %rd33; + shl.b64 %rd35, %rd4, 2; + add.s64 %rd36, %rd34, %rd35; + st.global.f32 [%rd36], %f202; + ld.const.u64 %rd37, [params+104]; + cvta.to.global.u64 %rd38, %rd37; + add.s64 %rd39, %rd38, %rd35; + st.global.f32 [%rd39], %f201; + ld.const.u64 %rd40, [params+112]; + cvta.to.global.u64 %rd41, %rd40; + add.s64 %rd42, %rd41, %rd35; + st.global.f32 [%rd42], %f200; + ld.const.u64 %rd43, [params+120]; + cvta.to.global.u64 %rd44, %rd43; + add.s64 %rd45, %rd44, %rd35; + st.global.f32 [%rd45], 
%f202; + ld.const.u64 %rd46, [params+128]; + cvta.to.global.u64 %rd47, %rd46; + add.s64 %rd48, %rd47, %rd35; + st.global.f32 [%rd48], %f201; + ld.const.u64 %rd49, [params+136]; + cvta.to.global.u64 %rd50, %rd49; + add.s64 %rd51, %rd50, %rd35; + st.global.f32 [%rd51], %f200; + ld.const.u64 %rd52, [params+168]; + cvta.to.global.u64 %rd53, %rd52; + add.s64 %rd54, %rd53, %rd35; + st.global.f32 [%rd54], %f194; + ld.const.u64 %rd55, [params+176]; + cvta.to.global.u64 %rd56, %rd55; + add.s64 %rd57, %rd56, %rd35; + st.global.f32 [%rd57], %f195; + ld.const.u64 %rd58, [params+184]; + cvta.to.global.u64 %rd59, %rd58; + add.s64 %rd60, %rd59, %rd35; + st.global.f32 [%rd60], %f196; + ld.const.u64 %rd61, [params+192]; + cvta.to.global.u64 %rd62, %rd61; + add.s64 %rd63, %rd62, %rd35; + st.global.f32 [%rd63], %f191; + ld.const.u64 %rd64, [params+200]; + cvta.to.global.u64 %rd65, %rd64; + add.s64 %rd66, %rd65, %rd35; + st.global.f32 [%rd66], %f192; + ld.const.u64 %rd67, [params+208]; + cvta.to.global.u64 %rd68, %rd67; + add.s64 %rd69, %rd68, %rd35; + st.global.f32 [%rd69], %f193; + +BB8_13: + ld.const.u64 %rd70, [params+72]; + cvta.to.global.u64 %rd71, %rd70; + shl.b64 %rd72, %rd4, 2; + add.s64 %rd73, %rd71, %rd72; + st.global.f32 [%rd73], %f48; -BB8_10: +BB8_14: ret; } diff --git a/src/bsdfs/diffuse.cpp b/src/bsdfs/diffuse.cpp index 90b61e57e..cc9138e3d 100644 --- a/src/bsdfs/diffuse.cpp +++ b/src/bsdfs/diffuse.cpp @@ -95,6 +95,7 @@ class SmoothDiffuse final : public BSDF { bs.eta = 1.f; bs.sampled_type = +BSDFFlags::DiffuseReflection; bs.sampled_component = 0; + bs.sampled_roughness = math::Infinity; UnpolarizedSpectrum value = m_reflectance->eval(si, active); diff --git a/src/bsdfs/roughconductor.cpp b/src/bsdfs/roughconductor.cpp index d0c2e4fad..42a60adf0 100755 --- a/src/bsdfs/roughconductor.cpp +++ b/src/bsdfs/roughconductor.cpp @@ -223,6 +223,7 @@ class RoughConductor final : public BSDF { bs.eta = 1.f; bs.sampled_component = 0; bs.sampled_type = 
+BSDFFlags::GlossyReflection; + bs.sampled_roughness = (distr.alpha_u() + distr.alpha_v()) / 2.f; // Ensure that this is a valid sample active &= neq(bs.pdf, 0.f) && Frame3f::cos_theta(bs.wo) > 0.f; diff --git a/src/bsdfs/roughdielectric.cpp b/src/bsdfs/roughdielectric.cpp index 344304f35..ca0f846af 100644 --- a/src/bsdfs/roughdielectric.cpp +++ b/src/bsdfs/roughdielectric.cpp @@ -263,6 +263,7 @@ class RoughDielectric final : public BSDF { bs.sampled_type = select(selected_r, UInt32(+BSDFFlags::GlossyReflection), UInt32(+BSDFFlags::GlossyTransmission)); + bs.sampled_roughness = (sample_distr.alpha_u() + sample_distr.alpha_v())/2; Float dwh_dwo = 0.f; diff --git a/src/bsdfs/roughplastic.cpp b/src/bsdfs/roughplastic.cpp index b8d5e9411..d1884da4e 100644 --- a/src/bsdfs/roughplastic.cpp +++ b/src/bsdfs/roughplastic.cpp @@ -225,12 +225,14 @@ class RoughPlastic final : public BSDF { masked(bs.wo, sample_specular) = reflect(si.wi, m); masked(bs.sampled_component, sample_specular) = 0; masked(bs.sampled_type, sample_specular) = +BSDFFlags::GlossyReflection; + masked(bs.sampled_roughness, sample_specular) = (distr.alpha_u() + distr.alpha_v())/2; } if (any_or(sample_diffuse)) { masked(bs.wo, sample_diffuse) = warp::square_to_cosine_hemisphere(sample2); masked(bs.sampled_component, sample_diffuse) = 1; masked(bs.sampled_type, sample_diffuse) = +BSDFFlags::DiffuseReflection; + masked(bs.sampled_roughness, sample_diffuse) = math::Infinity; } bs.pdf = pdf(ctx, si, bs.wo, active); diff --git a/src/conftest.py b/src/conftest.py index fe0e3f0f4..8bfd9bf1c 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -45,7 +45,7 @@ def fixture(): for variant in ['scalar_rgb', 'scalar_spectral', 'scalar_mono_polarized', 'packet_rgb', - 'packet_spectral']: + 'packet_spectral', 'gpu_autodiff_rgb']: generate_fixture(variant) del generate_fixture diff --git a/src/emitters/CMakeLists.txt b/src/emitters/CMakeLists.txt index 04c94441c..9cec633a9 100644 --- a/src/emitters/CMakeLists.txt +++ 
b/src/emitters/CMakeLists.txt @@ -6,6 +6,7 @@ add_plugin(constant constant.cpp) add_plugin(envmap envmap.cpp) add_plugin(directional directional.cpp) add_plugin(spot spot.cpp) +add_plugin(smootharea smootharea.cpp) # Register the test directory add_tests(${CMAKE_CURRENT_SOURCE_DIR}/tests) diff --git a/src/emitters/smootharea.cpp b/src/emitters/smootharea.cpp new file mode 100644 index 000000000..273a31b80 --- /dev/null +++ b/src/emitters/smootharea.cpp @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(mitsuba) +/**! + +.. _emitter-smootharea: + +Smooth area light (:monosp:`smootharea`) +---------------------------------------- + +.. pluginparameters:: + + * - radiance + - |spectrum| + - Specifies the emitted radiance in units of power per unit area per unit steradian. + (Default: :ref:`d65 `) + * - blur_size + - |float| + - Specifies the width of the smooth transition region from full emission to zero + at the borders of the area light, in uv space. (Default: 0.1) + +This plugin implements an area light with a smooth transition from full emission +to zero (black) at its borders. This type of light is useful for differentiable +rendering since it typically avoids discontinuities around area lights. The transition +region is defined in uv space. This plugin should be used with a flat quadrilateral mesh +with texture coordinates that map to the unit square. + + */ + +template +class SmoothAreaLight final : public Emitter { +public: + MTS_IMPORT_BASE(Emitter, m_flags, m_shape, m_medium) + MTS_IMPORT_TYPES(Scene, Shape, Texture) + + SmoothAreaLight(const Properties &props) : Base(props) { + if (props.has_property("to_world")) + Throw("Found a 'to_world' transformation -- this is not allowed. 
" + "The area light inherits this transformation from its parent " + "shape."); + + m_radiance = props.texture("radiance", Texture::D65(1.f)); + m_blur_size = props.float_("blur_size", 0.1f); + + // TODO: detect if underlying spectrum really is spatially varying + m_flags = EmitterFlags::Surface | EmitterFlags::SpatiallyVarying; + } + + void set_shape(Shape *shape) override { + if (m_shape) + Throw("An area emitter can be only be attached to a single shape."); + + Base::set_shape(shape); + m_area_times_pi = m_shape->surface_area() * math::Pi; + } + + Float smooth_profile(Float x) const { + Float res(0); + res = select(x >= m_blur_size && x <= Float(1) - m_blur_size, Float(1), res); + res = select(x < m_blur_size && x > Float(0), x / m_blur_size, res); + res = select(x > Float(1) - m_blur_size && x < Float(1), + (1 - x) / m_blur_size, res); + return res; + } + + Spectrum eval(const SurfaceInteraction3f &si, Mask active) const override { + MTS_MASKED_FUNCTION(ProfilerPhase::EndpointEvaluate, active); + + return select( + Frame3f::cos_theta(si.wi) > 0.f, + unpolarized(m_radiance->eval(si, active)) + * smooth_profile(si.uv.x()) * smooth_profile(si.uv.y()), + 0.f + ); + } + + std::pair sample_ray(Float time, Float wavelength_sample, + const Point2f &sample2, const Point2f &sample3, + Mask active) const override { + MTS_MASKED_FUNCTION(ProfilerPhase::EndpointSampleRay, active); + + // 1. Sample spatial component + PositionSample3f ps = m_shape->sample_position(time, sample2, active); + + // 2. Sample directional component + Vector3f local = warp::square_to_cosine_hemisphere(sample3); + + // 3. 
Sample spectrum + SurfaceInteraction3f si(ps, zero(0.f)); + auto [wavelengths, spec_weight] = m_radiance->sample( + si, math::sample_shifted(wavelength_sample), active); + + spec_weight *= smooth_profile(ps.uv.x()) * smooth_profile(ps.uv.y()); + + return std::make_pair( + Ray3f(ps.p, Frame3f(ps.n).to_world(local), time, wavelengths), + unpolarized(spec_weight) * m_area_times_pi + ); + } + + std::pair + sample_direction(const Interaction3f &it, const Point2f &sample, Mask active) const override { + MTS_MASKED_FUNCTION(ProfilerPhase::EndpointSampleDirection, active); + + Assert(m_shape, "Can't sample from an area emitter without an associated Shape."); + + DirectionSample3f ds = m_shape->sample_direction(it, sample, active); + active &= dot(ds.d, ds.n) < 0.f && neq(ds.pdf, 0.f); + + SurfaceInteraction3f si(ds, it.wavelengths); + Spectrum spec = m_radiance->eval(si, active) / ds.pdf; + spec *= smooth_profile(ds.uv.x()) * smooth_profile(ds.uv.y()); + + ds.object = this; + return { ds, unpolarized(spec) & active }; + } + + Float pdf_direction(const Interaction3f &it, const DirectionSample3f &ds, + Mask active) const override { + MTS_MASKED_FUNCTION(ProfilerPhase::EndpointEvaluate, active); + + return select(dot(ds.d, ds.n) < 0.f, + m_shape->pdf_direction(it, ds, active), 0.f); + } + + ScalarBoundingBox3f bbox() const override { return m_shape->bbox(); } + + void traverse(TraversalCallback *callback) override { + callback->put_object("radiance", m_radiance.get()); + } + + void parameters_changed(const std::vector &keys) override { + if (string::contains(keys, "parent")) + m_area_times_pi = m_shape->surface_area() * math::Pi; + } + + std::string to_string() const override { + std::ostringstream oss; + oss << "SmoothAreaLight[" << std::endl + << " radiance = " << string::indent(m_radiance) << "," << std::endl + << " surface_area = "; + if (m_shape) oss << m_shape->surface_area(); + else oss << " "; + oss << "," << std::endl; + if (m_medium) oss << 
string::indent(m_medium->to_string()); + else oss << " "; + oss << std::endl << "]"; + return oss.str(); + } + + MTS_DECLARE_CLASS() +private: + ref m_radiance; + ScalarFloat m_area_times_pi = 0.f; + ScalarFloat m_blur_size; +}; + +MTS_IMPLEMENT_CLASS_VARIANT(SmoothAreaLight, Emitter) +MTS_EXPORT_PLUGIN(SmoothAreaLight, "Smooth Area emitter") +NAMESPACE_END(mitsuba) + diff --git a/src/integrators/CMakeLists.txt b/src/integrators/CMakeLists.txt index d3660f3b9..b9adf8c21 100644 --- a/src/integrators/CMakeLists.txt +++ b/src/integrators/CMakeLists.txt @@ -8,6 +8,7 @@ add_plugin(stokes stokes.cpp) add_plugin(moment moment.cpp) add_plugin(volpath volpath.cpp) add_plugin(volpathmis volpathmis.cpp) +add_plugin(pathreparam path-reparam.cpp) # Register the test directory add_tests(${CMAKE_CURRENT_SOURCE_DIR}/tests) diff --git a/src/integrators/path-reparam-utils.h b/src/integrators/path-reparam-utils.h new file mode 100644 index 000000000..d98baa559 --- /dev/null +++ b/src/integrators/path-reparam-utils.h @@ -0,0 +1,263 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(mitsuba) + +/* Helper for the correction factor in convolution sampling */ + +template +class VMFHemisphereIntegral { + +public: + + VMFHemisphereIntegral() { + m_k_res = 0; + m_t_res = 0; + + // TODO clean this + auto fs = Thread::thread()->file_resolver(); + fs::path file_path = fs->resolve("data/vmf-hemisphere.data"); + auto name = file_path.filename().string(); + + auto fail = [&](const char *descr, auto... 
args) { + Throw(("Error while loading file \"%s\": " + std::string(descr)) + .c_str(), name, args...); + }; + + Log(Debug, "Loading data from \"%s\" ..", name); + if (!fs::exists(file_path)) + fail("file not found"); + + ref mmap = new MemoryMappedFile(file_path); + + const char *ptr = (const char *) mmap->data(); + const char *eof = ptr + mmap->size(); + char buf[1025]; + + size_t nb_data_line = 0; + + std::unique_ptr tmp_data; + + while (ptr < eof) { + // Determine the offset of the next newline + const char *next = ptr; + advance(&next, eof, "\n"); + + // Copy buf into a 0-terminated buffer + size_t size = next - ptr; + if (size >= sizeof(buf) - 1) + fail("file contains an excessively long line! (%i characters)", size); + memcpy(buf, ptr, size); + buf[size] = '\0'; + + // Skip whitespace + const char *cur = buf, *eol = buf + size; + advance(&cur, eol, " \t\r"); + + bool parse_error = false; + + if (cur[0] == 'R') { + cur += 2; + const char *next2; + m_k_res = strtoul(cur, (char **) &next2, 10); + cur = next2; + m_t_res = strtoul(cur, (char **) &next2, 10); + tmp_data = std::unique_ptr(new float[m_k_res*m_t_res]); + } else if (cur[0] != '#' && cur[0] != '\0') { + if (m_t_res == 0) + fail("the resolution of the data must be specified before"); + + if (nb_data_line >= m_k_res) + fail("too much data, found more than %i lines", m_k_res); + + for (size_t i = 0; i < m_t_res; ++i) { + const char *orig = cur; + float flt = strtof(cur, (char **) &cur); + parse_error |= cur == orig; + tmp_data[i + (nb_data_line) * m_k_res] = flt; + } + + nb_data_line += 1; + } + + if (unlikely(parse_error)) + fail("could not parse line \"%s\"", buf); + + ptr = next + 1; + } + + m_data = DynamicBuffer::copy(&tmp_data[0], m_k_res * m_t_res); + + Log(Debug, "Loaded VMFHemisphereIntegral data, %ix%i.", m_k_res, m_t_res); + + } + + Float eval(float k, Float costheta, mask_t active) const { + using UInt32 = uint32_array_t; + using Point2u = Point; + using Point2f = Point; + using Vector2f = 
Vector; + using Vector2u = Vector; + + Vector2u size(m_t_res, m_k_res); + + Vector2f uv(costheta, mapping_K_U(Float(k))); + + uv = min(uv, 1.f); + uv = max(uv, 0.f); + uv *= Vector2f(size - 1u); + + Point2u pos = min(Point2u(uv), size - 2u); + + Point2f w1 = uv - Point2f(pos), + w0 = 1.f - w1; + + UInt32 index = pos.x() + pos.y() * (uint32_t) size.x(); + + uint32_t width = (uint32_t) size.x(); + Float v00 = gather(m_data, index, active); + Float v10 = gather(m_data, index + 1u, active); + Float v01 = gather(m_data, index + width, active); + Float v11 = gather(m_data, index + width + 1u, active); + + Float s0 = fmadd(w0.x(), v00, w1.x() * v10), + s1 = fmadd(w0.x(), v01, w1.x() * v11); + + return fmadd(w0.y(), s0, w1.y() * s1); + } + + Float mapping_U_K(Float u) const { + Float u_max = 6.f; + return 0.1f * pow(10.f, u * u_max) - 0.1f; + } + + Float mapping_K_U(Float k) const { + Float u_max = 6.f; + return log(10.f * k + 1.f) / (log(10.f) * u_max); + } + +private: + + template + void advance(const char **start_, const char *end, const char (&delim)[N]) { + const char *start = *start_; + + while (true) { + bool is_delim = false; + for (size_t i = 0; i < N; ++i) + if (*start == delim[i]) + is_delim = true; + if ((is_delim ^ Negate) || start == end) + break; + ++start; + } + + *start_ = start; + } + + size_t m_k_res; + size_t m_t_res; + + DynamicBuffer m_data; +}; + +// Helpers for duplicating data in large CUDA arrays + +template Value concatD(const Value &a, const Value &b) { + using T = + std::conditional_t, value_t, Value>; + using UInt = uint_array_t; + using Mask = mask_t; + if constexpr (is_cuda_array_v) { + size_t N = slices(a); + if (slices(a) != slices(b)) { + Throw("DiffPathIntegrator::concatD: cannot concat arrays with " + "different sizes (not implemented)."); + } + UInt index = arange(N * 2); + Mask m = index < N; + index = select(m, index, index - N); + return select(m, gather(a, index, m), + gather(b, index, !m)); + } else { + 
Throw("DiffPathIntegrator::concatD: can only concat cuda arrays."); + } +} + +template Value makePairD(const Value &a) { + using T = + std::conditional_t, value_t, Value>; + using UInt = uint_array_t; + using Mask = mask_t; + if constexpr (is_cuda_array_v) { + size_t N = slices(a); + if (N > 0) { + UInt index = arange(N * 2); + Mask m = index < N; + index = select(m, index, index - N); + return gather(a, index); + } else { + return Value(); + } + } else { + Throw("DiffPathIntegrator::makePairD: can only makePairD cuda arrays."); + } +} + +// Helpers for sampling large CUDA Arrays + +template > +Float samplePair1D(const Mask &m, Sampler *sampler) { + size_t N = slices(m) / 2; + using UInt = uint_array_t; + UInt indices = arange(N); + Mask m0 = gather(m, indices); + Mask m1 = gather(m, indices + N); + Float sample = sampler->next_1d(m0 || m1); + return makePairD(sample); +} + +template , + typename Point2 = Point> +Point2 samplePair2D(const Mask &m, Sampler *sampler) { + using UInt = uint_array_t; + size_t N = slices(m) / 2; + UInt indices = arange(N); + Mask m0 = gather(m, indices); + Mask m1 = gather(m, indices + N); + Point2 sample = sampler->next_2d(m0 || m1); + return makePairD(sample); +} + +template > +Float sample1D(const Mask &m, Sampler *sampler) { + using UInt = uint_array_t; + size_t N = slices(m) / 2; + UInt indices = arange(N); + Mask m0 = gather(m, indices); + Mask m1 = gather(m, indices + N); + return concatD(sampler->next_1d(m0), sampler->next_1d(m1)); +} + +template , + typename Point2 = Point> +Point2 sample2D(const Mask &m, Sampler *sampler) { + using UInt = uint_array_t; + size_t N = slices(m) / 2; + UInt indices = arange(N); + Mask m0 = gather(m, indices); + Mask m1 = gather(m, indices + N); + return concatD(sampler->next_2d(m0), sampler->next_2d(m1)); +} + +NAMESPACE_END(mitsuba) diff --git a/src/integrators/path-reparam.cpp b/src/integrators/path-reparam.cpp new file mode 100644 index 000000000..a8e904acc --- /dev/null +++ 
b/src/integrators/path-reparam.cpp @@ -0,0 +1,841 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "path-reparam-utils.h" + +#define REUSE_CAMERA_RAYS 1 + +NAMESPACE_BEGIN(mitsuba) + +/**! + +.. _integrator-pathreparam: + +Differentiable path tracer (:monosp:`pathreparam`) +-------------------------------------------------- + +.. pluginparameters:: + + * - max_depth + - |int| + - Specifies the longest path depth in the generated output image (where -1 + corresponds to :math:`\infty`). A value of 1 will only render directly + visible light sources. 2 will lead to single-bounce (direct-only) + illumination, and so on. (Default: -1) + * - rr_depth + - |int| + - Specifies the minimum path depth, after which the implementation will + start to use the *russian roulette* path termination criterion. (Default: 5) + * - dc_light_samples + - |int| + - Specifies the number of samples for reparameterizing direct lighting + integrals. (Default: 4) + * - dc_bsdf_samples + - |int| + - Specifies the number of samples for reparameterizing BSDFs integrals. + (Default: 4) + * - dc_cam_samples + - |int| + - Specifies the number of samples for reparameterizing pixel integrals. + (Default: 4) + * - conv_threshold + - |float| + - Specifies the BSDFs roughness threshold that activates convolutions. + (Default: 0.15f) + * - use_convolution + - |bool| + - Enable convolution for rough BSDFs. (Default: yes, i.e. |true|) + * - kappa_conv + - |float| + - Specifies the kappa parameter of von Mises-Fisher distributions for BSDFs + convolutions. (Default: 1000.f) + * - use_convolution_envmap + - |bool| + - Enable convolution for environment maps. (Default: yes, i.e. |true|) + * - kappa_conv_envmap + - |float| + - Specifies the kappa parameter of von Mises-Fisher distributions for + environment map convolutions. 
(Default: 100000.f) + * - use_variance_reduction + - |bool| + - Enable variance reduction. (Default: yes, i.e. |true|) + * - disable_gradient_diffuse + - |bool| + - Disable reparameterization for diffuse scattering. (Default: no, i.e. |false|) + * - disable_gradient_bounce + - |int| + - Disable reparameterization after this many scattering events: contributions at a path depth greater than or equal to this value are detached from the gradient. (Default: 1000) + +This integrator implements the reparameterization technique described in the +`article <https://rgl.epfl.ch/publications/Loubet2019Reparameterizing>`_ +"Reparameterizing discontinuous integrands for differentiable rendering". +It is based on the integrator :ref:`path <integrator-path>` and it applies +reparameterizations to rendering integrals in order to account for discontinuities +when pixel values are differentiated using GPU modes and the Python API. + +This plugin supports environment maps and area lights with the plugin +:ref:`smootharea <emitter-smootharea>`, which is similar to the plugin +:ref:`area <emitter-area>` with smoothly decreasing radiant exitance at the +borders of the area light geometry to avoid discontinuities. The area light +geometry should be flat and it should have valid uv coordinates (see +:ref:`smootharea <emitter-smootharea>` for details). Other light +sources will lead to incorrect partial derivatives. Large area lights also +result in significant bias since the convolution technique described in the +paper is only applied to environment maps and rough/diffuse BSDF integrals. + +Another limitation of this implementation is memory usage on the GPU: automatic +differentiation for an entire path tracer typically requires several GB of GPU +memory. The rendering must sometimes be split into several rendering passes with +small sample counts in order to fit into GPU memory. + +.. 
note:: This integrator does not handle participating media + + */ + +template +class PathReparamIntegrator : public MonteCarloIntegrator { +public: + MTS_IMPORT_BASE(MonteCarloIntegrator, m_max_depth, m_rr_depth) + MTS_IMPORT_TYPES(Scene, Sampler, Medium, Emitter, EmitterPtr, BSDF, BSDFPtr) + + PathReparamIntegrator(const Properties &props) : Base(props) { + m_dc_light_samples = props.size_("dc_light_samples", 4); + m_dc_bsdf_samples = props.size_("dc_bsdf_samples", 4); + m_dc_cam_samples = props.size_("dc_cam_samples", 4); + m_conv_threshold = props.float_("conv_threshold", 0.15f); + m_kappa_conv = props.float_("kappa_conv", 1000.f); + m_kappa_conv_envmap = props.float_("kappa_conv_envmap", 100000.f); + m_use_convolution = props.bool_("use_convolution", true); + m_use_convolution_envmap = props.bool_("use_convolution_envmap", true); + m_use_variance_reduction = props.bool_("use_variance_reduction", true); + m_disable_gradient_diffuse = props.bool_("disable_gradient_diffuse", false); + m_disable_gradient_bounce = props.size_("disable_gradient_bounce", 1000); + + Log(Debug, "Changes of variables in light integrals using %i samples", + m_dc_light_samples); + Log(Debug, "Changes of variables in BSDFs integrals using %i samples", + m_dc_bsdf_samples); + Log(Debug, "Changes of variables in pixel integrals using %i samples", + m_dc_cam_samples); + Log(Debug, "Changes of variables using convolution if roughness > %f", + m_conv_threshold); + Log(Debug, "Convolutions using kernel with kappa = %f", + m_kappa_conv); + Log(Debug, "Variance reduction %s", + m_use_variance_reduction ? "enabled" : "disabled"); + Log(Debug, "Convolutions %s", + m_use_convolution ? "enabled" : "disabled"); + Log(Debug, "Convolutions for envmap %s", + m_use_convolution_envmap ? "enabled" : "disabled"); + Log(Debug, "Gradient of diffuse reflections %s", + m_disable_gradient_diffuse ? 
"disabled" : "enabled"); + Log(Debug, "Disable gradients after bounce %i", m_disable_gradient_bounce); + Log(Debug, "Reusing camera samples is %s", + REUSE_CAMERA_RAYS ? "enabled" : "disabled"); + } + + std::pair sample(const Scene *scene, + Sampler *sampler, + const RayDifferential3f &primary_ray_, + const Medium * /* medium */, + Float * /* aovs */, + Mask active_primary) const override { + if constexpr (is_cuda_array_v) { + RayDifferential3f primary_ray = primary_ray_; + + // Estimate kappa for the convolution of pixel integrals, based on ray + // differentials. + Float angle = acos(min(dot(primary_ray.d_x, primary_ray.d), + dot(primary_ray.d_y, primary_ray.d))); + Float target_mean_cos = + min(cos(angle * 0.4f /*arbitrary*/), Float(1.f - 1e-7f)); + + // The vMF distribution has an analytic expression for the mean cosine: + // mean = 1 + 2/(exp(2*k)-1) - 1/k. + // For large values of kappa, 1-1/k is a precise approximation of this + // function. It can be inverted to find k from the mean cosine. + Float kappa_camera = Float(1.f) / (Float(1.f) - target_mean_cos); + + const size_t nb_pimary_rays = slices(primary_ray.d); + const UInt32 arange_indices = arange(nb_pimary_rays); + + Spectrum result(0.f); + + // ---------------- Convolution of pixel integrals ------------- + + // Detect discontinuities in a small vMF kernel around each ray. 
+ + std::vector rays(m_dc_cam_samples); + std::vector sis(m_dc_cam_samples); + + Frame frame_input = Frame(primary_ray.d); + + Vector3f dir_conv_0, dir_conv_1; + + // Sample the integrals and gather intersections + for (size_t cs = 0; cs < m_dc_cam_samples; cs++) { + Vector3f vMF_sample_cs = warp::square_to_von_mises_fisher( + sampler->next_2d(active_primary), kappa_camera); + Vector3f dir_conv_cs = frame_input.to_world(vMF_sample_cs); + + primary_ray.d = dir_conv_cs; + sis[cs] = scene->ray_intersect(primary_ray, HitComputeMode::Least, active_primary); + sis[cs].compute_differentiable_shape_position(active_primary); + + rays[cs] = RayDifferential(primary_ray); + + // Keep two directions for creating pairs of paths. + // We choose the last samples since they have less + // chances of being used in the estimation of the + // discontinuity. + if (cs == m_dc_cam_samples - 2) + dir_conv_0 = dir_conv_cs; + if (cs == m_dc_cam_samples - 1) + dir_conv_1 = dir_conv_cs; + } + + Point3f discontinuity = estimate_discontinuity(rays, sis, active_primary); + Vector3f discontinuity_dir = normalize(discontinuity - primary_ray.o); + + // The following rotation seems to be the identity transformation, but it actually + // changes the partial derivatives. + + // Create the differentiable rotation + Vector3f axis = cross(detach(discontinuity_dir), discontinuity_dir); + Float cosangle = dot(discontinuity_dir, detach(discontinuity_dir)); + Transform4f rotation = rotation_from_axis_cosangle(axis, cosangle); + + // Tracks radiance scaling due to index of refraction changes + Float eta(1.f); + + // MIS weight for intersected emitters (set by prev. iteration) + Float emission_weight(1.f); + + // Make pairs of rays (reuse 2 samples) and apply rotation + Spectrum throughput(1.f); + +#if !REUSE_CAMERA_RAYS + // Resample two rays. This tends to add bias on silhouettes. 
+ dir_conv_0 = frame_input.to_world( + warp::square_to_von_mises_fisher( + sampler->next_2d(active_primary), kappa_camera)); + dir_conv_1 = frame_input.to_world( + warp::square_to_von_mises_fisher( + sampler->next_2d(active_primary), kappa_camera)); +#endif + + // NOTE: here we detach because the rays will be passed to Optix, no need for autodiff + Vector ray_d_0 = rotation.transform_affine(detach(dir_conv_0)); + Vector ray_d_1 = rotation.transform_affine(detach(dir_conv_1)); + + Vector3f ray_d = concatD(ray_d_0, ray_d_1); + Point3f ray_o = makePairD(primary_ray.o); + Wavelength ray_w = makePairD(primary_ray.wavelengths); + + Ray3f ray = Ray3f(ray_o, ray_d, 0.0, ray_w); + + Mask active(true); + set_slices(active, nb_pimary_rays * 2); + + // Recompute differentiable pdf + Float vMF_pdf_diff_0 = warp::square_to_von_mises_fisher_pdf( + frame_input.to_local(ray_d_0), kappa_camera); + Float vMF_pdf_diff_1 = warp::square_to_von_mises_fisher_pdf( + frame_input.to_local(ray_d_1), kappa_camera); + Float vMF_pdf_diff = concatD(vMF_pdf_diff_0, vMF_pdf_diff_1); + + // Apply differentiable weight and keep for variance reduction + throughput *= vMF_pdf_diff / detach(vMF_pdf_diff); // NOTE: detach here so we only divide the gradient by the pdf + Float current_weight = vMF_pdf_diff / detach(vMF_pdf_diff); + + // ---------------------- First intersection ---------------------- + + auto si = scene->ray_intersect(ray, HitComputeMode::Differentiable, active); + + Mask valid_ray_pair = si.is_valid(); + + Mask valid_ray = + gather(valid_ray_pair, arange_indices) || + gather(valid_ray_pair, arange_indices + nb_pimary_rays); + + EmitterPtr emitter = si.emitter(scene); + + for (size_t depth = 1;; ++depth) { + + // ---------------- Intersection with emitters ---------------- + { + Spectrum emission(0.f); + emission[active] = emission_weight * throughput * emitter->eval(si, active); + + Spectrum emission_0 = gather(emission, arange_indices); + Spectrum emission_1 = gather(emission, 
arange_indices + nb_pimary_rays); + + Float weights_0 = gather(current_weight, arange_indices); + Float weights_1 = gather(current_weight, arange_indices + nb_pimary_rays); + + if (depth >= m_disable_gradient_bounce) { + result += detach(emission_0) * 0.5f; // NOTE: detach so nothing is added to the gradient + result += detach(emission_1) * 0.5f; + } else if (m_use_variance_reduction) { + // Avoid numerical errors due to tiny weights + weights_0 = select(abs(weights_0) < 0.00001f, Float(1.f), weights_0); + weights_1 = select(abs(weights_1) < 0.00001f, Float(1.f), weights_1); + + // Variance reduction, assumption that contribution = weight * constant + result += (emission_0 - emission_1 / weights_1 * (weights_0 - detach(weights_0))) * 0.5f; // NOTE: detach here so to only add `e_1/w_1*w_0` to the gradient (only try to reduce the variance of the gradient) + result += (emission_1 - emission_0 / weights_0 * (weights_1 - detach(weights_1))) * 0.5f; + } else { + result += emission_0 * 0.5f; + result += emission_1 * 0.5f; + } + } + + active &= si.is_valid(); + + // Russian roulette: try to keep path weights equal to one, + // while accounting for the solid angle compression at refractive + // index boundaries. Stop with at least some probability to avoid + // getting stuck (e.g. due to total internal reflection) + if (int(depth) > m_rr_depth) { + Float q = min(hmax(throughput) * sqr(eta), .95f); + active &= sample1D(active, sampler) < q; + throughput *= rcp(q); + } + + if (none(active) || (uint32_t) depth >= (uint32_t) m_max_depth) + break; + + // --------------------- Emitter sampling --------------------- + + BSDFContext ctx; + BSDFPtr bsdf = si.bsdf(ray); + Mask active_e = active && has_flag(bsdf->flags(), BSDFFlags::Smooth); + + // Sample the light integral at each active shading point. + // Several samples are used for estimating discontinuities + // in light visibility. 
+ auto [emitter_ls, emitter_pdf] = scene->sample_emitter( + si, samplePair1D(active_e, sampler), active_e); + + Mask is_envmap = emitter_ls->is_environment() && active_e; + + Point3f position_discontinuity(0.f); + UInt32 hits(0); + + std::vector ds_ls(m_dc_light_samples); + std::vector emitter_val_ls(m_dc_light_samples); + std::vector is_occluded_ls(m_dc_light_samples); + + auto ds_ls_main = emitter_ls->sample_direction(si, samplePair2D(active_e, sampler), active_e).first; + Frame frame_main_ls(ds_ls_main.d); + + for (size_t ls = 0; ls < m_dc_light_samples; ls++) { + std::tie(ds_ls[ls], emitter_val_ls[ls]) = + emitter_ls->sample_direction( + si, samplePair2D(active_e, sampler), active_e); + + if (m_use_convolution_envmap) { + Vector3f sample_ls = + warp::square_to_von_mises_fisher( + sample2D(active_e, sampler), + m_kappa_conv_envmap); + + // Update with the pdf of the convolution kernel + ds_ls[ls].pdf[is_envmap] = warp::square_to_von_mises_fisher_pdf( + sample_ls, m_kappa_conv_envmap); + sample_ls = frame_main_ls.to_world(sample_ls); + ds_ls[ls].d[is_envmap] = sample_ls; + } + + Mask active_ls = active_e && neq(ds_ls[ls].pdf, 0.f); + + // Check masking for active rays + Ray3f ray_ls(si.p, ds_ls[ls].d, math::RayEpsilon * (1.f + hmax(abs(si.p))), + ds_ls[ls].dist * (1.f - math::ShadowEpsilon), + si.time, si.wavelengths); + ray_ls.maxt[is_envmap] = math::Infinity; + + auto si_ls = scene->ray_intersect(ray_ls, HitComputeMode::Least, active_ls); + si_ls.compute_differentiable_shape_position(active_ls); + + is_occluded_ls[ls] = neq(si_ls.shape, nullptr); + position_discontinuity[is_occluded_ls[ls]] += si_ls.p; + hits = select(is_occluded_ls[ls], hits + 1, hits); + + if (m_use_convolution_envmap) { + // The contribution is radiance * kernel / ds_ls_main.pdf / kernel (pdf) + emitter_val_ls[ls][is_envmap] = emitter_ls->eval(si_ls, is_envmap) / ds_ls_main.pdf; + } + + // The contribution is 0 when the light is not visible + emitter_val_ls[ls][is_occluded_ls[ls]] = 
Spectrum(0.f); + } + + // Compute differentiable rotations from emitter samples + + Mask use_reparam = hits > 0.f; + position_discontinuity[use_reparam] = position_discontinuity / hits; + + Vector3f direction_discontinuity(0.f); + direction_discontinuity[use_reparam] = normalize(position_discontinuity - si.p); + Vector3f direction_discontinuity_detach = detach(direction_discontinuity); + + // TODO: maybe should use same logic as in BSDF sampling (detach in normalize()) + // Vector3f direction_discontinuity_detach = normalize(detach(position_discontinuity) - si.p); + + Vector3f axis_ls = cross(direction_discontinuity_detach, direction_discontinuity); + Float cosangle_ls = dot(direction_discontinuity, direction_discontinuity_detach); + Transform4f rotation_ls = rotation_from_axis_cosangle(axis_ls, cosangle_ls); + + std::vector contribs_ls(m_dc_light_samples); + + // Reuse all the emitter samples and compute differentiable contributions + + for (size_t ls = 0; ls < m_dc_light_samples; ls++) { + + // Recompute direction + ds_ls[ls].d[use_reparam] = rotation_ls.transform_affine(detach(ds_ls[ls].d)); + + if (m_use_convolution_envmap) { + // Recompute the value of convolution kernel + ds_ls[ls].pdf[use_reparam && is_envmap] = warp::square_to_von_mises_fisher_pdf( + frame_main_ls.to_local(ds_ls[ls].d), m_kappa_conv_envmap); + } + + // Recompute the contribution when a reparameterization is used + Mask visible_and_hit = use_reparam && (!is_occluded_ls[ls]); + + Ray3f ray_ls(si.p, ds_ls[ls].d, + math::RayEpsilon * (1.f + hmax(abs(si.p))), + ds_ls[ls].dist + 1.f, + si.time, si.wavelengths); + + auto si_ls = scene->ray_intersect( + ray_ls, HitComputeMode::Differentiable, + visible_and_hit); + + Spectrum e_val_reparam = emitter_ls->eval(si_ls, visible_and_hit) / detach(ds_ls[ls].pdf); + + if (m_use_convolution_envmap) { + e_val_reparam[visible_and_hit && is_envmap] *= ds_ls[ls].pdf / ds_ls_main.pdf; + } + + emitter_val_ls[ls][visible_and_hit] = e_val_reparam; + + if 
(m_use_convolution_envmap) { + // Update emitter pdf for MIS + Float pdf_emitter = emitter_ls->pdf_direction(si, ds_ls[ls], is_envmap); + ds_ls[ls].pdf[is_envmap] = detach(pdf_emitter); + } + + // Compute contribution + + Mask active_c = active_e && neq(ds_ls[ls].pdf, 0.f); + + // Query the BSDF for that emitter-sampled direction + Vector3f wo = si.to_local(ds_ls[ls].d); + Spectrum bsdf_val = bsdf->eval(ctx, si, wo, active_c); + + // Determine probability of having sampled that same + // direction using BSDF sampling. + Float bsdf_pdf = bsdf->pdf(ctx, si, wo, active_c); + + Float mis = select(ds_ls[ls].delta, 1.f, mis_weight(ds_ls[ls].pdf * emitter_pdf, bsdf_pdf)); + + contribs_ls[ls] = throughput * emitter_val_ls[ls] / emitter_pdf * bsdf_val * mis; + } + + // Accumulate contributions and variance reduction (in pairs of paths) + if (m_dc_light_samples > 1) { + Spectrum contrib(0.f); + for (size_t ls = 0; ls < m_dc_light_samples; ls++) { + contrib += contribs_ls[ls]; + } + + contrib /= m_dc_light_samples; + + // Add the contribution of this light sample. + // The weight is the current weight of the throughput. + Spectrum emitter_sampling(0.f); + emitter_sampling[active_e] += contrib; + + Spectrum emitter_sampling_0 = gather(emitter_sampling, arange_indices); + Spectrum emitter_sampling_1 = gather(emitter_sampling, arange_indices + nb_pimary_rays); + + Float weights_0 = gather(current_weight, arange_indices); + Float weights_1 = gather(current_weight, arange_indices + nb_pimary_rays); + + // Here the weights weights_0 and weights_1 come from previous + // bsdf sampling, their gradients are uncorrelated to the sampled emissions + // **of the other path** emitter_sampling_0 and emitter_sampling_1. 
+ if (depth >= m_disable_gradient_bounce) { + result += detach(emitter_sampling_0) * 0.5f; + result += detach(emitter_sampling_1) * 0.5f; + } else if (m_use_variance_reduction) { + weights_0 = select(abs(weights_0) < 0.00001f, Float(1.f), weights_0); + weights_1 = select(abs(weights_1) < 0.00001f, Float(1.f), weights_1); + + result += (emitter_sampling_0 - emitter_sampling_1 / weights_1 * (weights_0 - detach(weights_0))) * 0.5f; + result += (emitter_sampling_1 - emitter_sampling_0 / weights_0 * (weights_1 - detach(weights_1))) * 0.5f; + } else { + result += emitter_sampling_0 * 0.5f; + result += emitter_sampling_1 * 0.5f; + } + } else { + Throw("PathReparamIntegrator: m_dc_light_samples < 2 not implemented!"); + } + + // ----------------------- BSDF sampling ---------------------- + + Float component_sample = samplePair1D(active, sampler); + + auto sample_main_bs = bsdf->sample(ctx, si, component_sample, samplePair2D(active, sampler), active).first; + + active &= sample_main_bs.pdf > 0.f; + + // TODO: BSDFs should fill the `sampled_roughness` field + Mask convolution = Mask(m_use_convolution) && active + && sample_main_bs.sampled_roughness > m_conv_threshold; + + if (any(has_flag(sample_main_bs.sampled_type, BSDFFlags::Delta))) + Log(Error, "This pluggin does not support perfectly specular reflections" + " and transmissions. Rough materials should be used instead."); + + Frame frame_main_bs(sample_main_bs.wo); + std::vector ds_bs(m_dc_bsdf_samples); + + // Compute directions to samples either from the bsdf or the + // convolution of the bsdf. Only the first one is + // used for the light paths. 
+ for (size_t bs = 0; bs < m_dc_bsdf_samples; bs++) { + Vector2f samples = sample2D(active, sampler); + // Convolution: sample a vmf lobe + Vector3f sample_bs = warp::square_to_von_mises_fisher(samples, m_kappa_conv); + sample_bs = frame_main_bs.to_world(sample_bs); + + // Otherwise: must be uncorrelated, but can sample the same component + auto [sample_bs_noconv, bsdf_val_bs] = bsdf->sample(ctx, si, component_sample, samples, active); + + ds_bs[bs] = select(convolution, sample_bs, sample_bs_noconv.wo); + } + + // Sample all these rays for discontinuity estimation + std::vector rays_bs(m_dc_bsdf_samples); + std::vector sis_bs(m_dc_bsdf_samples); + + Mask use_reparam_bs(false); + for (size_t bs = 0; bs < m_dc_bsdf_samples; bs++) { + rays_bs[bs] = si.spawn_ray(si.to_world(ds_bs[bs])); + sis_bs[bs] = scene->ray_intersect(rays_bs[bs], HitComputeMode::Least, active); + sis_bs[bs].compute_differentiable_shape_position(active); + // Set use_reparam_bs to true if find hit + use_reparam_bs = use_reparam_bs || (active && neq(sis_bs[bs].shape, nullptr)); + } + + if (m_disable_gradient_diffuse) { + use_reparam_bs &= !convolution; + current_weight = select(use_reparam_bs, current_weight, detach(current_weight)); + } + + Point3f discontinuity_bs = estimate_discontinuity(rays_bs, sis_bs, active); + + Vector3f direction_diff = normalize(discontinuity_bs - si.p); + Vector3f discontinuity_bs_detach = detach(discontinuity_bs); + Vector3f direction_detach = normalize(discontinuity_bs_detach - si.p); + + Vector3f axis_bs = cross(direction_detach, direction_diff); + Float cosangle_bs = dot(direction_diff, direction_detach); + Transform4f rotation_bs = rotation_from_axis_cosangle(axis_bs, cosangle_bs); // This rotation is in world space + + // Initialize the BSDF sample from the initial sample, eta and + // sampled_type do not change since the same component is sampled. 
+ BSDFSample3 sample_bs = sample_main_bs; + + // Reuse one direction sampled from either the BSDF or the convolution kernel + // around the main direction. + sample_bs.wo = ds_bs[0]; // Reuse the first one, could be any of them + + // Apply the differentiable rotation + // Warning, the direction must be detached such that it follows the discontinuities + // Warning, this rotation in world space, but wo is in local space + sample_bs.wo[use_reparam_bs] = si.to_local(rotation_bs.transform_affine( + si.to_world(detach(sample_bs.wo)))); + + // Compute the differentiable BSDF value for the differentiable direction + Spectrum bsdf_value = bsdf->eval(ctx, si, sample_bs.wo, active); + + // Compute the pdf of the convolution kernel for the selected direction + // Warning: need to transform to a frame centered around the Z axis + Float pdf_conv_new_dir = warp::square_to_von_mises_fisher_pdf(frame_main_bs.to_local(sample_bs.wo), + m_kappa_conv); + + // Multiply the BSDF value by the convolution kernel. Use a + // correction term for the convolution (otherwise less energy + // at grazing angles) + Float cosangle_vmf = sample_bs.wo.z(); + Float correction_factor = m_vmf_hemisphere.eval(m_kappa_conv, cosangle_vmf, convolution); + + bsdf_value = select(convolution, bsdf_value * pdf_conv_new_dir / correction_factor, bsdf_value); + + // Compute the value of default importance sampling pdf of the BSDF. 
+ // Used when convolution is disabled and for MIS + Float bsdf_pdf_default = bsdf->pdf(ctx, si, sample_bs.wo, active); + + /* The pdf should be: + - When not using changes of variables, the undetached pdf + because the sample are sampled from the standard pdf + - When using changes of variables and convolution, + the pdf of the main direction (not detached) times + the detached pdf using for sampling the convolution kernel + (always detached pdf because the samples don't move wrt the rotating sampling pdf) + - When not using the convolution, detached pdf */ + Float bsdf_pdf = select(convolution, + sample_main_bs.pdf * pdf_conv_new_dir, + bsdf_pdf_default); + bsdf_pdf[use_reparam_bs] = select(convolution, + sample_main_bs.pdf * detach(pdf_conv_new_dir), + detach(bsdf_pdf_default)); + Spectrum bsdf_value_pdf = bsdf_value / bsdf_pdf; + + /* Compute weights for variance reduction + These weights should be: + - just 1 if no change of variable is used + - Weights whose expected gradient is 0 and value is + close to bsdf_value_pdf. */ + // TODO: these weights should be colors. + + Mask set_weights = use_reparam_bs && (bsdf_pdf > 0.001f); + current_weight = select(set_weights && convolution, + current_weight * detach(bsdf_value_pdf[0]) * pdf_conv_new_dir / detach(pdf_conv_new_dir), + current_weight); + current_weight = select(set_weights && !convolution, + current_weight * detach(bsdf_value_pdf[0]) * bsdf_pdf_default / detach(bsdf_pdf_default), + current_weight); + throughput *= bsdf_value_pdf; + + active &= any(neq(throughput, 0.f)); + + if (none(active)) + break; + + eta *= sample_bs.eta; + + // Intersect the BSDF ray against the scene geometry + ray = si.spawn_ray(si.to_world(sample_bs.wo)); + auto si_bsdf = scene->ray_intersect(ray, HitComputeMode::Differentiable, active); + + // Determine probability of having sampled that same + // direction using emitter sampling. 
+ emitter = si_bsdf.emitter(scene, active); + DirectionSample3f ds(si_bsdf, si); + ds.object = emitter; + + if (any_or(neq(emitter, nullptr))) { + Float emitter_pdf = + select(has_flag(sample_bs.sampled_type, BSDFFlags::Delta), 0.f, + scene->pdf_emitter_direction(si, ds, active)); + + // Always use the standard importance sampling pdf of the BSDF, + // since this is the pdf used for MIS weights when sampling emitters. + emission_weight = mis_weight(bsdf_pdf_default, emitter_pdf); + } + + si = std::move(si_bsdf); + } + + return { result, valid_ray }; + } else { + ENOKI_MARK_USED(scene); + ENOKI_MARK_USED(sampler); + ENOKI_MARK_USED(primary_ray_); + ENOKI_MARK_USED(active_primary); + Throw("PathReparamIntegrator: currently this integrator must be run on the GPU."); + return {Spectrum(0.f), Mask(false)}; + } + } + + //! @} + // ============================================================= + + std::string to_string() const override { + return tfm::format("PathReparamIntegrator[\n" + " max_depth = %i,\n" + " rr_depth = %i\n" + "]", m_max_depth, m_rr_depth); + } + + MTS_DECLARE_CLASS() + +protected: + // TODO: try power heuristic, could reduce bias in gradient with large area lights + template Value mis_weight(Value pdf_a, Value pdf_b) const { + pdf_a *= pdf_a; + pdf_b *= pdf_b; + return select(pdf_a > 0.f, pdf_a / (pdf_a + pdf_b), Value(0.f)); + }; + + mitsuba::Transform rotation_from_axis_cosangle(Vector3f axis, Float cosangle) const { + Float ax = axis.x(), + ay = axis.y(), + az = axis.z(); + Float axy = ax * ay, + axz = ax * az, + ayz = ay * az; + + Matrix3f ux(0.f, -az, ay, + az, 0.f, -ax, + -ay, ax, 0.f); + + Matrix3f uu(sqr(ax), axy, axz, + axy, sqr(ay), ayz, + axz, ayz, sqr(az)); + + Matrix3f R = identity() * cosangle + ux + rcp(1 + cosangle) * uu; + + return mitsuba::Transform(Matrix4f(R)); + }; + + Point3f estimate_discontinuity(const std::vector &rays, + const std::vector &sis, + const Mask &/*mask*/) const { + + using Matrix = enoki::Matrix; + + size_t 
nb_samples = rays.size(); + + if (rays.size() < 2 || rays.size() != sis.size()) + Throw("PathReparamIntegrator::estimate_discontinuity: invalid number of samples for discontinuity estimation"); + + Point3f ray0_p_attached = sis[0].p; + Vector3f ray0_n = sis[0].n; + + UInt32 is_ray1_hit_uint = select(neq(sis[1].shape, nullptr), UInt32(1), UInt32(0)); + Point3f ray1_p_attached = sis[1].p; + Vector3f ray1_n = sis[1].n; + Vector3f ray1_d = rays[1].d; + + for (size_t i = 2; i < nb_samples; i++) { + Mask diff = neq(sis[0].shape, sis[i].shape); + Mask i_hit = neq(sis[i].shape, nullptr); + is_ray1_hit_uint = select(diff, select(i_hit, UInt32(1), UInt32(0)), is_ray1_hit_uint); + ray1_p_attached = select(diff, sis[i].p, ray1_p_attached); + ray1_n = select(diff, sis[i].n, ray1_n); + ray1_d = select(diff, rays[i].d, ray1_d); + } + + Mask is_ray1_hit = is_ray1_hit_uint > 0; + + // Guess occlusion for pairs of samples + + Point3f res(0.f); + + // if only one hit: return this hit + Mask only_hit_0 = neq(sis[0].shape, nullptr) && !is_ray1_hit; + res[only_hit_0] = ray0_p_attached; + + Mask only_hit_1 = is_ray1_hit && eq(sis[0].shape, nullptr); + res[only_hit_1] = ray1_p_attached; + + Mask has_two_hits = neq(sis[0].shape, nullptr) && is_ray1_hit; + + // Compute occlusion between planes and hitpoints: sign of + // dot(normal, hitpoint - hitpoint). Test if the origin of the rays + // is on the same side as the other hit. 
+ Float occ_plane_0 = + dot(ray0_n, ray1_p_attached - ray0_p_attached) * + dot(ray0_n, rays[0].o - ray0_p_attached); + Float occ_plane_1 = dot(ray1_n, ray0_p_attached - ray1_p_attached) * + dot(ray0_n, rays[0].o - ray0_p_attached); + + Mask plane_0_occludes_1 = has_two_hits && (occ_plane_0 < 0.f); + Mask plane_1_occludes_0 = has_two_hits && (occ_plane_1 < 0.f); + + Mask simple_occluder_0 = plane_0_occludes_1 && !plane_1_occludes_0; + Mask simple_occluder_1 = plane_1_occludes_0 && !plane_0_occludes_1; + Mask plane_intersection = has_two_hits && !simple_occluder_1 && !simple_occluder_0; + + /* simple_occluder */ + + res[simple_occluder_0] = ray0_p_attached; + res[simple_occluder_1] = ray1_p_attached; + + /* same_normals */ + + Mask same_normals = plane_intersection && abs(dot(ray0_n, ray1_n)) > 0.99f; + plane_intersection &= !same_normals; + res[same_normals] = ray0_p_attached; + + /* plane_intersection */ + +#if 1 + // Compute the intersection between 3 planes: + // 2 planes defined by the ray intersections and + // the normals at these points, and 1 plane containing + // the ray directions. + + Vector3f N0 = ray0_n; + Vector3f N1 = ray1_n; + Vector3f P0 = ray0_p_attached; + Vector3f P1 = ray1_p_attached; + + // Normal of the third plane, defined using + // attached positions (this prevents bad correlations + // between the displacement of the intersection and + // the sampled positions) + + Vector3f N = cross(P0 - rays[0].o, P1 - rays[0].o); + Float norm_N = norm(N); + + // Set a default intersection if the problem is ill-defined + res[plane_intersection] = ray0_p_attached; + + Mask invertible = plane_intersection && norm_N > 0.001f; + + Matrix A = Matrix::from_rows(N0, N1, N); + Float b0 = dot(P0, N0); + Float b1 = dot(P1, N1); + Float b2 = dot(rays[0].o, N); + Vector3f B(b0, b1, b2); + Matrix invA = enoki::inverse(A); + res[invertible] = invA * B; +#else + // Simply choose one of the intersections. + // This is a good strategy in many situations. 
+ res[plane_intersection] = ray0_p_attached; +#endif + + return res; + + } + +private: + size_t m_disable_gradient_bounce; + size_t m_dc_light_samples; + size_t m_dc_bsdf_samples; + size_t m_dc_cam_samples; + ScalarFloat m_conv_threshold; + ScalarFloat m_kappa_conv; + ScalarFloat m_kappa_conv_envmap; + bool m_use_variance_reduction; + bool m_use_convolution; + bool m_use_convolution_envmap; + bool m_disable_gradient_diffuse; + + VMFHemisphereIntegral m_vmf_hemisphere; +}; + +MTS_IMPLEMENT_CLASS_VARIANT(PathReparamIntegrator, MonteCarloIntegrator); +MTS_EXPORT_PLUGIN(PathReparamIntegrator, "Differentiable Path Tracer integrator"); +NAMESPACE_END(mitsuba)
diff --git a/src/integrators/tests/test_pathreparam.py b/src/integrators/tests/test_pathreparam.py
new file mode 100644
index 000000000..ce9d55c99
--- /dev/null
+++ b/src/integrators/tests/test_pathreparam.py
@@ -0,0 +1,420 @@
+import mitsuba
+import pytest
+import enoki as ek
+import numpy as np
+
+from mitsuba.python.test.util import fresolver_append_path
+
+# Convert flat array into a vector of arrays (will be included in next enoki release)
+def ravel(buf, dim = 3):
+    """Gather a flat, interleaved buffer into a Point2f (dim=2) or Point3f (dim=3)"""
+    from mitsuba.core import UInt32, Point2f, Point3f
+    idx = dim * UInt32.arange(ek.slices(buf) // dim)
+    if dim == 2:
+        return Point2f(ek.gather(buf, idx), ek.gather(buf, idx + 1))
+    elif dim == 3:
+        return Point3f(ek.gather(buf, idx), ek.gather(buf, idx + 1), ek.gather(buf, idx + 2))
+    else:
+        raise ValueError("ravel(): unsupported dim=%r (expected 2 or 3)" % (dim,))
+
+# Return contiguous flattened array (will be included in next enoki release)
+def unravel(source, target, dim = 3):
+    """Scatter component i of `source` to the positions dim * k + i of the flat `target`"""
+    from mitsuba.core import UInt32
+    idx = UInt32.arange(ek.slices(source))
+    for i in range(dim):
+        ek.scatter(target, source[i], dim * idx + i)
+
+
+def write_gradient_image(grad, name):
+    """Write signed floats as an exr image (positive values in red, negative in blue)"""
+    from mitsuba.core import Bitmap
+
+    convert_to_rgb = True
+
+    if convert_to_rgb:
+        # Compute RGB channels for .exr image (no grad = black)
+        grad_R = grad.copy()
+        grad_R[grad_R < 0] = 0.0
+        grad_B = grad.copy()
+        grad_B[grad_B > 0] = 0.0
+        grad_B *= -1.0
+        grad_G = grad.copy() * 0.0
+
+        grad_np = np.concatenate((grad_R, grad_G, grad_B), axis=2)
+    else:
+        grad_np = np.concatenate((grad, grad, grad), axis=2)
+
+    print('Writing', name + ".exr")
+    Bitmap(grad_np).write(name + ".exr")
+
+
+def render_gradient(scene, passes, diff_params):
+    """Render radiance and gradient image using forward autodiff"""
+    from mitsuba.python.autodiff import render
+
+    fsize = scene.sensors()[0].film().size()
+
+    img = np.zeros((fsize[1], fsize[0], 3), dtype=np.float32)
+    grad = np.zeros((fsize[1], fsize[0], 1), dtype=np.float32)
+    for i in range(passes):
+        img_i = render(scene)
+        ek.forward(diff_params, i == passes - 1)
+
+        grad_i = ek.gradient(img_i).numpy().reshape(fsize[1], fsize[0], -1)[:, :, [0]]
+        img_i = img_i.numpy().reshape(fsize[1], fsize[0], -1)
+
+        # Remove NaNs
+        grad_i[grad_i != grad_i] = 0
+        img_i[img_i != img_i] = 0
+
+        grad += grad_i
+        img += img_i
+
+    return img / passes, grad / passes
+
+
+def compute_groundtruth(make_scene, integrator, spp, passes, epsilon):
+    """Render groundtruth radiance and gradient image using central finite differences"""
+    from mitsuba.python.autodiff import render
+
+    def render_offset(offset):
+        scene = make_scene(integrator, spp, offset)
+        fsize = scene.sensors()[0].film().size()
+
+        values = render(scene)
+        for _ in range(passes - 1):
+            values += render(scene)
+        values /= passes
+
+        return values.numpy().reshape(fsize[1], fsize[0], -1)
+
+    gradient = (render_offset(epsilon) - render_offset(-epsilon)) / (2.0 * ek.norm(epsilon))
+
+    image = render_offset(0.0)
+
+    return image, gradient[:, :, [0]]
+
+
+diff_integrator_default = { "type" : "pathreparam", "max_depth" : 2 }
+ref_integrator_default = { "type" : "path", "max_depth" : 2 }
+
+
+def check_finite_difference(test_name,
+                            make_scene,
+                            get_diff_params,
+                            diff_integrator=diff_integrator_default,
+                            diff_spp=4,
+                            diff_passes=8,
+                            ref_integrator=ref_integrator_default,
+                            ref_spp=128,
+                            ref_passes=10,
+                            ref_eps=0.002,
+                            error_threshold=0.05):
+    """Compare resulting image and image gradient with finite difference method"""
+    from mitsuba.core import Bitmap, Struct
+    from mitsuba.python.autodiff import render
+
+    # Render groundtruth image and gradients (using finite difference)
+    img_ref, grad_ref = compute_groundtruth(make_scene, ref_integrator, ref_spp, ref_passes, ref_eps)
+
+    ek.cuda_malloc_trim()
+
+    scene = make_scene(diff_integrator, diff_spp, 0.0)
+    fsize = scene.sensors()[0].film().size()
+    img, grad = render_gradient(scene, diff_passes, get_diff_params(scene))
+
+    # Normalize the gradient error by the largest reference gradient magnitude.
+    # The signed maximum may be zero or negative, which would make the relative
+    # error meaningless (and could let the comparison below pass trivially).
+    grad_scale = np.abs(grad_ref).max()
+    assert grad_scale > 0, "%s: reference gradient is zero everywhere" % test_name
+
+    error_img = np.abs(img_ref - img).mean()
+    error_grad = np.abs(grad_ref - grad).mean() / grad_scale
+
+    if error_img > error_threshold:
+        print("error_img:", error_img)
+        Bitmap(img_ref).write('%s_img_ref.exr' % test_name)
+        Bitmap(img).write('%s_img.exr' % test_name)
+        assert False, "%s: image error %f exceeds threshold %f" % (test_name, error_img, error_threshold)
+
+    if error_grad > error_threshold:
+        print("error_grad:", error_grad)
+        write_gradient_image(grad_ref / grad_scale, '%s_grad_ref' % test_name)
+        write_gradient_image(grad / grad_scale, '%s_grad' % test_name)
+        Bitmap(img_ref).write('%s_img_ref.exr' % test_name)
+        Bitmap(img).write('%s_img.exr' % test_name)
+        assert False, "%s: gradient error %f exceeds threshold %f" % (test_name, error_grad, error_threshold)
+
+
+def update_vertex_buffer(scene, object_name, diff_trafo):
+    """Apply the given transformation to mesh vertex positions and call update scene"""
+    from mitsuba.python.util import traverse
+
+    params = traverse(scene)
+    key = object_name + '.vertex_positions_buf'
+
+    vertex_positions_buf = params[key]
+    vertex_positions = ravel(vertex_positions_buf)
+
+    vertex_positions_t = diff_trafo.transform_point(vertex_positions)
+
+    unravel(vertex_positions_t, params[key])
+    params.set_dirty(key)
+
+    params.update()
+
+
+# ----------------
+# Template scene
+
+
+mitsuba.set_variant("scalar_rgb")
+from mitsuba.core import ScalarTransform4f
+
+scene_template_dict = {
+    "type" : "scene",
+    "integrator" : { "type" : "path", "max_depth" : 2 },
+    "sensor" : {
+        "type" : "perspective",
+        "fov_axis" : "smaller",
+        "near_clip" : 0.1,
+        "far_clip" : 2800,
+        "focus_distance" : 1000,
+        "fov" : 10,
+        "to_world" : ScalarTransform4f.look_at([0, 0, 10], [0, 0, 0], [0, 1, 0]),
+        "sampler" : {
+            "type" : "independent",
+            "sample_count" : 4
+        },
+        "film" : {
+            "type" : "hdrfilm",
+            "width" : 48,
+            "height" : 48,
+            "filter" : { "type" : "box" }
+        }
+    },
+    "planemesh" : {
+        "type" : "obj",
+        "id" : "planemesh",
+        "to_world" : ScalarTransform4f.scale(2.0),
+        "filename" : "resources/data/obj/xy_plane.obj",
+        "bsdf" : { "type" : "diffuse" }
+    },
+    "light_shape" : {
+        "type" : "obj",
+        "id" : "light_shape",
+        "to_world" : ScalarTransform4f.translate([10, 0, 15]) * ScalarTransform4f.rotate([1, 0, 0], 180),
+        "filename" : "resources/data/obj/xy_plane.obj",
+        "smooth_area_light" : {
+            "type" : "smootharea",
+            "radiance" : { "type": "spectrum", "value": 100 }
+        }
+    },
+    "object" : {
+        "type" : "obj",
+        "id" : "object",
+        "to_world" : ScalarTransform4f.translate([0, 0, 1.0]),
+        "filename" : "resources/data/obj/smooth_empty_cube.obj"
+    }
+}
+
+
+def make_scene_dict(integrator):
+    """Return a modifiable copy of `scene_template_dict` using the given integrator
+
+    Nested dictionaries are copied as well, so entries assigned by a test do not
+    leak into `scene_template_dict` or into the caller's `integrator` dictionary.
+    Non-dictionary values (e.g. transforms) are shared rather than copied.
+    """
+    def copy_nested(value):
+        if isinstance(value, dict):
+            return { key : copy_nested(item) for key, item in value.items() }
+        return value
+
+    scene_dict = copy_nested(scene_template_dict)
+    scene_dict["integrator"] = copy_nested(integrator)
+    return scene_dict
+
+
+# ----------------
+# Tests
+
+
+# NOTE: the disabled tests below still use `dict(scene_template_dict)`, which
+# only copies the top level. Use `make_scene_dict()` when re-enabling them.
+
+# @pytest.mark.slow
+# def test01_light_position(variant_gpu_autodiff_rgb):
+#     from mitsuba.core import Float, Vector3f, Transform4f, ScalarTransform4f, ScalarVector3f, xml
+
+#     if ek.cuda_mem_get_info()[1] < int(1e9):
+#         pytest.skip('Insufficient GPU memory')
+
+#     @fresolver_append_path
+#     def make_scene(integrator, spp, param):
+#         scene_dict = dict(scene_template_dict)
+#         scene_dict["integrator"] = integrator
+#         scene_dict["sensor"]["sampler"]["sample_count"] = spp
+#         scene_dict["light_shape"]["to_world"] = ScalarTransform4f.translate(ScalarVector3f(10, 0, 15) + param) * ScalarTransform4f.rotate([1, 0, 0], 180)
+#         return xml.load_dict(scene_dict)
+
+#     def get_diff_param(scene):
+#         diff_param = Float(0.0)
+#         ek.set_requires_gradient(diff_param)
+#         diff_trafo = Transform4f.translate(diff_param)
+#         update_vertex_buffer(scene, 'light_shape', diff_trafo)
+#         return diff_param
+
+#     check_finite_difference("light_position", make_scene, get_diff_param)
+
+
+# @pytest.mark.slow
+# def test02_object_position(variant_gpu_autodiff_rgb):
+#     from mitsuba.core import Float, Transform4f, ScalarTransform4f, ScalarVector3f, xml
+#     from mitsuba.python.util import traverse
+
+#     if ek.cuda_mem_get_info()[1] < int(1e9):
+#         pytest.skip('Insufficient GPU memory')
+
+#     @fresolver_append_path
+#     def make_scene(integrator, spp, param):
+#         scene_dict = dict(scene_template_dict)
+#         scene_dict["integrator"] = integrator
+#         scene_dict["sensor"]["sampler"]["sample_count"] = spp
+#         scene_dict["object"]["to_world"] = ScalarTransform4f.translate(ScalarVector3f(0, 0, 1) + param)
+#         return xml.load_dict(scene_dict)
+
+#     def get_diff_param(scene):
+#         diff_param = Float(0.0)
+#         ek.set_requires_gradient(diff_param)
+#         diff_trafo = Transform4f.translate(diff_param)
+#         update_vertex_buffer(scene, 'object', diff_trafo)
+#         return diff_param
+
+#     check_finite_difference("object_position", make_scene, get_diff_param)
+
+
+# @pytest.mark.slow
+# def test03_object_rotation(variant_gpu_autodiff_rgb):
+#     from mitsuba.core import Float, Transform4f, ScalarTransform4f, ScalarVector3f, xml
+#     from mitsuba.python.util import traverse
+
+#     if ek.cuda_mem_get_info()[1] < int(1e9):
+#         pytest.skip('Insufficient GPU memory')
+
+#     @fresolver_append_path
+#     def make_scene(integrator, spp, param):
+#         scene_dict = dict(scene_template_dict)
+#         scene_dict["integrator"] = integrator
+#         scene_dict["sensor"]["sampler"]["sample_count"] = spp
+#         scene_dict["object"]["to_world"] = ScalarTransform4f.rotate([1, 0, 0], param)
+#         return xml.load_dict(scene_dict)
+
+#     def get_diff_param(scene):
+#         diff_param = Float(0.0)
+#         ek.set_requires_gradient(diff_param)
+#         diff_trafo = Transform4f.rotate([1, 0, 0], diff_param)
+#         update_vertex_buffer(scene, 'object', diff_trafo)
+#         return diff_param
+
+#     check_finite_difference("object_rotation", make_scene, get_diff_param, ref_eps=0.05)
+
+
+# @pytest.mark.slow
+# def test04_object_scaling(variant_gpu_autodiff_rgb):
+#     from mitsuba.core import Float, Transform4f, ScalarTransform4f, xml
+#     from mitsuba.python.util import traverse
+
+#     if ek.cuda_mem_get_info()[1] < int(1e9):
+#         pytest.skip('Insufficient GPU memory')
+
+#     @fresolver_append_path
+#     def make_scene(integrator, spp, param):
+#         scene_dict = dict(scene_template_dict)
+#         scene_dict["integrator"] = integrator
+#         scene_dict["sensor"]["sampler"]["sample_count"] = spp
+#         scene_dict["object"]["to_world"] = ScalarTransform4f.scale([1 + param, 1, 1])
+#         return xml.load_dict(scene_dict)
+
+#     def get_diff_param(scene):
+#         diff_param = Float(0.0)
+#         ek.set_requires_gradient(diff_param)
+#         diff_trafo = Transform4f.scale([1 + diff_param, 1, 1])
+#         update_vertex_buffer(scene, 'object', diff_trafo)
+#         return diff_param
+
+#     check_finite_difference("object_scaling", make_scene, get_diff_param)
+
+
+@pytest.mark.slow
+def test05_glossy_reflection(variant_gpu_autodiff_rgb):
+    from mitsuba.core import Float, Transform4f, ScalarTransform4f, ScalarVector3f, xml
+    from mitsuba.python.util import traverse
+
+    if ek.cuda_mem_get_info()[1] < int(1e9):
+        pytest.skip('Insufficient GPU memory')
+
+    @fresolver_append_path
+    def make_scene(integrator, spp, param):
+        scene_dict = make_scene_dict(integrator)
+        scene_dict["integrator"]["max_depth"] = 3
+        scene_dict["sensor"]["sampler"]["sample_count"] = spp
+        scene_dict["sensor"]["fov"] = 15
+        scene_dict["planemesh"]["bsdf"] = { "type" : "roughconductor", "alpha" : 0.05 }
+        scene_dict["planemesh"]["filename"] = "resources/data/obj/xy_plane_rough.obj"
+        scene_dict["planemesh"]["to_world"] = ScalarTransform4f.rotate([1, 0, 0], -25)
+        scene_dict["object"]["to_world"] = ScalarTransform4f.translate(ScalarVector3f(0, 0.6, 1) + param)
+        return xml.load_dict(scene_dict)
+
+    def get_diff_param(scene):
+        diff_param = Float(0.0)
+        ek.set_requires_gradient(diff_param)
+        diff_trafo = Transform4f.translate(diff_param)
+        update_vertex_buffer(scene, 'object', diff_trafo)
+        return diff_param
+
+    check_finite_difference("glossy_reflection", make_scene, get_diff_param, diff_passes=16, ref_eps=0.015)
+
+
+# TODO fix this test
+@pytest.mark.skip
+@pytest.mark.slow
+def test06_envmap(variant_gpu_autodiff_rgb):
+    from mitsuba.core import Float, Transform4f, ScalarTransform4f, xml
+    from mitsuba.python.util import traverse
+
+    if ek.cuda_mem_get_info()[1] < int(1e9):
+        pytest.skip('Insufficient GPU memory')
+
+    @fresolver_append_path
+    def make_scene(integrator, spp, param):
+        scene_dict = make_scene_dict(integrator)
+        scene_dict["sensor"]["sampler"]["sample_count"] = spp
+        del scene_dict["light_shape"]
+        scene_dict["envmap"] = {
+            "type" : "envmap",
+            "scale" : 1.0,
+            "filename" : "resources/data/envmap/park.hdr",
+            "to_world" : ScalarTransform4f.rotate([1, 0, 0], 90)
+        }
+        scene_dict["object"]["to_world"] = ScalarTransform4f.translate(param) * scene_dict["object"]["to_world"]
+        return xml.load_dict(scene_dict)
+
+    def get_diff_param(scene):
+        diff_param = Float(0.0)
+        ek.set_requires_gradient(diff_param)
+        diff_trafo = Transform4f.translate(diff_param)
+        update_vertex_buffer(scene, 'object', diff_trafo)
+        return diff_param
+
+    diff_integrator = {
+        "type" : "pathreparam",
+        "max_depth" : 2,
+        "kappa_conv_envmap" : 10000000
+    }
+
+    check_finite_difference("envmap", make_scene, get_diff_param, diff_integrator=diff_integrator, error_threshold=0.1)
+
+# TODO add tests for area+envmap
diff --git a/src/integrators/vmf-hemisphere.data b/src/integrators/vmf-hemisphere.data new file mode 100644 index 000000000..45f21f0f4 --- /dev/null +++ b/src/integrators/vmf-hemisphere.data @@ -0,0 +1,111 @@ +# This 2D data provides the integral of vMF lobes on the upper hemisphere. The +# rows correspond to values of the concentration parameter k.
It has been +# mapped to the interval [0,1] using u = log(10*k+1)/(log(10)*6), such that the +# first line (u = 0) corresponds to k = 0, and the last line (u = 1) to +# k ~= 10^5. The other dimension (the columns) correspond to the cosine of the +# angle between the vertical axis and the mean direction of the vMF distribution. +# The first column corresponds to a direction orthogonal to the z axis, and the +# last column corresponds to the z axis. Each dimension is sampled linearly. + +# Resolution in K (lines) and T (columns) +R 100 100 +0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 0.500000 +0.500000 0.500609 0.500615 0.500708 0.500826 0.500922 0.500889 0.500868 0.500864 0.500837 0.500851 0.500838 0.500850 0.500972 0.500990 0.501047 0.501090 0.501066 0.501064 0.501059 0.501093 0.501064 0.501124 0.501176 0.501157 0.501254 0.501340 0.501396 0.501476 0.501605 0.501648 0.501697 0.501739 0.501826 0.501923 0.501950 0.502058 0.502103 0.502122 0.502185 0.502158 0.502205 0.502338 0.502408 0.502444 0.502511 0.502530 0.502622 0.502668 0.502722 0.502746 0.502853 0.502915 0.502893 0.502822 0.502811 0.502812 0.502892 0.502855 0.502813 0.502850 
0.502820 0.502852 0.502928 0.502985 0.503020 0.503018 0.503124 0.503200 0.503322 0.503345 0.503327 0.503338 0.503347 0.503324 0.503358 0.503410 0.503480 0.503628 0.503586 0.503701 0.503626 0.503604 0.503680 0.503718 0.503667 0.503697 0.503853 0.503888 0.503809 0.503919 0.504049 0.503930 0.503913 0.504028 0.503912 0.503926 0.503953 0.504041 0.504093 +0.500000 0.499501 0.499584 0.499658 0.499718 0.499734 0.499745 0.499927 0.500084 0.500213 0.500231 0.500313 0.500388 0.500515 0.500644 0.500824 0.500933 0.500952 0.501103 0.501199 0.501246 0.501314 0.501399 0.501551 0.501719 0.501785 0.501816 0.501824 0.501859 0.501861 0.501998 0.502086 0.502085 0.502126 0.502235 0.502392 0.502430 0.502455 0.502566 0.502643 0.502745 0.502861 0.502966 0.503134 0.503203 0.503302 0.503394 0.503387 0.503535 0.503634 0.503663 0.503738 0.503797 0.503796 0.503896 0.504050 0.504144 0.504214 0.504281 0.504468 0.504558 0.504587 0.504683 0.504913 0.504982 0.504953 0.504942 0.505185 0.505358 0.505528 0.505680 0.505759 0.505893 0.505962 0.506132 0.506348 0.506460 0.506717 0.506910 0.506939 0.507053 0.507106 0.507130 0.507303 0.507298 0.507414 0.507496 0.507486 0.507437 0.507541 0.507625 0.507695 0.507782 0.507722 0.507800 0.507952 0.508052 0.508081 0.508144 0.508158 +0.500000 0.499996 0.500124 0.500250 0.500364 0.500680 0.500840 0.500921 0.501081 0.501260 0.501411 0.501447 0.501585 0.501783 0.501991 0.502229 0.502418 0.502611 0.502784 0.502843 0.503012 0.503235 0.503359 0.503443 0.503559 0.503623 0.503633 0.503799 0.503893 0.504007 0.504143 0.504321 0.504524 0.504701 0.504825 0.505045 0.505225 0.505382 0.505514 0.505649 0.505702 0.505874 0.505957 0.506020 0.506229 0.506375 0.506364 0.506397 0.506596 0.506720 0.506864 0.506998 0.507216 0.507224 0.507391 0.507457 0.507535 0.507661 0.507756 0.507849 0.508027 0.508193 0.508373 0.508569 0.508656 0.508839 0.509020 0.509074 0.509280 0.509385 0.509522 0.509607 0.509690 0.509849 0.509927 0.510091 0.510216 0.510314 0.510536 0.510639 0.510650 0.510822 0.510970 
0.511012 0.511231 0.511342 0.511410 0.511502 0.511741 0.511899 0.511873 0.512014 0.512294 0.512512 0.512648 0.512881 0.513000 0.512911 0.513051 0.513099 +0.500000 0.500487 0.500706 0.500837 0.501092 0.501316 0.501522 0.501678 0.501891 0.502144 0.502289 0.502441 0.502625 0.502796 0.502948 0.503163 0.503358 0.503541 0.503725 0.503921 0.504137 0.504334 0.504515 0.504689 0.504887 0.505096 0.505287 0.505404 0.505680 0.505928 0.506132 0.506439 0.506625 0.506862 0.506981 0.507030 0.507122 0.507386 0.507529 0.507803 0.507908 0.508066 0.508204 0.508493 0.508709 0.508924 0.509239 0.509376 0.509532 0.509748 0.509832 0.509973 0.510267 0.510502 0.510673 0.510896 0.511018 0.511171 0.511461 0.511686 0.511704 0.511863 0.512134 0.512237 0.512372 0.512626 0.512819 0.513057 0.513219 0.513395 0.513597 0.513699 0.513831 0.514000 0.514146 0.514204 0.514426 0.514737 0.514735 0.514930 0.515025 0.515178 0.515343 0.515593 0.515919 0.516173 0.516431 0.516670 0.516953 0.517005 0.517195 0.517338 0.517465 0.517684 0.517883 0.518089 0.518227 0.518480 0.518764 0.519014 +0.500000 0.500858 0.501071 0.501380 0.501641 0.501920 0.502199 0.502441 0.502706 0.502955 0.503301 0.503569 0.503770 0.503952 0.504209 0.504347 0.504487 0.504747 0.504921 0.505159 0.505357 0.505655 0.505844 0.506142 0.506388 0.506612 0.506818 0.507041 0.507244 0.507554 0.507691 0.507846 0.508166 0.508526 0.508801 0.509127 0.509347 0.509609 0.509886 0.510204 0.510502 0.510689 0.511028 0.511265 0.511470 0.511789 0.512195 0.512420 0.512768 0.513003 0.513320 0.513482 0.513645 0.513901 0.514219 0.514450 0.514724 0.514904 0.515220 0.515373 0.515813 0.515957 0.516295 0.516630 0.516958 0.517137 0.517444 0.517656 0.517926 0.518133 0.518423 0.518732 0.519043 0.519304 0.519487 0.519802 0.520117 0.520300 0.520552 0.520809 0.521026 0.521228 0.521528 0.521659 0.521899 0.522129 0.522366 0.522596 0.522867 0.523071 0.523271 0.523511 0.523823 0.524121 0.524430 0.524715 0.524886 0.525219 0.525410 0.525705 +0.500000 0.500112 0.500393 0.500720 
0.501022 0.501378 0.501643 0.502073 0.502484 0.502808 0.503269 0.503662 0.503925 0.504159 0.504581 0.504773 0.505190 0.505509 0.505872 0.506217 0.506561 0.506816 0.507179 0.507499 0.507858 0.508212 0.508580 0.508896 0.509184 0.509616 0.509870 0.510250 0.510449 0.510777 0.511112 0.511444 0.511713 0.512135 0.512492 0.512831 0.513138 0.513392 0.513709 0.513973 0.514320 0.514642 0.515063 0.515443 0.515764 0.516120 0.516506 0.516884 0.517343 0.517633 0.518006 0.518224 0.518634 0.519075 0.519388 0.519758 0.520076 0.520433 0.520781 0.521177 0.521576 0.521807 0.522113 0.522507 0.522876 0.523161 0.523431 0.523748 0.524085 0.524303 0.524692 0.525105 0.525579 0.525888 0.526252 0.526473 0.526826 0.527147 0.527340 0.527812 0.528263 0.528804 0.529021 0.529267 0.529589 0.529982 0.530339 0.530591 0.530765 0.531121 0.531499 0.531839 0.532191 0.532513 0.532741 0.532844 +0.500000 0.499400 0.499783 0.500211 0.500560 0.500929 0.501414 0.501918 0.502401 0.502771 0.503180 0.503575 0.503986 0.504340 0.504757 0.505242 0.505644 0.506045 0.506419 0.506894 0.507347 0.507789 0.508155 0.508521 0.509062 0.509613 0.510064 0.510446 0.510906 0.511329 0.511720 0.512209 0.512626 0.513121 0.513638 0.514091 0.514472 0.514877 0.515267 0.515716 0.516049 0.516524 0.516970 0.517389 0.517726 0.518230 0.518695 0.519191 0.519689 0.520053 0.520415 0.520825 0.521260 0.521647 0.522009 0.522307 0.522729 0.523204 0.523638 0.524077 0.524560 0.525019 0.525444 0.525784 0.526170 0.526693 0.527035 0.527398 0.527933 0.528216 0.528654 0.529083 0.529466 0.529822 0.530180 0.530688 0.531133 0.531549 0.531943 0.532388 0.532795 0.533251 0.533574 0.533861 0.534355 0.534577 0.535145 0.535521 0.535995 0.536298 0.536775 0.537166 0.537502 0.538129 0.538412 0.538868 0.539497 0.540040 0.540478 0.541230 +0.500000 0.501087 0.501583 0.502115 0.502600 0.503191 0.503760 0.504280 0.504750 0.505200 0.505758 0.506286 0.506764 0.507278 0.507783 0.508319 0.508782 0.509333 0.509796 0.510363 0.510796 0.511228 0.511722 0.512177 0.512756 0.513264 
0.513715 0.514307 0.514900 0.515347 0.515925 0.516452 0.517072 0.517613 0.518066 0.518569 0.518994 0.519589 0.520171 0.520736 0.521320 0.521850 0.522413 0.522859 0.523525 0.523991 0.524530 0.525023 0.525481 0.525993 0.526426 0.527036 0.527486 0.528112 0.528569 0.529146 0.529561 0.530115 0.530689 0.531124 0.531668 0.532132 0.532645 0.533211 0.533655 0.534189 0.534646 0.535154 0.535632 0.536047 0.536495 0.537100 0.537712 0.538196 0.538743 0.539239 0.539788 0.540316 0.540784 0.541265 0.541814 0.542414 0.542981 0.543539 0.544030 0.544625 0.545153 0.545575 0.545915 0.546368 0.546902 0.547436 0.547836 0.548453 0.548990 0.549427 0.549816 0.550228 0.550766 0.551055 +0.500000 0.501256 0.501884 0.502452 0.503023 0.503669 0.504273 0.504912 0.505634 0.506390 0.506971 0.507558 0.508159 0.508796 0.509351 0.509972 0.510687 0.511280 0.511868 0.512462 0.513045 0.513698 0.514333 0.515015 0.515669 0.516315 0.516879 0.517458 0.518022 0.518651 0.519249 0.519901 0.520547 0.521212 0.521796 0.522319 0.522943 0.523576 0.524228 0.524938 0.525507 0.526230 0.526833 0.527503 0.528111 0.528848 0.529419 0.530013 0.530672 0.531305 0.532023 0.532648 0.533314 0.534085 0.534680 0.535293 0.535840 0.536400 0.537052 0.537680 0.538261 0.538957 0.539608 0.540327 0.540846 0.541509 0.542167 0.542807 0.543381 0.544026 0.544557 0.545231 0.545884 0.546565 0.547111 0.547773 0.548360 0.549031 0.549707 0.550329 0.550972 0.551536 0.552174 0.552696 0.553206 0.553862 0.554446 0.555063 0.555655 0.556198 0.556763 0.557440 0.558001 0.558455 0.559103 0.559853 0.560485 0.561192 0.561664 0.562462 +0.500000 0.500366 0.501082 0.501842 0.502540 0.503331 0.504102 0.504920 0.505718 0.506455 0.507221 0.507974 0.508824 0.509633 0.510365 0.511151 0.511937 0.512705 0.513390 0.514119 0.514824 0.515553 0.516338 0.517126 0.517749 0.518544 0.519311 0.520042 0.520724 0.521474 0.522264 0.523069 0.523857 0.524653 0.525323 0.526046 0.526864 0.527639 0.528317 0.529121 0.529904 0.530682 0.531543 0.532324 0.533145 0.533821 0.534710 0.535388 
0.536153 0.536942 0.537766 0.538545 0.539266 0.540062 0.540842 0.541632 0.542521 0.543317 0.544122 0.544944 0.545573 0.546245 0.547063 0.547792 0.548612 0.549360 0.550156 0.550915 0.551715 0.552363 0.553097 0.553873 0.554736 0.555525 0.556283 0.557148 0.557984 0.558832 0.559497 0.560187 0.560930 0.561641 0.562410 0.563325 0.564082 0.564777 0.565531 0.566308 0.567100 0.567830 0.568510 0.569397 0.570202 0.571008 0.571746 0.572563 0.573286 0.573994 0.574674 0.575271 +0.500000 0.500513 0.501463 0.502379 0.503196 0.504114 0.505016 0.505936 0.506813 0.507744 0.508686 0.509593 0.510474 0.511420 0.512313 0.513180 0.514125 0.515112 0.515970 0.516888 0.517737 0.518701 0.519502 0.520394 0.521386 0.522292 0.523117 0.523971 0.524830 0.525799 0.526734 0.527725 0.528574 0.529573 0.530528 0.531403 0.532426 0.533301 0.534306 0.535339 0.536251 0.537171 0.538088 0.539041 0.539920 0.540784 0.541708 0.542619 0.543523 0.544367 0.545313 0.546240 0.547200 0.548105 0.548912 0.549906 0.550806 0.551792 0.552789 0.553717 0.554463 0.555387 0.556325 0.557281 0.558182 0.559134 0.560069 0.561007 0.561955 0.562930 0.563879 0.564867 0.565762 0.566617 0.567494 0.568356 0.569267 0.570054 0.571020 0.571955 0.572801 0.573646 0.574584 0.575540 0.576372 0.577354 0.578363 0.579288 0.580302 0.581274 0.582096 0.582884 0.583732 0.584608 0.585542 0.586531 0.587632 0.588417 0.589155 0.590191 +0.500000 0.501050 0.502072 0.503171 0.504292 0.505413 0.506498 0.507632 0.508765 0.509812 0.510838 0.511889 0.512844 0.513964 0.515075 0.516039 0.517172 0.518208 0.519277 0.520354 0.521436 0.522548 0.523672 0.524697 0.525732 0.526907 0.528030 0.529210 0.530313 0.531363 0.532358 0.533446 0.534576 0.535605 0.536753 0.537861 0.538938 0.540032 0.541157 0.542290 0.543303 0.544314 0.545490 0.546475 0.547627 0.548705 0.549888 0.550916 0.552020 0.553090 0.554055 0.555057 0.556107 0.557251 0.558305 0.559347 0.560469 0.561542 0.562575 0.563656 0.564682 0.565730 0.566743 0.567837 0.568909 0.569916 0.570919 0.572055 0.573135 0.574053 
0.575272 0.576416 0.577564 0.578666 0.579676 0.580673 0.581840 0.583058 0.584099 0.585092 0.586134 0.587254 0.588141 0.589325 0.590293 0.591279 0.592328 0.593410 0.594502 0.595666 0.596809 0.597813 0.598810 0.599879 0.600805 0.601906 0.603005 0.604013 0.605035 0.606037 +0.500000 0.501745 0.503030 0.504305 0.505699 0.506958 0.508146 0.509448 0.510771 0.512043 0.513409 0.514629 0.515914 0.517064 0.518308 0.519497 0.520720 0.522041 0.523342 0.524522 0.525812 0.527120 0.528309 0.529561 0.530920 0.532211 0.533401 0.534621 0.535808 0.537160 0.538478 0.539744 0.541053 0.542417 0.543587 0.544857 0.546049 0.547365 0.548689 0.549879 0.551123 0.552392 0.553682 0.554888 0.556242 0.557577 0.558860 0.560131 0.561432 0.562684 0.564017 0.565312 0.566579 0.567925 0.569210 0.570423 0.571777 0.573158 0.574422 0.575645 0.576797 0.578112 0.579295 0.580521 0.581734 0.582991 0.584186 0.585532 0.586809 0.587924 0.589098 0.590379 0.591596 0.592923 0.594100 0.595411 0.596642 0.597938 0.599144 0.600466 0.601720 0.602940 0.604055 0.605425 0.606786 0.608047 0.609345 0.610769 0.612000 0.613178 0.614408 0.615735 0.616981 0.618210 0.619701 0.620655 0.621972 0.623269 0.624636 0.625976 +0.500000 0.502176 0.503651 0.505195 0.506726 0.508247 0.509714 0.511224 0.512768 0.514365 0.515849 0.517275 0.518816 0.520305 0.521749 0.523218 0.524757 0.526099 0.527643 0.529233 0.530717 0.532100 0.533606 0.534995 0.536523 0.537988 0.539372 0.540916 0.542358 0.543768 0.545250 0.546745 0.548261 0.549719 0.551164 0.552615 0.554091 0.555633 0.557211 0.558693 0.560223 0.561602 0.563102 0.564624 0.566088 0.567683 0.569077 0.570511 0.572005 0.573456 0.574872 0.576395 0.577953 0.579537 0.581063 0.582613 0.584071 0.585477 0.587046 0.588551 0.590078 0.591567 0.593139 0.594614 0.596069 0.597669 0.599184 0.600711 0.602255 0.603713 0.605104 0.606560 0.608057 0.609443 0.610910 0.612385 0.613787 0.615168 0.616548 0.617997 0.619411 0.620924 0.622393 0.623859 0.625263 0.626697 0.628179 0.629455 0.631017 0.632332 0.633837 0.635414 
0.636791 0.638203 0.639755 0.641145 0.642523 0.643890 0.645451 0.647114 +0.500000 0.501266 0.503086 0.504941 0.506751 0.508486 0.510157 0.511878 0.513599 0.515326 0.517115 0.518842 0.520688 0.522451 0.524098 0.525774 0.527552 0.529322 0.531060 0.532873 0.534619 0.536412 0.538193 0.540071 0.541860 0.543676 0.545491 0.547241 0.548998 0.550712 0.552526 0.554271 0.556007 0.557679 0.559361 0.561072 0.562829 0.564524 0.566238 0.567928 0.569696 0.571428 0.573065 0.574794 0.576478 0.578178 0.579962 0.581694 0.583373 0.585058 0.586802 0.588485 0.590259 0.592094 0.593878 0.595562 0.597326 0.599028 0.600685 0.602407 0.604233 0.605792 0.607522 0.609292 0.611097 0.612909 0.614576 0.616339 0.618083 0.619879 0.621549 0.623308 0.625079 0.626711 0.628298 0.630164 0.631971 0.633684 0.635369 0.637109 0.638664 0.640404 0.642015 0.643690 0.645395 0.647058 0.648653 0.650376 0.652072 0.653684 0.655349 0.657055 0.658779 0.660319 0.661939 0.663607 0.665263 0.667041 0.668754 0.670564 +0.500000 0.501860 0.503970 0.506082 0.508160 0.510297 0.512491 0.514507 0.516613 0.518628 0.520714 0.522842 0.524835 0.526959 0.528981 0.531100 0.533080 0.535083 0.537117 0.539214 0.541270 0.543303 0.545372 0.547325 0.549334 0.551401 0.553375 0.555526 0.557697 0.559741 0.561624 0.563742 0.565723 0.567810 0.569751 0.571722 0.573689 0.575663 0.577697 0.579787 0.581766 0.583753 0.585665 0.587721 0.589720 0.591706 0.593783 0.595753 0.597694 0.599758 0.601786 0.603683 0.605782 0.607903 0.609966 0.611939 0.613923 0.615878 0.617808 0.619847 0.621822 0.623898 0.625897 0.627776 0.629782 0.631712 0.633580 0.635510 0.637388 0.639432 0.641494 0.643458 0.645467 0.647388 0.649377 0.651349 0.653378 0.655314 0.657389 0.659284 0.661248 0.663232 0.665128 0.667204 0.669223 0.671103 0.673107 0.674975 0.676918 0.678739 0.680633 0.682579 0.684438 0.686236 0.688105 0.689835 0.691695 0.693480 0.695183 0.696842 +0.500000 0.503333 0.505714 0.508014 0.510305 0.512742 0.515204 0.517504 0.519778 0.522033 0.524415 0.526677 0.529085 
0.531576 0.533972 0.536279 0.538599 0.540914 0.543346 0.545721 0.548083 0.550478 0.552945 0.555264 0.557582 0.559957 0.562246 0.564551 0.566862 0.569234 0.571485 0.573673 0.576035 0.578390 0.580561 0.582906 0.585235 0.587593 0.589927 0.592219 0.594549 0.596815 0.599129 0.601424 0.603781 0.606053 0.608431 0.610617 0.612955 0.615155 0.617381 0.619662 0.622033 0.624102 0.626515 0.628837 0.631185 0.633399 0.635633 0.637969 0.640291 0.642530 0.644840 0.647130 0.649253 0.651520 0.653744 0.655875 0.658077 0.660331 0.662546 0.664841 0.667042 0.669316 0.671486 0.673589 0.675789 0.677950 0.680248 0.682424 0.684611 0.686816 0.688978 0.691081 0.693299 0.695400 0.697540 0.699726 0.702003 0.704206 0.706429 0.708576 0.710714 0.712873 0.714983 0.717005 0.719114 0.721340 0.723602 0.725465 +0.500000 0.502900 0.505637 0.508408 0.511007 0.513867 0.516657 0.519478 0.522207 0.524783 0.527520 0.530260 0.532986 0.535696 0.538409 0.541082 0.543805 0.546558 0.549343 0.552082 0.554762 0.557434 0.560130 0.562880 0.565474 0.568100 0.570825 0.573579 0.576246 0.578938 0.581547 0.584138 0.586800 0.589422 0.592061 0.594626 0.597381 0.600017 0.602713 0.605239 0.607893 0.610468 0.613175 0.615786 0.618525 0.621104 0.623803 0.626426 0.629027 0.631615 0.634267 0.636937 0.639539 0.642100 0.644528 0.647167 0.649827 0.652474 0.655050 0.657684 0.660291 0.662853 0.665356 0.667899 0.670469 0.673010 0.675607 0.678272 0.680786 0.683295 0.685920 0.688376 0.690879 0.693257 0.695756 0.698303 0.700736 0.703179 0.705811 0.708219 0.710738 0.713331 0.715768 0.718073 0.720397 0.722872 0.725314 0.727854 0.730204 0.732722 0.735230 0.737704 0.740087 0.742256 0.744635 0.746990 0.749463 0.751673 0.754017 0.756270 +0.500000 0.502598 0.505687 0.508656 0.511788 0.514970 0.518153 0.521486 0.524660 0.527757 0.530835 0.533954 0.537075 0.540285 0.543445 0.546543 0.549613 0.552714 0.555715 0.558759 0.561857 0.564839 0.567860 0.571007 0.574092 0.577195 0.580320 0.583441 0.586455 0.589527 0.592557 0.595605 0.598770 0.601743 0.604824 
0.607994 0.611048 0.614045 0.617090 0.620107 0.623119 0.626217 0.629261 0.632189 0.635206 0.638253 0.641213 0.644161 0.647169 0.650188 0.653152 0.656197 0.659121 0.662157 0.664938 0.667873 0.670764 0.673762 0.676610 0.679384 0.682276 0.685153 0.688017 0.690975 0.693840 0.696718 0.699549 0.702457 0.705351 0.708229 0.711126 0.713832 0.716753 0.719607 0.722288 0.725137 0.727887 0.730627 0.733314 0.736097 0.738785 0.741485 0.744246 0.746892 0.749587 0.752292 0.754860 0.757436 0.760091 0.762661 0.765315 0.767926 0.770520 0.772903 0.775494 0.778090 0.780635 0.783219 0.785896 0.788704 +0.500000 0.503385 0.506956 0.510547 0.514045 0.517472 0.520997 0.524474 0.528012 0.531598 0.535236 0.538795 0.542321 0.545757 0.549278 0.552818 0.556308 0.559912 0.563475 0.566980 0.570502 0.574025 0.577562 0.581154 0.584617 0.588082 0.591554 0.595064 0.598502 0.601952 0.605403 0.608721 0.612257 0.615786 0.619291 0.622793 0.626258 0.629747 0.633157 0.636607 0.639868 0.643238 0.646700 0.650141 0.653422 0.656749 0.660178 0.663572 0.666870 0.670206 0.673529 0.676894 0.680102 0.683478 0.686759 0.690014 0.693292 0.696442 0.699783 0.702991 0.706329 0.709521 0.712649 0.715917 0.719158 0.722327 0.725549 0.728755 0.731895 0.734972 0.738055 0.741197 0.744381 0.747496 0.750524 0.753506 0.756565 0.759506 0.762449 0.765456 0.768447 0.771373 0.774340 0.777236 0.780167 0.782996 0.785934 0.788867 0.791649 0.794581 0.797484 0.800330 0.803036 0.805815 0.808602 0.811128 0.813824 0.816523 0.819200 0.822200 +0.500000 0.503480 0.507442 0.511546 0.515617 0.519690 0.523649 0.527666 0.531612 0.535582 0.539634 0.543618 0.547594 0.551622 0.555588 0.559600 0.563618 0.567616 0.571593 0.575587 0.579580 0.583518 0.587495 0.591404 0.595275 0.599140 0.603044 0.607036 0.611042 0.614892 0.618797 0.622738 0.626689 0.630448 0.634246 0.638081 0.641923 0.645668 0.649585 0.653376 0.657264 0.661050 0.664826 0.668642 0.672363 0.676006 0.679768 0.683360 0.687010 0.690629 0.694374 0.697986 0.701772 0.705332 0.709100 0.712714 0.716314 
0.719874 0.723423 0.726935 0.730451 0.733999 0.737405 0.740866 0.744309 0.747807 0.751404 0.754882 0.758336 0.761850 0.765213 0.768675 0.772044 0.775437 0.778792 0.782114 0.785280 0.788497 0.791744 0.794936 0.798108 0.801301 0.804371 0.807637 0.810700 0.813742 0.816840 0.819885 0.822882 0.825907 0.828959 0.831978 0.834863 0.837938 0.840724 0.843708 0.846420 0.849201 0.851884 0.854772 +0.500000 0.503986 0.508618 0.513144 0.517657 0.522126 0.526741 0.531252 0.535694 0.540211 0.544820 0.549441 0.553913 0.558398 0.562863 0.567421 0.571901 0.576438 0.580860 0.585330 0.589909 0.594384 0.598882 0.603261 0.607642 0.612091 0.616573 0.620942 0.625241 0.629693 0.633990 0.638338 0.642833 0.646924 0.651221 0.655443 0.659650 0.663902 0.668206 0.672461 0.676721 0.680904 0.685182 0.689333 0.693423 0.697589 0.701731 0.705850 0.709858 0.713950 0.718066 0.722133 0.726105 0.730171 0.734117 0.738023 0.741907 0.745873 0.749801 0.753577 0.757336 0.760998 0.764849 0.768687 0.772519 0.776177 0.779843 0.783437 0.787163 0.790816 0.794379 0.797867 0.801374 0.805015 0.808430 0.811782 0.815133 0.818536 0.821885 0.825199 0.828479 0.831751 0.834961 0.838251 0.841520 0.844727 0.847983 0.851014 0.854102 0.857096 0.860130 0.863131 0.866208 0.869123 0.872001 0.874866 0.877736 0.880713 0.883512 0.886260 +0.500000 0.504853 0.509982 0.515093 0.520057 0.525045 0.530169 0.535275 0.540467 0.545463 0.550332 0.555275 0.560390 0.565332 0.570161 0.575106 0.580166 0.584915 0.589887 0.594874 0.599806 0.604758 0.609772 0.614689 0.619667 0.624520 0.629314 0.634175 0.639054 0.643913 0.648710 0.653423 0.658221 0.663023 0.667820 0.672574 0.677234 0.681932 0.686653 0.691265 0.695876 0.700404 0.704895 0.709344 0.713880 0.718417 0.722801 0.727380 0.731718 0.736151 0.740503 0.744776 0.749068 0.753460 0.757739 0.761972 0.766224 0.770448 0.774575 0.778645 0.782622 0.786727 0.790704 0.794718 0.798702 0.802703 0.806408 0.810348 0.814172 0.818103 0.821743 0.825502 0.829183 0.832821 0.836436 0.840023 0.843609 0.847026 0.850565 
0.853962 0.857564 0.861032 0.864295 0.867588 0.870879 0.874109 0.877430 0.880491 0.883541 0.886607 0.889601 0.892611 0.895575 0.898448 0.901395 0.904004 0.906973 0.909919 0.912846 0.915443 +0.500000 0.506258 0.511819 0.517425 0.523131 0.528833 0.534505 0.540062 0.545784 0.551346 0.556793 0.562333 0.568012 0.573584 0.579030 0.584406 0.589803 0.595378 0.600889 0.606395 0.611792 0.617238 0.622680 0.628040 0.633518 0.638743 0.644039 0.649326 0.654612 0.659882 0.665097 0.670405 0.675661 0.680869 0.686100 0.691161 0.696209 0.701326 0.706312 0.711229 0.716230 0.721335 0.726267 0.731060 0.735951 0.740770 0.745474 0.750259 0.754992 0.759549 0.764179 0.768817 0.773465 0.778027 0.782494 0.787013 0.791374 0.795856 0.800210 0.804538 0.808846 0.813150 0.817392 0.821501 0.825398 0.829497 0.833672 0.837597 0.841506 0.845412 0.849164 0.852892 0.856602 0.860366 0.863914 0.867551 0.871100 0.874559 0.878013 0.881495 0.884811 0.888241 0.891544 0.894843 0.898075 0.901207 0.904321 0.907309 0.910187 0.913034 0.915960 0.918783 0.921586 0.924396 0.927065 0.929763 0.932390 0.935014 0.937622 0.940132 +0.500000 0.505761 0.511986 0.518216 0.524442 0.530608 0.536852 0.542997 0.549147 0.555381 0.561607 0.567703 0.574081 0.580342 0.586402 0.592626 0.598659 0.604806 0.610814 0.616814 0.622752 0.628682 0.634707 0.640639 0.646633 0.652486 0.658356 0.664197 0.669876 0.675598 0.681208 0.686873 0.692556 0.698314 0.703964 0.709451 0.714882 0.720413 0.725794 0.731063 0.736308 0.741386 0.746694 0.752030 0.757182 0.762414 0.767480 0.772298 0.777370 0.782273 0.787044 0.791992 0.796716 0.801466 0.806134 0.810882 0.815500 0.819975 0.824498 0.828950 0.833347 0.837591 0.841840 0.846127 0.850324 0.854430 0.858493 0.862529 0.866449 0.870408 0.874111 0.877821 0.881509 0.885172 0.888870 0.892375 0.895798 0.899213 0.902661 0.905990 0.909217 0.912398 0.915560 0.918683 0.921605 0.924583 0.927415 0.930229 0.933019 0.935774 0.938372 0.940966 0.943648 0.946301 0.948698 0.951009 0.953283 0.955657 0.957878 0.960077 
+0.500000 0.506952 0.513786 0.520611 0.527436 0.534268 0.541111 0.548041 0.554746 0.561508 0.568166 0.574927 0.581541 0.588217 0.594999 0.601727 0.608409 0.615005 0.621709 0.628230 0.634887 0.641373 0.647722 0.654125 0.660476 0.666852 0.673228 0.679598 0.685861 0.692210 0.698432 0.704391 0.710473 0.716462 0.722514 0.728367 0.734146 0.739958 0.745804 0.751584 0.757333 0.763063 0.768578 0.774154 0.779575 0.784996 0.790224 0.795476 0.800735 0.805673 0.810696 0.815674 0.820605 0.825456 0.830294 0.835141 0.839867 0.844484 0.849007 0.853453 0.857970 0.862345 0.866580 0.870720 0.874813 0.878903 0.882943 0.886825 0.890648 0.894428 0.898129 0.901812 0.905421 0.908888 0.912311 0.915553 0.918869 0.922041 0.925191 0.928206 0.931185 0.934080 0.936867 0.939650 0.942446 0.945038 0.947614 0.950155 0.952680 0.955036 0.957359 0.959653 0.961871 0.963955 0.965991 0.967979 0.969871 0.971648 0.973506 0.975138 +0.500000 0.507389 0.514973 0.522558 0.530132 0.537593 0.545084 0.552504 0.559777 0.567237 0.574757 0.582092 0.589335 0.596571 0.603708 0.611058 0.618382 0.625375 0.632474 0.639734 0.646738 0.653759 0.660700 0.667682 0.674520 0.681396 0.688096 0.694836 0.701627 0.708377 0.714863 0.721361 0.727873 0.734389 0.740847 0.747006 0.753105 0.759210 0.765154 0.771183 0.777166 0.782829 0.788593 0.794349 0.799913 0.805502 0.811055 0.816416 0.821797 0.826994 0.832137 0.837334 0.842410 0.847331 0.852215 0.857047 0.861678 0.866345 0.870799 0.875152 0.879516 0.883948 0.888166 0.892138 0.896120 0.900059 0.903884 0.907607 0.911286 0.914918 0.918306 0.921772 0.925048 0.928349 0.931585 0.934753 0.937758 0.940704 0.943588 0.946336 0.949078 0.951545 0.953979 0.956357 0.958646 0.960886 0.963128 0.965336 0.967314 0.969274 0.971189 0.973033 0.974851 0.976551 0.978240 0.979781 0.981298 0.982771 0.984278 0.985595 +0.500000 0.507982 0.516213 0.524438 0.532455 0.540607 0.548779 0.556765 0.564995 0.572995 0.581008 0.589014 0.597005 0.605022 0.613000 0.620860 0.628627 0.636418 0.644198 0.651833 0.659583 
0.667044 0.674484 0.681781 0.689295 0.696596 0.703801 0.710989 0.717993 0.724970 0.731870 0.738832 0.745599 0.752278 0.758960 0.765630 0.772256 0.778714 0.785117 0.791299 0.797347 0.803353 0.809234 0.815072 0.820921 0.826396 0.831938 0.837578 0.842946 0.848171 0.853348 0.858402 0.863398 0.868209 0.873073 0.877769 0.882342 0.886875 0.891296 0.895561 0.899812 0.903957 0.907975 0.911776 0.915576 0.919266 0.922833 0.926368 0.929812 0.933080 0.936273 0.939298 0.942310 0.945215 0.948099 0.950833 0.953499 0.955998 0.958395 0.960782 0.963087 0.965248 0.967379 0.969413 0.971417 0.973305 0.975171 0.976865 0.978559 0.980188 0.981679 0.983177 0.984573 0.985918 0.987176 0.988303 0.989399 0.990467 0.991451 0.992459 +0.500000 0.508766 0.517597 0.526509 0.535153 0.544033 0.552780 0.561618 0.570405 0.579201 0.587828 0.596551 0.605232 0.613629 0.622132 0.630718 0.639083 0.647486 0.655860 0.664202 0.672341 0.680625 0.688634 0.696629 0.704534 0.712204 0.719912 0.727532 0.735083 0.742434 0.749756 0.757073 0.764375 0.771347 0.778233 0.785123 0.791926 0.798531 0.805025 0.811470 0.817779 0.824051 0.830185 0.836100 0.841899 0.847743 0.853235 0.858766 0.864000 0.869259 0.874539 0.879539 0.884445 0.889348 0.893809 0.898339 0.902755 0.907039 0.911165 0.915133 0.919035 0.922842 0.926557 0.930129 0.933623 0.936954 0.940160 0.943219 0.946241 0.949117 0.951947 0.954710 0.957360 0.959866 0.962285 0.964604 0.966805 0.968923 0.970893 0.972899 0.974779 0.976491 0.978235 0.979789 0.981332 0.982813 0.984210 0.985540 0.986807 0.988023 0.989116 0.990125 0.991139 0.991999 0.992898 0.993744 0.994472 0.995153 0.995795 0.996425 +0.500000 0.509897 0.519435 0.529036 0.538635 0.548236 0.557760 0.567050 0.576623 0.586107 0.595513 0.604912 0.614077 0.623448 0.632631 0.641811 0.650899 0.659927 0.668935 0.677793 0.686514 0.695218 0.703645 0.712157 0.720525 0.728860 0.736948 0.744952 0.752678 0.760392 0.768053 0.775634 0.782967 0.790305 0.797560 0.804617 0.811435 0.818182 0.824662 0.831093 0.837544 0.843692 0.849716 
0.855593 0.861336 0.866916 0.872612 0.878058 0.883187 0.888372 0.893377 0.898125 0.902795 0.907368 0.911734 0.916001 0.920134 0.924171 0.928009 0.931699 0.935327 0.938785 0.942123 0.945347 0.948435 0.951426 0.954280 0.957050 0.959619 0.962162 0.964631 0.966925 0.969176 0.971201 0.973252 0.975139 0.976990 0.978786 0.980405 0.981952 0.983393 0.984862 0.986107 0.987339 0.988486 0.989593 0.990552 0.991489 0.992364 0.993157 0.993899 0.994647 0.995255 0.995853 0.996405 0.996926 0.997378 0.997807 0.998154 0.998500 +0.500000 0.510376 0.520753 0.531267 0.541597 0.551993 0.562447 0.572609 0.582865 0.593079 0.603152 0.613107 0.623049 0.632954 0.642841 0.652673 0.662456 0.671913 0.681442 0.690991 0.700266 0.709483 0.718547 0.727444 0.736353 0.744838 0.753413 0.761694 0.769876 0.778007 0.785919 0.793719 0.801218 0.808667 0.816054 0.823198 0.830348 0.837333 0.844126 0.850668 0.857036 0.863222 0.869252 0.875122 0.880853 0.886368 0.891843 0.897112 0.902178 0.907060 0.911797 0.916355 0.920708 0.924867 0.928986 0.932962 0.936770 0.940461 0.943871 0.947238 0.950537 0.953639 0.956593 0.959356 0.961959 0.964437 0.966781 0.969053 0.971273 0.973351 0.975392 0.977262 0.979004 0.980657 0.982229 0.983686 0.985050 0.986290 0.987456 0.988577 0.989654 0.990669 0.991634 0.992478 0.993258 0.993979 0.994705 0.995336 0.995889 0.996395 0.996839 0.997248 0.997668 0.998037 0.998330 0.998607 0.998864 0.999079 0.999253 0.999407 +0.500000 0.511644 0.522823 0.534042 0.545329 0.556615 0.567650 0.578726 0.589648 0.600634 0.611546 0.622291 0.633054 0.643759 0.654225 0.664759 0.675038 0.685232 0.695322 0.705132 0.714916 0.724548 0.734039 0.743428 0.752442 0.761547 0.770474 0.779071 0.787665 0.795859 0.803999 0.812248 0.820017 0.827650 0.835026 0.842334 0.849306 0.856307 0.862947 0.869206 0.875513 0.881694 0.887590 0.893109 0.898675 0.904065 0.909351 0.914422 0.919190 0.923692 0.928185 0.932434 0.936454 0.940427 0.944007 0.947508 0.950880 0.954096 0.957007 0.959815 0.962554 0.965113 0.967696 0.970053 0.972305 
0.974458 0.976423 0.978282 0.980024 0.981660 0.983144 0.984618 0.986005 0.987262 0.988453 0.989503 0.990506 0.991487 0.992372 0.993163 0.993891 0.994533 0.995173 0.995729 0.996249 0.996715 0.997129 0.997507 0.997864 0.998180 0.998471 0.998699 0.998907 0.999096 0.999256 0.999400 0.999521 0.999623 0.999736 0.999831 +0.500000 0.511808 0.523973 0.536078 0.548395 0.560476 0.572663 0.584787 0.596825 0.608591 0.620333 0.631881 0.643467 0.654750 0.665860 0.676959 0.687969 0.698780 0.709553 0.720026 0.730436 0.740607 0.750543 0.760355 0.769988 0.779238 0.788463 0.797566 0.806316 0.814764 0.823002 0.831007 0.838765 0.846361 0.853778 0.860921 0.867914 0.874763 0.881387 0.887759 0.893743 0.899582 0.905225 0.910588 0.915639 0.920685 0.925475 0.930148 0.934516 0.938774 0.942786 0.946598 0.950166 0.953680 0.956811 0.959849 0.962807 0.965597 0.968137 0.970625 0.972842 0.975043 0.977126 0.979071 0.980909 0.982612 0.984206 0.985664 0.986988 0.988282 0.989355 0.990366 0.991275 0.992140 0.992960 0.993753 0.994431 0.995046 0.995608 0.996106 0.996605 0.997031 0.997419 0.997779 0.998088 0.998371 0.998635 0.998841 0.999021 0.999172 0.999321 0.999449 0.999558 0.999642 0.999729 0.999792 0.999848 0.999884 0.999923 0.999958 +0.500000 0.513415 0.526530 0.539510 0.552684 0.565836 0.578813 0.591588 0.604315 0.616937 0.629326 0.641715 0.654092 0.666128 0.677967 0.689871 0.701469 0.712804 0.724069 0.735139 0.746029 0.756636 0.767008 0.777215 0.787270 0.797038 0.806427 0.815625 0.824501 0.833289 0.841671 0.849687 0.857577 0.865127 0.872441 0.879367 0.886096 0.892690 0.898959 0.904828 0.910554 0.916111 0.921513 0.926511 0.931377 0.935947 0.940331 0.944396 0.948389 0.952077 0.955541 0.958752 0.961987 0.964984 0.967762 0.970417 0.972894 0.975167 0.977282 0.979297 0.981188 0.982926 0.984514 0.985962 0.987321 0.988592 0.989774 0.990804 0.991801 0.992712 0.993444 0.994151 0.994774 0.995377 0.995941 0.996472 0.996908 0.997309 0.997660 0.997995 0.998284 0.998516 0.998738 0.998936 0.999113 0.999252 0.999365 
0.999488 0.999595 0.999673 0.999748 0.999786 0.999840 0.999875 0.999912 0.999935 0.999952 0.999963 0.999977 0.999986 +0.500000 0.513897 0.528097 0.542256 0.556206 0.570241 0.584370 0.598173 0.611558 0.625236 0.638625 0.651916 0.664931 0.677696 0.690174 0.702815 0.715191 0.727148 0.738977 0.750538 0.761943 0.772891 0.783780 0.794201 0.804631 0.814544 0.823938 0.833194 0.842165 0.850813 0.859412 0.867486 0.875105 0.882671 0.889953 0.896930 0.903433 0.909792 0.915709 0.921143 0.926596 0.931816 0.936637 0.941225 0.945779 0.949777 0.953652 0.957287 0.960696 0.963919 0.966955 0.969760 0.972440 0.974853 0.977193 0.979255 0.981187 0.983075 0.984720 0.986318 0.987732 0.989023 0.990177 0.991195 0.992152 0.993066 0.993881 0.994611 0.995254 0.995817 0.996326 0.996809 0.997233 0.997599 0.997945 0.998237 0.998485 0.998719 0.998894 0.999069 0.999215 0.999346 0.999455 0.999541 0.999638 0.999711 0.999763 0.999817 0.999863 0.999889 0.999911 0.999940 0.999950 0.999960 0.999972 0.999979 0.999987 0.999992 0.999997 1.000000 +0.500000 0.515110 0.530450 0.545647 0.560694 0.575808 0.590553 0.605364 0.620187 0.634706 0.649135 0.663436 0.677561 0.691274 0.704600 0.717725 0.730664 0.743224 0.755478 0.767414 0.779266 0.790598 0.801596 0.812189 0.822462 0.832389 0.842011 0.851339 0.860200 0.868914 0.877067 0.884845 0.892383 0.899565 0.906493 0.912968 0.919186 0.925244 0.930819 0.936104 0.940894 0.945540 0.949998 0.954012 0.957902 0.961494 0.964912 0.968079 0.970926 0.973677 0.976169 0.978458 0.980559 0.982477 0.984242 0.985897 0.987368 0.988757 0.989992 0.991112 0.992113 0.993040 0.993917 0.994691 0.995369 0.995989 0.996499 0.996955 0.997398 0.997776 0.998091 0.998364 0.998629 0.998839 0.999039 0.999203 0.999323 0.999439 0.999525 0.999622 0.999681 0.999746 0.999800 0.999837 0.999873 0.999903 0.999921 0.999933 0.999958 0.999966 0.999973 0.999982 0.999987 0.999989 0.999994 0.999996 0.999996 0.999997 0.999997 0.999998 +0.500000 0.517111 0.533558 0.549695 0.565749 0.582013 0.598188 0.614069 
0.629583 0.644744 0.660063 0.675035 0.689855 0.704276 0.718418 0.732382 0.745710 0.758954 0.771711 0.784017 0.795993 0.807758 0.818991 0.829872 0.840423 0.850470 0.860047 0.869307 0.878073 0.886431 0.894527 0.902066 0.909332 0.916078 0.922762 0.928871 0.934529 0.939940 0.945035 0.949682 0.953882 0.957904 0.961759 0.965347 0.968641 0.971628 0.974374 0.976962 0.979230 0.981424 0.983413 0.985239 0.986961 0.988408 0.989762 0.990990 0.992108 0.993114 0.993903 0.994668 0.995372 0.995939 0.996471 0.996994 0.997453 0.997835 0.998143 0.998436 0.998661 0.998877 0.999047 0.999216 0.999344 0.999450 0.999532 0.999618 0.999697 0.999757 0.999794 0.999831 0.999873 0.999899 0.999928 0.999942 0.999954 0.999971 0.999977 0.999984 0.999986 0.999992 0.999993 0.999994 0.999994 0.999997 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 +0.500000 0.517250 0.534794 0.552529 0.569814 0.587207 0.604351 0.621132 0.637784 0.654432 0.670911 0.687158 0.702623 0.717943 0.733001 0.747385 0.761307 0.774849 0.788137 0.800744 0.812985 0.824735 0.836250 0.847056 0.857517 0.867362 0.877006 0.885983 0.894569 0.902795 0.910482 0.917567 0.924241 0.930468 0.936301 0.941927 0.947210 0.952091 0.956701 0.960745 0.964598 0.968109 0.971407 0.974495 0.977216 0.979685 0.981954 0.984022 0.985826 0.987438 0.988941 0.990350 0.991590 0.992648 0.993633 0.994494 0.995232 0.995913 0.996513 0.996985 0.997445 0.997851 0.998147 0.998428 0.998676 0.998923 0.999127 0.999290 0.999422 0.999549 0.999643 0.999701 0.999759 0.999812 0.999846 0.999877 0.999906 0.999928 0.999944 0.999955 0.999964 0.999972 0.999978 0.999982 0.999984 0.999991 0.999994 0.999996 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.518078 0.537343 0.556251 0.575142 0.593715 0.611930 0.630025 0.648031 0.665622 0.682833 0.699713 0.716181 0.732099 0.747550 0.762593 0.777212 0.791246 0.804915 0.817949 0.830283 0.842143 0.853477 0.864172 0.874441 0.884353 0.893642 0.902170 0.910373 0.918162 
0.925432 0.932140 0.938481 0.944334 0.949650 0.954529 0.959034 0.963205 0.967027 0.970576 0.973915 0.976953 0.979592 0.982032 0.984198 0.986189 0.987901 0.989441 0.990847 0.992118 0.993226 0.994171 0.995034 0.995774 0.996396 0.996985 0.997432 0.997835 0.998162 0.998456 0.998695 0.998917 0.999097 0.999259 0.999383 0.999509 0.999606 0.999700 0.999757 0.999805 0.999844 0.999884 0.999916 0.999933 0.999947 0.999961 0.999975 0.999985 0.999985 0.999988 0.999993 0.999994 0.999996 0.999996 0.999998 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.520295 0.540744 0.561027 0.581105 0.601033 0.620773 0.640111 0.659157 0.677959 0.695957 0.713570 0.730898 0.747727 0.763871 0.779327 0.794436 0.808771 0.822499 0.835653 0.848230 0.860171 0.871476 0.882303 0.892253 0.901432 0.910217 0.918570 0.926162 0.933217 0.939701 0.945769 0.951467 0.956612 0.961225 0.965484 0.969387 0.973099 0.976248 0.979140 0.981695 0.984065 0.986137 0.988050 0.989674 0.991036 0.992333 0.993435 0.994378 0.995222 0.995938 0.996592 0.997114 0.997562 0.997964 0.998286 0.998591 0.998865 0.999064 0.999241 0.999421 0.999538 0.999634 0.999715 0.999768 0.999818 0.999857 0.999886 0.999918 0.999946 0.999960 0.999971 0.999977 0.999985 0.999989 0.999994 0.999994 0.999994 0.999997 0.999997 0.999998 0.999998 0.999999 0.999999 0.999999 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.522248 0.544030 0.565687 0.587104 0.608212 0.629241 0.650059 0.670286 0.689874 0.708903 0.727417 0.745480 0.763053 0.779679 0.795851 0.811195 0.826054 0.840031 0.853228 0.865649 0.877350 0.888350 0.898679 0.908436 0.917362 0.925622 0.933280 0.940387 0.946863 0.952678 0.957976 0.962897 0.967197 0.971271 0.974903 0.978074 0.981025 0.983607 0.985877 0.987832 0.989537 0.991125 0.992443 0.993581 0.994585 0.995427 0.996177 0.996821 0.997383 0.997841 0.998196 
0.998511 0.998801 0.999026 0.999207 0.999379 0.999507 0.999595 0.999676 0.999750 0.999808 0.999855 0.999895 0.999923 0.999940 0.999952 0.999970 0.999979 0.999985 0.999987 0.999992 0.999996 0.999996 0.999996 0.999998 0.999998 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.523634 0.546988 0.570328 0.593643 0.616212 0.638348 0.660546 0.681961 0.702621 0.722785 0.742239 0.761048 0.779126 0.796400 0.812931 0.828641 0.843383 0.857580 0.870760 0.883038 0.894376 0.904970 0.914776 0.923837 0.932107 0.939798 0.946858 0.953159 0.958753 0.963847 0.968442 0.972717 0.976418 0.979682 0.982527 0.985151 0.987315 0.989272 0.990888 0.992347 0.993613 0.994687 0.995586 0.996369 0.997005 0.997529 0.997990 0.998365 0.998674 0.998928 0.999131 0.999295 0.999437 0.999556 0.999657 0.999741 0.999803 0.999840 0.999884 0.999911 0.999933 0.999951 0.999966 0.999972 0.999978 0.999985 0.999988 0.999994 0.999996 0.999998 0.999999 0.999999 0.999999 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.525909 0.551055 0.575720 0.600444 0.625083 0.648992 0.672257 0.694681 0.716791 0.737697 0.758365 0.777738 0.796372 0.813992 0.830558 0.846296 0.860932 0.874679 0.887256 0.899115 0.910143 0.920046 0.929295 0.937564 0.945125 0.951891 0.958065 0.963614 0.968537 0.973070 0.976908 0.980218 0.983245 0.985829 0.988094 0.989970 0.991553 0.992996 0.994232 0.995256 0.996085 0.996817 0.997407 0.997885 0.998317 0.998669 0.998950 0.999163 0.999336 0.999484 0.999604 0.999698 0.999780 0.999832 0.999874 0.999898 0.999917 0.999934 0.999950 0.999958 0.999966 0.999979 0.999988 0.999992 0.999993 0.999993 0.999994 0.999998 0.999998 0.999999 0.999999 0.999999 0.999999 
0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.527032 0.554243 0.580769 0.607024 0.632788 0.658276 0.683228 0.707041 0.730053 0.752290 0.773468 0.793644 0.812521 0.830538 0.847187 0.862898 0.877555 0.890966 0.903364 0.914733 0.925042 0.934456 0.942832 0.950406 0.957165 0.963233 0.968456 0.973141 0.977218 0.980833 0.984042 0.986730 0.988977 0.990911 0.992487 0.993785 0.994959 0.995932 0.996760 0.997397 0.997911 0.998366 0.998743 0.999026 0.999246 0.999420 0.999542 0.999652 0.999749 0.999807 0.999853 0.999884 0.999911 0.999931 0.999948 0.999965 0.999970 0.999982 0.999984 0.999989 0.999994 0.999994 0.999995 0.999997 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.530295 0.559265 0.588137 0.616236 0.643961 0.670777 0.696754 0.721605 0.745963 0.768825 0.790607 0.811175 0.830536 0.848418 0.865106 0.880554 0.894616 0.907436 0.919105 0.929620 0.939065 0.947507 0.954948 0.961511 0.967314 0.972398 0.976845 0.980692 0.984012 0.986811 0.989198 0.991132 0.992770 0.994200 0.995349 0.996327 0.997111 0.997749 0.998214 0.998615 0.998932 0.999160 0.999354 0.999512 0.999633 0.999719 0.999781 0.999839 0.999878 0.999907 0.999935 0.999953 0.999972 0.999979 0.999986 0.999990 0.999995 0.999997 0.999998 0.999998 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 +0.500000 0.530486 0.561771 0.592356 0.623002 0.652366 0.680906 0.708503 0.734707 0.760029 0.783783 0.806463 0.827106 0.846732 0.864822 0.881384 0.896358 0.909806 0.922046 0.933029 0.942657 0.951177 0.958691 0.965337 0.971154 0.976008 0.980092 0.983663 0.986752 0.989308 0.991451 0.993119 0.994580 0.995723 0.996613 0.997332 0.997938 0.998408 0.998766 0.999086 0.999307 0.999472 0.999597 0.999710 0.999785 0.999851 0.999903 0.999941 0.999966 0.999974 0.999979 0.999983 0.999990 0.999994 0.999995 0.999996 0.999998 0.999998 0.999998 0.999999 0.999999 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.533971 0.567431 0.600152 0.632380 0.663815 0.694222 0.723516 0.750703 0.776671 0.800911 0.823686 0.844686 0.863946 0.881709 0.897611 0.911963 0.924901 0.936195 0.946080 0.954888 0.962285 0.968831 0.974430 0.979126 0.983084 0.986344 0.989145 0.991431 0.993323 0.994805 0.995912 0.996909 0.997659 0.998262 0.998720 0.999028 0.999270 0.999468 0.999627 0.999731 0.999801 0.999865 0.999904 0.999921 0.999942 0.999958 0.999966 0.999974 0.999982 0.999990 0.999993 0.999997 0.999997 0.999998 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.536344 0.572030 0.607210 0.641813 0.675110 0.706936 0.737435 0.766324 0.793096 0.818166 0.841335 0.862128 0.881257 0.898611 0.913829 0.927336 
0.939084 0.949334 0.958327 0.965845 0.972226 0.977542 0.982142 0.985739 0.988757 0.991218 0.993212 0.994755 0.996000 0.996998 0.997760 0.998321 0.998775 0.999123 0.999353 0.999546 0.999666 0.999766 0.999849 0.999902 0.999929 0.999958 0.999971 0.999978 0.999987 0.999996 0.999997 0.999997 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.538132 0.576343 0.614164 0.650758 0.686042 0.719731 0.751537 0.781192 0.808986 0.834490 0.857867 0.878494 0.897258 0.913655 0.928042 0.940684 0.951380 0.960381 0.968049 0.974436 0.979798 0.984191 0.987761 0.990571 0.992784 0.994586 0.995945 0.996995 0.997746 0.998395 0.998836 0.999190 0.999450 0.999613 0.999745 0.999842 0.999889 0.999923 0.999953 0.999968 0.999982 0.999990 0.999992 0.999995 0.999997 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.541232 0.582573 0.622476 0.661666 0.699063 0.734635 0.767686 0.798297 0.826417 0.851855 0.874620 0.894775 0.912685 0.928033 0.941314 0.952463 0.961980 0.969874 0.976550 0.981906 0.986137 0.989482 0.992110 0.994173 0.995640 0.996800 0.997673 0.998363 0.998834 0.999209 0.999459 0.999652 0.999764 0.999841 0.999888 0.999930 0.999962 0.999980 
0.999986 0.999991 0.999995 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.544240 0.587996 0.631155 0.672793 0.712184 0.748991 0.783252 0.814830 0.843211 0.868861 0.891363 0.910730 0.927416 0.941720 0.953655 0.963715 0.971835 0.978376 0.983600 0.987643 0.990889 0.993267 0.995137 0.996532 0.997542 0.998320 0.998851 0.999239 0.999493 0.999679 0.999791 0.999867 0.999913 0.999946 0.999960 0.999978 0.999988 0.999991 0.999996 0.999998 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.547690 0.594628 0.640415 0.684379 0.725621 0.764169 0.799308 0.831376 0.859924 0.885110 0.906720 0.925347 0.940758 0.953691 0.964332 0.972947 0.979879 0.985088 0.989120 0.992162 0.994427 0.996090 0.997286 0.998172 0.998786 0.999213 0.999476 0.999659 0.999778 0.999863 0.999914 0.999947 0.999970 0.999985 0.999992 0.999998 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.550989 0.601557 0.650454 0.697079 0.740611 0.780485 0.816630 0.848851 0.876856 0.901057 0.921769 0.938817 0.952909 0.964441 0.973579 0.980623 0.985995 0.989933 0.992909 0.995157 0.996691 0.997765 0.998547 0.999083 0.999416 0.999644 0.999780 0.999873 0.999920 0.999956 0.999977 0.999981 0.999984 0.999995 0.999998 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.554701 0.608568 0.660259 0.709085 0.754781 0.795941 0.832816 0.865029 0.892937 0.916371 0.935673 0.951136 0.963761 0.973416 0.980900 0.986605 0.990786 0.993762 0.995862 0.997279 0.998239 0.998872 0.999275 0.999535 0.999725 0.999834 0.999914 0.999948 0.999970 0.999985 0.999994 0.999998 0.999999 0.999999 0.999999 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.558929 0.616823 0.671652 0.722991 0.770406 0.812817 0.850380 0.882342 0.908888 0.930858 0.948415 0.962213 0.972934 0.981025 0.987003 0.991173 0.994220 0.996335 0.997659 0.998526 0.999113 0.999475 0.999690 0.999833 0.999905 0.999947 0.999977 0.999986 0.999994 0.999998 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.563050 0.624581 0.682955 0.737570 0.786857 0.830025 0.867191 0.898231 0.923768 0.944029 0.959784 0.971891 0.980755 0.987144 0.991558 0.994638 0.996620 0.997967 0.998809 0.999280 0.999561 0.999773 0.999881 0.999929 0.999957 0.999969 0.999980 0.999996 0.999998 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.567252 0.632614 0.694838 
0.751658 0.802301 0.846512 0.883788 0.913793 0.937601 0.956160 0.969835 0.979771 0.986787 0.991678 0.994811 0.996908 0.998192 0.998985 0.999447 0.999720 0.999865 0.999942 0.999966 0.999988 0.999996 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.572143 0.642117 0.707525 0.766884 0.819103 0.863170 0.899202 0.927679 0.949697 0.966160 0.977896 0.985883 0.991274 0.994785 0.997048 0.998365 0.999111 0.999537 0.999760 0.999882 0.999946 0.999976 0.999990 0.999994 0.999997 0.999997 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.577646 0.652052 0.721254 0.783130 0.836140 0.879815 0.914848 0.941611 0.961260 0.975132 0.984480 0.990618 0.994475 0.996929 0.998373 0.999166 0.999584 0.999788 0.999908 0.999956 0.999977 0.999983 0.999997 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.583073 0.662406 0.735518 0.799528 0.853152 0.896291 0.929234 0.953519 0.970402 0.981936 0.989550 0.994100 0.996818 0.998330 0.999161 0.999610 0.999820 0.999934 0.999965 0.999986 0.999992 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.589095 0.673676 0.750070 0.815921 0.870034 0.911673 0.942398 0.964097 0.978706 0.987889 0.993480 0.996610 0.998287 0.999188 0.999667 0.999859 0.999938 0.999970 0.999988 0.999995 0.999998 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.596001 0.685743 0.765871 0.833131 0.886216 0.926252 0.954493 0.973645 0.985194 0.992177 0.996079 0.998117 0.999148 0.999637 0.999870 0.999954 0.999985 0.999997 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.602092 0.697573 0.780936 0.849448 0.901654 0.939560 0.965177 0.980878 0.990105 0.995224 0.997784 0.999070 0.999625 0.999868 0.999959 0.999987 0.999993 0.999996 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.609066 0.710498 0.797117 0.866785 0.917672 0.952164 0.974123 0.986841 0.993703 0.997239 0.998874 0.999577 0.999853 0.999951 0.999984 0.999997 0.999999 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.616751 0.724203 0.814412 0.883552 0.932044 0.962988 0.981093 0.991265 0.996250 0.998512 0.999481 0.999840 0.999956 0.999985 0.999994 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.624126 0.737432 0.830099 0.898790 0.944591 0.972296 0.987281 0.994647 0.997941 0.999298 0.999774 0.999927 0.999974 0.999995 0.999996 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.633846 0.753420 0.847888 0.914709 0.956821 0.980114 0.991774 0.996931 0.998953 0.999688 0.999924 0.999985 0.999997 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.643231 0.768120 0.864484 0.929138 0.966968 0.986139 0.994817 0.998344 0.999531 0.999893 0.999972 0.999995 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.652901 0.784628 0.880935 0.942262 0.975455 0.990803 0.997096 0.999229 0.999812 0.999965 0.999995 0.999997 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.663892 0.801253 0.897889 0.954542 0.982583 0.994414 0.998428 0.999646 0.999919 0.999979 0.999995 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 
0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 +0.500000 0.674463 0.816952 0.912700 0.964759 0.988094 0.996713 0.999246 0.999863 0.999979 0.999996 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.686479 0.834692 0.927430 0.974059 0.992538 0.998194 0.999656 0.999961 0.999994 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.698518 0.851042 0.940726 0.981349 0.995364 0.999027 0.999864 0.999986 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.711133 0.867702 0.952867 0.987180 0.997246 0.999597 0.999952 0.999995 0.999996 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.725362 0.884405 0.963747 0.991602 0.998581 0.999838 0.999987 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.739688 0.900657 0.972796 0.994860 0.999315 0.999945 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
+0.500000 0.753539 0.915539 0.980540 0.997089 0.999715 0.999977 0.999998 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.769975 0.929815 0.986468 0.998428 0.999903 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.785466 0.943247 0.991296 0.999205 0.999965 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.801347 0.954865 0.994438 0.999659 0.999991 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.817844 0.965510 0.996907 0.999849 0.999997 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.835059 0.974341 0.998345 0.999952 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.851706 0.981793 0.999150 0.999988 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.869107 0.987592 0.999638 0.999993 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.885584 0.991931 0.999860 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.901583 0.994996 0.999948 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.916273 0.997214 0.999982 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.930533 0.998500 0.999994 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.944085 0.999269 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.955912 0.999674 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.966039 0.999875 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.974945 0.999948 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.981886 0.999987 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.987860 0.999994 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.992302 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.995151 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 +0.500000 0.997311 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.998493 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 +0.500000 0.999289 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \ No newline at end of file diff --git a/src/libcore/python/spectrum_v.cpp b/src/libcore/python/spectrum_v.cpp index e6c89f885..ea5a9d7fd 100644 --- a/src/libcore/python/spectrum_v.cpp +++ b/src/libcore/python/spectrum_v.cpp @@ -31,7 +31,7 @@ MTS_PY_EXPORT(Spectrum) { m.def("xyz_to_srgb", vectorize(&xyz_to_srgb), "rgb"_a, "active"_a = true, D(xyz_to_srgb)); - if constexpr (is_rgb_v) { + if constexpr (is_rgb_v || is_spectral_v) { m.def("srgb_to_xyz", vectorize(&srgb_to_xyz), "rgb"_a, "active"_a = true, D(srgb_to_xyz)); } diff --git a/src/libcore/python/xml_v.cpp b/src/libcore/python/xml_v.cpp index e70964da9..e1e610b88 100644 --- a/src/libcore/python/xml_v.cpp +++ b/src/libcore/python/xml_v.cpp @@ -111,7 +111,7 @@ ref load_dict(const py::dict &dict, std::map> & else class_ = PluginManager::instance()->get_plugin_class(type, GET_VARIANT()); - bool within_emitter = (class_->parent()->alias() == "emitter"); + bool within_emitter = (!is_scene && class_->parent()->alias() == "emitter"); Properties props(type); for (auto& [k, value] : dict) { diff --git a/src/librender/mesh.cpp b/src/librender/mesh.cpp index 75dfc2a61..bc88c5d54 100644 --- a/src/librender/mesh.cpp +++ b/src/librender/mesh.cpp @@ -50,6 +50,9 @@ Mesh::Mesh(const std::string 
&name, ScalarSize vertex_count, m_vertex_normals_buf.managed(); m_vertex_texcoords_buf.managed(); + if constexpr (is_cuda_array_v) + cuda_sync(); + m_mesh = true; set_children(); } @@ -396,13 +399,40 @@ Mesh::barycentric_coordinates(const SurfaceInteraction3f &si, return {w, u, v}; } -MTS_VARIANT void Mesh::fill_surface_interaction(const Ray3f & /*ray*/, - const Float *cache, - SurfaceInteraction3f &si, - Mask active) const { - // Barycentric coordinates within triangle - Float b1 = cache[0], - b2 = cache[1]; +MTS_VARIANT typename Mesh::SurfaceInteraction3f +Mesh::fill_surface_interaction(const Ray3f &ray, + const Float *cache, + const UInt32 &cache_indices, + SurfaceInteraction3f si, + Mask active) const { + MTS_MASK_ARGUMENT(active); + + // Check whether the SurfaceInteraction need to be differentiable w.t.r. m_vertex_positions_buf + bool differentiable_pos = is_diff_array_v && requires_gradient(m_vertex_positions_buf); + + Float b1, b2; + if (!cache || differentiable_pos) { + // Recompute ray / triangle intersection to get differentiable b1, b2 and t + Mask valid; + Float t; + std::tie(valid, b1, b2, t) = ray_intersect_triangle(si.prim_index, ray, active); + + // Kill the ray if we can't recompute the triangle intersection + masked(si.t, !valid && active) = math::Infinity; + + // Replace the data by differentiable data + active &= valid; + masked(si.t, active) = t; + } else { + if constexpr (is_cuda_array_v){ + b1 = gather(cache[0], cache_indices, active); + b2 = gather(cache[1], cache_indices, active); + } else { + ENOKI_MARK_USED(cache_indices); + b1 = cache[0]; + b2 = cache[1]; + } + } Float b0 = 1.f - b1 - b2; @@ -459,6 +489,30 @@ MTS_VARIANT void Mesh::fill_surface_interaction(const Ray3f & / // Tangents si.dp_du[active] = dp_du; si.dp_dv[active] = dp_dv; + + return si; +} + +MTS_VARIANT std::pair::Point3f, typename Mesh::Normal3f> +Mesh::differentiable_position(const SurfaceInteraction3f &si, Mask active) const { + // NOTE: here we assume that the si was 
computed using HitComputeMode::Least + Float b1 = si.uv.x(), + b2 = si.uv.y(); + Float b0 = 1.f - b1 - b2; + + auto fi = face_indices(si.prim_index, active); + + Point3f p0 = vertex_position(fi[0], active), + p1 = vertex_position(fi[1], active), + p2 = vertex_position(fi[2], active); + + Vector3f dp0 = p1 - p0, + dp1 = p2 - p0; + + // Face normal + Normal3f n = normalize(cross(dp0, dp1)); + Point3f p = p0 * detach(b0) + p1 * detach(b1) + p2 * detach(b2); + return { p, n }; } MTS_VARIANT std::pair::Vector3f, typename Mesh::Vector3f> @@ -793,11 +847,16 @@ MTS_VARIANT void Mesh::traverse(TraversalCallback *callback) { MTS_VARIANT void Mesh::parameters_changed(const std::vector &keys) { if (keys.empty() || string::contains(keys, "vertex_positions_buf")) { - if (has_vertex_normals()) - recompute_vertex_normals(); + if constexpr (is_cuda_array_v) { + cuda_eval(); + cuda_sync(); + } recompute_bbox(); + if (has_vertex_normals()) + recompute_vertex_normals(); + area_distr_build(); Base::parameters_changed(); } diff --git a/src/librender/python/scene_v.cpp b/src/librender/python/scene_v.cpp index 7c549fd82..e33886779 100644 --- a/src/librender/python/scene_v.cpp +++ b/src/librender/python/scene_v.cpp @@ -39,7 +39,7 @@ MTS_PY_EXPORT(Scene) { MTS_PY_CLASS(Scene, Object) .def(py::init()) .def("ray_intersect", - vectorize(&Scene::ray_intersect), + vectorize(py::overload_cast(&Scene::ray_intersect, py::const_)), "ray"_a, "active"_a = true, D(Scene, ray_intersect)) .def("ray_test", vectorize(&Scene::ray_test), diff --git a/src/librender/python/shape_v.cpp b/src/librender/python/shape_v.cpp index 6ee549246..fe040bcfa 100644 --- a/src/librender/python/shape_v.cpp +++ b/src/librender/python/shape_v.cpp @@ -30,7 +30,7 @@ MTS_PY_EXPORT(Shape) { "ray"_a, "active"_a = true, D(Shape, ray_intersect)) .def("ray_test", vectorize(&Shape::ray_test), "ray"_a, "active"_a = true) .def("fill_surface_interaction", &Shape::fill_surface_interaction, - "ray"_a, "cache"_a, "si"_a, "active"_a = true) // 
TODO vectorize this + "ray"_a, "cache"_a, "cache_indices"_a, "si"_a, "active"_a = true) // TODO vectorize this .def("bbox", py::overload_cast<>( &Shape::bbox, py::const_), D(Shape, bbox)) .def("bbox", py::overload_cast( diff --git a/src/librender/scene.cpp b/src/librender/scene.cpp index 5275f9112..aad2af928 100644 --- a/src/librender/scene.cpp +++ b/src/librender/scene.cpp @@ -110,11 +110,23 @@ Scene::ray_intersect(const Ray3f &ray, Mask active) const { MTS_MASKED_FUNCTION(ProfilerPhase::RayIntersect, active); if constexpr (is_cuda_array_v) - return ray_intersect_gpu(ray, active); + return ray_intersect_gpu(ray, HitComputeMode::Default, active); else return ray_intersect_cpu(ray, active); } +MTS_VARIANT typename Scene::SurfaceInteraction3f +Scene::ray_intersect(const Ray3f &ray, HitComputeMode mode, Mask active) const { + MTS_MASKED_FUNCTION(ProfilerPhase::RayIntersect, active); + + if constexpr (is_cuda_array_v) { + return ray_intersect_gpu(ray, mode, active); + } else { + ENOKI_MARK_USED(mode); + return ray_intersect_cpu(ray, active); + } +} + MTS_VARIANT typename Scene::SurfaceInteraction3f Scene::ray_intersect_naive(const Ray3f &ray, Mask active) const { MTS_MASKED_FUNCTION(ProfilerPhase::RayIntersect, active); @@ -138,6 +150,32 @@ Scene::ray_test(const Ray3f &ray, Mask active) const { return ray_test_cpu(ray, active); } +MTS_VARIANT std::pair::EmitterPtr, Float> +Scene::sample_emitter(const Interaction3f &/*ref*/, + const Float &sample, + Mask active) const { + + ScalarFloat emitter_pdf(1.f); + EmitterPtr emitter; + if (likely(!m_emitters.empty())) { + if (m_emitters.size() == 1) { + // Fast path if there is only one emitter + emitter = (const Emitter *) m_emitters[0]; + if constexpr (is_cuda_array_v) { + set_slices(emitter, slices(sample)); + } + } else { + // Randomly pick an emitter according to the precomputed emitter distribution + UInt32 index = min(UInt32(sample * (ScalarFloat) m_emitters.size()), (uint32_t) m_emitters.size()-1); + emitter_pdf = 1.f / 
m_emitters.size(); + emitter = gather(m_emitters.data(), index, active); + } + } else { + Throw("Scene::sample_emitter_impl: Not implemented, scene must have emitters."); + } + return { emitter, emitter_pdf }; +} + MTS_VARIANT std::pair::DirectionSample3f, Spectrum> Scene::sample_emitter_direction(const Interaction3f &ref, const Point2f &sample_, bool test_visibility, Mask active) const { @@ -160,7 +198,7 @@ Scene::sample_emitter_direction(const Interaction3f &ref, const UInt32 index = min(UInt32(sample.x() * (ScalarFloat) m_emitters.size()), (uint32_t) m_emitters.size()-1); // Rescale sample.x() to lie in [0,1) again - sample.x() = (sample.x() - index*emitter_pdf) * m_emitters.size(); + sample.x() = (sample.x() - index * emitter_pdf) * m_emitters.size(); EmitterPtr emitter = gather(m_emitters.data(), index, active); @@ -214,9 +252,26 @@ MTS_VARIANT void Scene::traverse(TraversalCallback *callback) { } } -MTS_VARIANT void Scene::parameters_changed(const std::vector &/*keys*/) { +MTS_VARIANT void Scene::parameters_changed(const std::vector &keys) { if (m_environment) m_environment->set_scene(this); // TODO use parameters_changed({"scene"}) + + + bool update_accel = false; + for (auto &s : m_shapes) { + if (string::contains(keys, s->id()) || string::contains(keys, s->class_()->name())) { + update_accel = true; + break; + } + } + + if (update_accel) { + if constexpr (is_cuda_array_v) + accel_parameters_changed_gpu(); + else { + // TODO update Embree BVH or Mitsuba kdtree if necessary + } + } } MTS_VARIANT std::string Scene::to_string() const { diff --git a/src/librender/scene_embree.inl b/src/librender/scene_embree.inl index aba8977b0..5964dd2e3 100644 --- a/src/librender/scene_embree.inl +++ b/src/librender/scene_embree.inl @@ -77,8 +77,8 @@ Scene::ray_intersect_cpu(const Ray3f &ray, Mask active) const { // Create the cache for the Mesh shape Float cache[2] = { rh.hit.u, rh.hit.v }; - // Ask shape to fill in the rest - si.shape->fill_surface_interaction(ray, cache, 
si); + // Ask shape(s) to fill in the rest using the cache + si.fill_surface_interaction(ray, (void *)cache); // Gram-schmidt orthogonalization to compute local shading frame si.sh_frame.s = normalize( @@ -132,8 +132,8 @@ Scene::ray_intersect_cpu(const Ray3f &ray, Mask active) const { // Create the cache for the Mesh shapes Float cache[2] = { load(rh.hit.u), load(rh.hit.v) }; - // Ask shape(s) to fill in the rest - si.shape->fill_surface_interaction(ray, cache, si, hit); + // Ask shape(s) to fill in the rest using the cache + si.fill_surface_interaction(ray, (void *)cache, hit); // Gram-schmidt orthogonalization to compute local shading frame si.sh_frame.s = normalize( diff --git a/src/librender/scene_optix.inl b/src/librender/scene_optix.inl index 9246f8910..bd5fbc9e7 100644 --- a/src/librender/scene_optix.inl +++ b/src/librender/scene_optix.inl @@ -430,16 +430,30 @@ MTS_VARIANT void Scene::accel_release_gpu() { } MTS_VARIANT typename Scene::SurfaceInteraction3f -Scene::ray_intersect_gpu(const Ray3f &ray_, Mask active) const { +Scene::ray_intersect_gpu(const Ray3f &ray_, HitComputeMode mode, Mask active) const { if constexpr (is_cuda_array_v) { Assert(!m_shapes.empty()); OptixState &s = *(OptixState *) m_accel; + + if (mode == HitComputeMode::Differentiable && !is_diff_array_v) + Throw("ray_intersect_gpu(): variant should be autodiff when differentiable si is requested."); + Ray3f ray(ray_); size_t ray_count = std::max(slices(ray.o), slices(ray.d)); set_slices(ray, ray_count); set_slices(active, ray_count); - SurfaceInteraction3f si = empty(ray_count); + SurfaceInteraction3f si; + if (mode == HitComputeMode::Least) { + si = empty(1); // this is needed for virtual calls + si.t = empty(ray_count); + si.p = empty(ray_count); + si.uv = empty(ray_count); + si.prim_index = empty(ray_count); + si.shape = empty(ray_count); + } else { + si = empty(ray_count); + } // DEBUG mode: Explicitly instantiate `si` with NaN values. 
// As the integrator should only deal with the lanes of `si` for which @@ -484,7 +498,9 @@ Scene::ray_intersect_gpu(const Ray3f &ray_, Mask active) const // Out: Hit flag nullptr, // top_object - s.accel + s.accel, + // fill_surface_interaction + mode == HitComputeMode::Default }; cuda_memcpy_to_device(s.params, ¶ms, sizeof(OptixParams)); @@ -525,10 +541,17 @@ Scene::ray_intersect_gpu(const Ray3f &ray_, Mask active) const si.instance = nullptr; si.duv_dx = si.duv_dy = 0.f; - // Gram-schmidt orthogonalization to compute local shading frame - si.sh_frame.s = normalize( - fnmadd(si.sh_frame.n, dot(si.sh_frame.n, si.dp_du), si.dp_du)); - si.sh_frame.t = cross(si.sh_frame.n, si.sh_frame.s); + if (mode == HitComputeMode::Differentiable) { + // Cached info are not needed as they will be recomputed to be differentiable + si.fill_surface_interaction(ray, nullptr, active); + } + + if (mode != HitComputeMode::Least) { + // Gram-schmidt orthogonalization to compute local shading frame + si.sh_frame.s = normalize( + fnmadd(si.sh_frame.n, dot(si.sh_frame.n, si.dp_du), si.dp_du)); + si.sh_frame.t = cross(si.sh_frame.n, si.sh_frame.s); + } // Incident direction in local coordinates si.wi = select(si.is_valid(), si.to_local(-ray.d), -ray.d); @@ -536,6 +559,7 @@ Scene::ray_intersect_gpu(const Ray3f &ray_, Mask active) const return si; } else { ENOKI_MARK_USED(ray_); + ENOKI_MARK_USED(mode); ENOKI_MARK_USED(active); Throw("ray_intersect_gpu() should only be called in GPU mode."); } @@ -584,7 +608,9 @@ Scene::ray_test_gpu(const Ray3f &ray_, Mask active) const { // Out: Hit flag hit.data(), // top_object - s.accel + s.accel, + // fill_surface_interaction + false }; cuda_memcpy_to_device(s.params, ¶ms, sizeof(OptixParams)); diff --git a/src/librender/shape.cpp b/src/librender/shape.cpp index 6428a91e8..c06ddc3da 100644 --- a/src/librender/shape.cpp +++ b/src/librender/shape.cpp @@ -296,13 +296,21 @@ MTS_VARIANT typename Shape::Mask Shape::ray_te return ray_intersect(ray, unused).first; 
} -MTS_VARIANT void Shape::fill_surface_interaction(const Ray3f & /*ray*/, - const Float * /*cache*/, - SurfaceInteraction3f & /*si*/, - Mask /*active*/) const { +MTS_VARIANT typename Shape::SurfaceInteraction3f +Shape::fill_surface_interaction(const Ray3f & /*ray*/, + const Float * /*cache*/, + const UInt32 & /*cache_indices*/, + SurfaceInteraction3f /*si*/, + Mask /*active*/) const { NotImplementedError("fill_surface_interaction"); } +MTS_VARIANT std::pair::Point3f, typename Shape::Normal3f> +Shape::differentiable_position(const SurfaceInteraction3f & /*si*/, + Mask /*active*/) const { + NotImplementedError("differentiable_position"); +} + MTS_VARIANT typename Shape::SurfaceInteraction3f Shape::ray_intersect(const Ray3f &ray, Mask active) const { MTS_MASK_ARGUMENT(active); @@ -314,7 +322,7 @@ Shape::ray_intersect(const Ray3f &ray, Mask active) const { si.t = select(active, t, math::Infinity); if (any(active)) - fill_surface_interaction(ray, cache, si, active); + si = fill_surface_interaction(ray, cache, arange(slices(ray)), si, active); return si; } diff --git a/src/python/python/autodiff.py b/src/python/python/autodiff.py index ff7203890..ca4c1ce7c 100644 --- a/src/python/python/autodiff.py +++ b/src/python/python/autodiff.py @@ -1,6 +1,7 @@ from contextlib import contextmanager from typing import Union, Tuple import enoki as ek +from mitsuba.python.util import is_differentiable def _render_helper(scene, spp=None, sensor_index=0): @@ -122,7 +123,8 @@ def render(scene, spp: Union[None, int, Tuple[int, int]] = None, unbiased=False, optimizer: 'mitsuba.python.autodiff.Optimizer' = None, - sensor_index=0): + sensor_index=0, + pre_render_callback = lambda: None): """ Perform a differentiable of the scene `scene`, returning a floating point array containing RGB values and AOVs, if applicable. 
@@ -171,6 +173,11 @@ def render(scene, Parameter ``sensor_index`` (``int``): When the scene contains more than one sensor/camera, this parameter can be specified to select the desired sensor. + + Parameter ``pre_render_callback`` (``void callback()``): + Function called before rendering the scene. This is useful when + ``unbiased=True`` as one might want to update the scene in between + the two renders. """ if unbiased: if optimizer is None: @@ -180,8 +187,11 @@ def render(scene, spp = (spp, spp) with optimizer.disable_gradients(): + pre_render_callback() image = _render_helper(scene, spp=spp[0], sensor_index=sensor_index) + + pre_render_callback() image_diff = _render_helper(scene, spp=spp[1], sensor_index=sensor_index) ek.reattach(image, image_diff) @@ -189,6 +199,7 @@ def render(scene, if type(spp) is tuple: raise Exception('render(): unbiased=False requires that spp ' 'is either an integer or None!') + pre_render_callback() image = _render_helper(scene, spp=spp, sensor_index=sensor_index) return image @@ -209,7 +220,7 @@ def __init__(self, params, lr): """ self.set_learning_rate(lr) self.params = params - if not params.all_differentiable(): + if not all(is_differentiable(params[k]) for k in params.keys()): raise Exception('Optimizer.__init__(): all parameters should ' 'be differentiable!') self.state = {} diff --git a/src/python/python/util.py b/src/python/python/util.py index 7b32e8cea..2aaa49814 100644 --- a/src/python/python/util.py +++ b/src/python/python/util.py @@ -41,19 +41,7 @@ def __getitem__(self, key: str): return self.get_property(*(self.properties[key])) def __setitem__(self, key: str, value): - item = self.properties[key] - node = item[2] - while node is not None: - parent, depth = self.hierarchy[node] - - name = key - if parent is not None: - key, name = key.rsplit('.', 1) - - self.update_list.setdefault((depth, node), []) - self.update_list[(depth, node)].append(name) - - node = parent + item = self.set_dirty(key) return 
self.set_property(item[0], item[1], value) def __delitem__(self, key: str) -> None: @@ -87,12 +75,6 @@ def __next__(self): return ParameterMapItemIterator(self) - def all_differentiable(self): - for k in self.keys(): - if not is_differentiable(self[k]): - return False - return True - def torch(self) -> dict: """ Converts all Enoki arrays into PyTorch arrays and return them as a @@ -101,6 +83,29 @@ def torch(self) -> dict: """ return {k: v.torch().requires_grad_() for k, v in self.items()} + def set_dirty(self, key: str): + """ + Marks a specific parameter and its parent objects as dirty. A subsequent call + to :py:meth:`~mitsuba.python.util.ParameterMap.update()` will refresh their internal + state. This function is automatically called when overwriting a parameter using + :py:meth:`~mitsuba.python.util.ParameterMap.__setitem__()`. + """ + item = self.properties[key] + node = item[2] + while node is not None: + parent, depth = self.hierarchy[node] + + name = key + if parent is not None: + key, name = key.rsplit('.', 1) + + self.update_list.setdefault((depth, node), []) + self.update_list[(depth, node)].append(name) + + node = parent + + return item + def update(self) -> None: """ This function should be called at the end of a sequence of writes diff --git a/src/shapes/blender.cpp b/src/shapes/blender.cpp index 8598849aa..e8e4ccab9 100644 --- a/src/shapes/blender.cpp +++ b/src/shapes/blender.cpp @@ -317,6 +317,9 @@ class BlenderMesh final : public Mesh { if (has_uvs) m_vertex_texcoords_buf.managed(); + if constexpr (is_cuda_array_v) + cuda_sync(); + set_children(); } diff --git a/src/shapes/cylinder.cpp b/src/shapes/cylinder.cpp index 2cff13ffd..5c5f7ae08 100644 --- a/src/shapes/cylinder.cpp +++ b/src/shapes/cylinder.cpp @@ -327,37 +327,55 @@ class Cylinder final : public Shape { return valid_intersection; } - void fill_surface_interaction(const Ray3f &ray, const Float * /*cache*/, - SurfaceInteraction3f &si_out, Mask active) const override { + SurfaceInteraction3f 
fill_surface_interaction(const Ray3f &ray, + const Float * /*cache*/, + const UInt32 & /*cache_indices*/, + SurfaceInteraction3f si, + Mask active) const override { MTS_MASK_ARGUMENT(active); - SurfaceInteraction3f si(si_out); + // TODO: make si differentiable w.r.t. shape parameters if necessary - si.p = ray(si.t); + si.p[active] = ray(si.t); Vector3f local = m_to_object * si.p; Float phi = atan2(local.y(), local.x()); masked(phi, phi < 0.f) += 2.f * math::Pi; - si.uv = Point2f(phi * math::InvTwoPi, local.z() / m_length); + si.uv[active] = Point2f(phi * math::InvTwoPi, local.z() / m_length); Vector3f dp_du = 2.f * math::Pi * Vector3f(-local.y(), local.x(), 0.f); Vector3f dp_dv = Vector3f(0.f, 0.f, m_length); - si.dp_du = m_to_world.transform_affine(dp_du); - si.dp_dv = m_to_world.transform_affine(dp_dv); - si.n = Normal3f(normalize(cross(si.dp_du, si.dp_dv))); + si.dp_du[active] = m_to_world.transform_affine(dp_du); + si.dp_dv[active] = m_to_world.transform_affine(dp_dv); + si.n[active] = Normal3f(normalize(cross(si.dp_du, si.dp_dv))); /* Mitigate roundoff error issues by a normal shift of the computed intersection point */ - si.p += si.n * (m_radius - norm(head<2>(local))); + si.p[active] += si.n * (m_radius - norm(head<2>(local))); if (m_flip_normals) - si.n *= -1.f; + si.n[active] *= -1.f; - si.sh_frame.n = si.n; - si.time = ray.time; + si.sh_frame.n[active] = si.n; + masked(si.time, active) = ray.time; - si_out[active] = si; + return si; + } + + std::pair differentiable_position(const SurfaceInteraction3f &si, + Mask /*active*/) const override { + auto [local_x, local_y] = sincos(si.uv.x() * math::TwoPi); + Float local_z = si.uv.y() * detach(m_length); + + Point3f p = m_to_world.transform_affine(Point3f(local_x, local_y, local_z)); + + Vector3f dp_du = 2.f * math::Pi * Vector3f(-local_y, local_x, 0.f); + Vector3f dp_dv = Vector3f(0.f, 0.f, detach(m_length)); + Normal3f n = Normal3f(normalize(cross(m_to_world.transform_affine(dp_du), + 
m_to_world.transform_affine(dp_dv)))); + + return { p, n }; } std::pair normal_derivative(const SurfaceInteraction3f &si, diff --git a/src/shapes/disk.cpp b/src/shapes/disk.cpp index 9c72ead57..199cb4994 100644 --- a/src/shapes/disk.cpp +++ b/src/shapes/disk.cpp @@ -181,23 +181,35 @@ class Disk final : public Shape { && local.x()*local.x() + local.y()*local.y() <= 1; } - void fill_surface_interaction(const Ray3f &ray_, const Float *cache, - SurfaceInteraction3f &si_out, Mask active) const override { + SurfaceInteraction3f fill_surface_interaction(const Ray3f &ray_, + const Float *cache, + const UInt32 &cache_indices, + SurfaceInteraction3f si, + Mask active) const override { MTS_MASK_ARGUMENT(active); -#if !defined(MTS_ENABLE_EMBREE) - Float local_x = cache[0]; - Float local_y = cache[1]; -#else - ENOKI_MARK_USED(cache); - Ray3f ray = m_to_object.transform_affine(ray_); - Float t = -ray.o.z() * ray.d_rcp.z(); - Point3f local = ray(t); - Float local_x = local.x(); - Float local_y = local.y(); + // TODO: make si differentiable w.r.t. 
shape parameters if necessary + +#if defined(MTS_ENABLE_EMBREE) + cache = nullptr; #endif - SurfaceInteraction3f si(si_out); + Float local_x, local_y; + if (cache) { + if constexpr (is_cuda_array_v){ + local_x = gather(cache[0], cache_indices, active); + local_y = gather(cache[1], cache_indices, active); + } else { + local_x = cache[0]; + local_y = cache[1]; + } + } else { + Ray3f ray = m_to_object.transform_affine(ray_); + Float t = -ray.o.z() * ray.d_rcp.z(); + Point3f local = ray(t); + local_x = local.x(); + local_y = local.y(); + } Float r = norm(Point2f(local_x, local_y)), inv_r = rcp(r); @@ -208,16 +220,24 @@ class Disk final : public Shape { Float cos_phi = select(neq(r, 0.f), local_x * inv_r, 1.f), sin_phi = select(neq(r, 0.f), local_y * inv_r, 0.f); - si.dp_du = m_to_world * Vector3f( cos_phi, sin_phi, 0.f); - si.dp_dv = m_to_world * Vector3f(-sin_phi, cos_phi, 0.f); + si.dp_du[active] = m_to_world * Vector3f(cos_phi, sin_phi, 0.f); + si.dp_dv[active] = m_to_world * Vector3f(-sin_phi, cos_phi, 0.f); - si.n = m_frame.n; - si.sh_frame.n = m_frame.n; - si.uv = Point2f(r, v); - si.p = ray_(si.t); - si.time = ray_.time; + si.n[active] = m_frame.n; + si.sh_frame.n[active] = m_frame.n; + si.uv[active] = Point2f(r, v); + si.p[active] = ray_(si.t); + masked(si.time, active) = ray_.time; + + return si; + } - si_out[active] = si; + std::pair differentiable_position(const SurfaceInteraction3f &si, + Mask /*active*/) const override { + auto [c, s] = sincos(si.uv.y() * math::TwoPi); + Float local_x = si.uv.x() * c, + local_y = si.uv.x() * s; + return { m_to_world.transform_affine(Point3f(local_x, local_y, 0.f)) , m_frame.n }; } std::pair normal_derivative(const SurfaceInteraction3f & /*si*/, diff --git a/src/shapes/obj.cpp b/src/shapes/obj.cpp index 12cdc8301..f73f94e8d 100644 --- a/src/shapes/obj.cpp +++ b/src/shapes/obj.cpp @@ -289,6 +289,9 @@ class OBJMesh final : public Mesh { m_vertex_normals_buf.managed(); m_vertex_texcoords_buf.managed(); + if constexpr 
(is_cuda_array_v) + cuda_sync(); + for (const auto& v_ : vertex_map) { const VertexBinding *v = &v_; diff --git a/src/shapes/optix/cylinder.cuh b/src/shapes/optix/cylinder.cuh index e28b3fc92..2ca1bacc8 100644 --- a/src/shapes/optix/cylinder.cuh +++ b/src/shapes/optix/cylinder.cuh @@ -78,6 +78,7 @@ extern "C" __global__ void __closesthit__cylinder() { Vector3f p = fmaf(t, ray_d, ray_o); + Vector3f local = cylinder->to_object.transform_point(p); float phi = atan2(local.y(), local.x()); @@ -86,20 +87,23 @@ extern "C" __global__ void __closesthit__cylinder() { Vector2f uv = Vector2f(phi / (2.f * M_PI), local.z() / cylinder->length); - Vector3f dp_du = 2.f * M_PI * Vector3f(-local.y(), local.x(), 0.f); - Vector3f dp_dv = Vector3f(0.f, 0.f, cylinder->length); - dp_du = cylinder->to_world.transform_vector(dp_du); - dp_dv = cylinder->to_world.transform_vector(dp_dv); - Vector3f ns = Vector3f(normalize(cross(dp_du, dp_dv))); + Vector3f ng, ns, dp_du, dp_dv; + if (params.fill_surface_interaction) { + dp_du = 2.f * M_PI * Vector3f(-local.y(), local.x(), 0.f); + dp_dv = Vector3f(0.f, 0.f, cylinder->length); + dp_du = cylinder->to_world.transform_vector(dp_du); + dp_dv = cylinder->to_world.transform_vector(dp_dv); + ns = Vector3f(normalize(cross(dp_du, dp_dv))); - /* Mitigate roundoff error issues by a normal shift of the computed - intersection point */ - p += ns * (cylinder->radius - norm(Vector2f(local.x(), local.y()))); + /* Mitigate roundoff error issues by a normal shift of the computed + intersection point */ + p += ns * (cylinder->radius - norm(Vector2f(local.x(), local.y()))); - if (cylinder->flip_normals) - ns *= -1.f; + if (cylinder->flip_normals) + ns *= -1.f; - Vector3f ng = ns; + ng = ns; + } write_output_params(params, launch_index, sbt_data->shape_ptr, diff --git a/src/shapes/optix/disk.cuh b/src/shapes/optix/disk.cuh index 6f4b8875e..b4d1e4fd8 100644 --- a/src/shapes/optix/disk.cuh +++ b/src/shapes/optix/disk.cuh @@ -58,17 +58,21 @@ extern "C" __global__ void 
__closesthit__disk() { if (v < 0.f) v += 1.f; - float cos_phi = (r != 0.f ? local.x() * inv_r : 1.f), - sin_phi = (r != 0.f ? local.y() * inv_r : 0.f); - - Vector3f dp_du = disk->to_world.transform_vector(Vector3f( cos_phi, sin_phi, 0.f)); - Vector3f dp_dv = disk->to_world.transform_vector(Vector3f(-sin_phi, cos_phi, 0.f)); - - Vector3f ns = normalize(disk->to_world.transform_normal(Vector3f(0.f, 0.f, 1.f))); - Vector3f ng = ns; Vector2f uv = Vector2f(r, v); Vector3f p = ray_o_ + ray_d_ * t; + Vector3f ng, ns, dp_du, dp_dv; + if (params.fill_surface_interaction) { + float cos_phi = (r != 0.f ? local.x() * inv_r : 1.f), + sin_phi = (r != 0.f ? local.y() * inv_r : 0.f); + + dp_du = disk->to_world.transform_vector(Vector3f( cos_phi, sin_phi, 0.f)); + dp_dv = disk->to_world.transform_vector(Vector3f(-sin_phi, cos_phi, 0.f)); + + ns = normalize(disk->to_world.transform_normal(Vector3f(0.f, 0.f, 1.f))); + ng = ns; + } + write_output_params(params, launch_index, sbt_data->shape_ptr, optixGetPrimitiveIndex(), diff --git a/src/shapes/optix/mesh.cuh b/src/shapes/optix/mesh.cuh index 7136f5793..7952231be 100644 --- a/src/shapes/optix/mesh.cuh +++ b/src/shapes/optix/mesh.cuh @@ -47,41 +47,44 @@ extern "C" __global__ void __closesthit__mesh() { p1 = load_3d(mesh->vertex_positions, face.y()), p2 = load_3d(mesh->vertex_positions, face.z()); - Vector3f dp0 = p1 - p0, - dp1 = p2 - p0; Vector3f p = p0 * uv0 + p1 * uv1 + p2 * uv2; - Vector3f ng = normalize(cross(dp0, dp1)); - Vector3f dp_du, dp_dv; - coordinate_system(ng, dp_du, dp_dv); + Vector3f ng, ns, dp_du, dp_dv; - Vector3f ns; - if (mesh->vertex_normals != nullptr) { - Vector3f n0 = load_3d(mesh->vertex_normals, face.x()), - n1 = load_3d(mesh->vertex_normals, face.y()), - n2 = load_3d(mesh->vertex_normals, face.z()); + if (params.fill_surface_interaction) { + Vector3f dp0 = p1 - p0, + dp1 = p2 - p0; - ns = normalize(n0 * uv0 + n1 * uv1 + n2 * uv2); - } else { - ns = ng; - } + ng = normalize(cross(dp0, dp1)); + 
coordinate_system(ng, dp_du, dp_dv); + + if (mesh->vertex_normals != nullptr) { + Vector3f n0 = load_3d(mesh->vertex_normals, face.x()), + n1 = load_3d(mesh->vertex_normals, face.y()), + n2 = load_3d(mesh->vertex_normals, face.z()); + + ns = normalize(n0 * uv0 + n1 * uv1 + n2 * uv2); + } else { + ns = ng; + } - if (mesh->vertex_texcoords != nullptr) { - Vector2f t0 = load_2d(mesh->vertex_texcoords, face.x()), - t1 = load_2d(mesh->vertex_texcoords, face.y()), - t2 = load_2d(mesh->vertex_texcoords, face.z()); + if (mesh->vertex_texcoords != nullptr) { + Vector2f t0 = load_2d(mesh->vertex_texcoords, face.x()), + t1 = load_2d(mesh->vertex_texcoords, face.y()), + t2 = load_2d(mesh->vertex_texcoords, face.z()); - uv = t0 * uv0 + t1 * uv1 + t2 * uv2; + uv = t0 * uv0 + t1 * uv1 + t2 * uv2; - Vector2f dt0 = t1 - t0, - dt1 = t2 - t0; - float det = dt0.x() * dt1.y() - dt0.y() * dt1.x(); + Vector2f dt0 = t1 - t0, + dt1 = t2 - t0; + float det = dt0.x() * dt1.y() - dt0.y() * dt1.x(); - if (det != 0.f) { - float inv_det = 1.f / det; - dp_du = ( dt1.y() * dp0 - dt0.y() * dp1) * inv_det; - dp_dv = (-dt1.x() * dp0 + dt0.x() * dp1) * inv_det; + if (det != 0.f) { + float inv_det = 1.f / det; + dp_du = ( dt1.y() * dp0 - dt0.y() * dp1) * inv_det; + dp_dv = (-dt1.x() * dp0 + dt0.x() * dp1) * inv_det; + } } } diff --git a/src/shapes/optix/sphere.cuh b/src/shapes/optix/sphere.cuh index 85fc473c6..068b4a29e 100644 --- a/src/shapes/optix/sphere.cuh +++ b/src/shapes/optix/sphere.cuh @@ -77,28 +77,32 @@ extern "C" __global__ void __closesthit__sphere() { phi += 2.f * M_PI; Vector2f uv = Vector2f(phi / (2.f * M_PI), theta / M_PI); - Vector3f dp_du = Vector3f(-local.y(), local.x(), 0.f); - float rd = sqrt(rd_2), - inv_rd = 1.f / rd, - cos_phi = local.x() * inv_rd, - sin_phi = local.y() * inv_rd; + Vector3f ng, dp_du, dp_dv; + if (params.fill_surface_interaction) { + dp_du = Vector3f(-local.y(), local.x(), 0.f); - Vector3f dp_dv = Vector3f(local.z() * cos_phi, - local.z() * sin_phi, - -rd); + 
float rd = sqrt(rd_2), + inv_rd = 1.f / rd, + cos_phi = local.x() * inv_rd, + sin_phi = local.y() * inv_rd; - // Check for singularity - if (rd == 0.f) - dp_dv = Vector3f(1.f, 0.f, 0.f); + dp_dv = Vector3f(local.z() * cos_phi, + local.z() * sin_phi, + -rd); - dp_du = sphere->to_world.transform_vector(dp_du) * (2.f * M_PI); - dp_dv = sphere->to_world.transform_vector(dp_dv) * M_PI; + // Check for singularity + if (rd == 0.f) + dp_dv = Vector3f(1.f, 0.f, 0.f); - if (sphere->flip_normals) - ns = -ns; + dp_du = sphere->to_world.transform_vector(dp_du) * (2.f * M_PI); + dp_dv = sphere->to_world.transform_vector(dp_dv) * M_PI; - Vector3f ng = ns; + if (sphere->flip_normals) + ns = -ns; + + ng = ns; + } write_output_params(params, launch_index, sbt_data->shape_ptr, diff --git a/src/shapes/ply.cpp b/src/shapes/ply.cpp index b198a77f3..6deb426df 100644 --- a/src/shapes/ply.cpp +++ b/src/shapes/ply.cpp @@ -192,6 +192,9 @@ class PLYMesh final : public Mesh { m_vertex_normals_buf.managed(); m_vertex_texcoords_buf.managed(); + if constexpr (is_cuda_array_v) + cuda_sync(); + size_t packet_count = el.count / elements_per_packet; size_t remainder_count = el.count % elements_per_packet; size_t i_packet_size = i_struct_size * elements_per_packet; diff --git a/src/shapes/rectangle.cpp b/src/shapes/rectangle.cpp index 1ea27cb9f..6f9718832 100644 --- a/src/shapes/rectangle.cpp +++ b/src/shapes/rectangle.cpp @@ -177,34 +177,52 @@ class Rectangle final : public Shape { && abs(local.y()) <= 1.f; } - void fill_surface_interaction(const Ray3f &ray_, const Float *cache, - SurfaceInteraction3f &si_out, Mask active) const override { + SurfaceInteraction3f fill_surface_interaction(const Ray3f &ray_, + const Float *cache, + const UInt32 &cache_indices, + SurfaceInteraction3f si, + Mask active) const override { MTS_MASK_ARGUMENT(active); -#if !defined(MTS_ENABLE_EMBREE) - Float local_x = cache[0]; - Float local_y = cache[1]; -#else - ENOKI_MARK_USED(cache); - Ray3f ray = 
m_to_object.transform_affine(ray_); - Float t = -ray.o.z() * ray.d_rcp.z(); - Point3f local = ray(t); - Float local_x = local.x(); - Float local_y = local.y(); + // TODO: make si differentiable w.r.t. shape parameters if necessary + +#if defined(MTS_ENABLE_EMBREE) + cache = nullptr; #endif - SurfaceInteraction3f si(si_out); + Float local_x, local_y; + if (cache) { + if constexpr (is_cuda_array_v){ + local_x = gather(cache[0], cache_indices, active); + local_y = gather(cache[1], cache_indices, active); + } else { + local_x = cache[0]; + local_y = cache[1]; + } + } else { + Ray3f ray = m_to_object.transform_affine(ray_); + Float t = -ray.o.z() * ray.d_rcp.z(); + Point3f local = ray(t); + local_x = local.x(); + local_y = local.y(); + } - si.n = m_frame.n; - si.sh_frame.n = m_frame.n; - si.dp_du = m_frame.s; - si.dp_dv = m_frame.t; - si.p = ray_(si.t); - si.time = ray_.time; - si.uv = Point2f(fmadd(local_x, .5f, .5f), - fmadd(local_y, .5f, .5f)); + si.n[active] = m_frame.n; + si.sh_frame.n[active] = m_frame.n; + si.dp_du[active] = m_frame.s; + si.dp_dv[active] = m_frame.t; + si.p[active] = ray_(si.t); + masked(si.time, active) = ray_.time; + si.uv[active] = Point2f(fmadd(local_x, .5f, .5f), + fmadd(local_y, .5f, .5f)); + + return si; + } - si_out[active] = si; + std::pair differentiable_position(const SurfaceInteraction3f &si, + Mask /*active*/) const override { + Vector2f local = 2.f * (si.uv - 0.5f); + return { m_to_world.transform_affine(Point3f(local.x(), local.y(), 0.f)) , m_frame.n }; } std::pair normal_derivative(const SurfaceInteraction3f & /*si*/, diff --git a/src/shapes/serialized.cpp b/src/shapes/serialized.cpp index 74294d09a..2876ebb7f 100644 --- a/src/shapes/serialized.cpp +++ b/src/shapes/serialized.cpp @@ -276,6 +276,9 @@ class SerializedMesh final : public Mesh { m_vertex_texcoords_buf.managed(); m_faces_buf.managed(); + if constexpr (is_cuda_array_v) + cuda_sync(); + bool double_precision = has_flag(flags, TriMeshFlags::DoublePrecision); 
read_helper(stream, double_precision, m_vertex_positions_buf.data(), 3); diff --git a/src/shapes/sphere.cpp b/src/shapes/sphere.cpp index 51aef6793..e371e9a56 100644 --- a/src/shapes/sphere.cpp +++ b/src/shapes/sphere.cpp @@ -326,16 +326,19 @@ class Sphere final : public Shape { return solution_found && !out_bounds && !in_bounds && active; } - void fill_surface_interaction(const Ray3f &ray, const Float * /*cache*/, - SurfaceInteraction3f &si_out, Mask active) const override { + SurfaceInteraction3f fill_surface_interaction(const Ray3f &ray, + const Float * /*cache*/, + const UInt32 & /*cache_indices*/, + SurfaceInteraction3f si, + Mask active) const override { MTS_MASK_ARGUMENT(active); - SurfaceInteraction3f si(si_out); + // TODO: make si differentiable w.r.t. shape parameters if necessary - si.sh_frame.n = normalize(ray(si.t) - m_center); + si.sh_frame.n[active] = normalize(ray(si.t) - m_center); // Re-project onto the sphere to improve accuracy - si.p = fmadd(si.sh_frame.n, m_radius, m_center); + si.p[active] = fmadd(si.sh_frame.n, m_radius, m_center); Vector3f local = m_to_object.transform_affine(si.p); @@ -345,32 +348,46 @@ class Sphere final : public Shape { masked(phi, phi < 0.f) += 2.f * math::Pi; - si.uv = Point2f(phi * math::InvTwoPi, theta * math::InvPi); - si.dp_du = Vector3f(-local.y(), local.x(), 0.f); + si.uv[active] = Point2f(phi * math::InvTwoPi, theta * math::InvPi); + si.dp_du[active] = Vector3f(-local.y(), local.x(), 0.f); Float rd = sqrt(rd_2), inv_rd = rcp(rd), cos_phi = local.x() * inv_rd, sin_phi = local.y() * inv_rd; - si.dp_dv = Vector3f(local.z() * cos_phi, - local.z() * sin_phi, - -rd); + si.dp_dv[active] = Vector3f(local.z() * cos_phi, + local.z() * sin_phi, + -rd); Mask singularity_mask = active && eq(rd, 0.f); if (unlikely(any(singularity_mask))) si.dp_dv[singularity_mask] = Vector3f(1.f, 0.f, 0.f); - si.dp_du = m_to_world * si.dp_du * (2.f * math::Pi); - si.dp_dv = m_to_world * si.dp_dv * math::Pi; + si.dp_du[active] = m_to_world * 
si.dp_du * (2.f * math::Pi); + si.dp_dv[active] = m_to_world * si.dp_dv * math::Pi; if (m_flip_normals) - si.sh_frame.n = -si.sh_frame.n; + si.sh_frame.n[active] = -si.sh_frame.n; - si.n = si.sh_frame.n; - si.time = ray.time; + si.n[active] = si.sh_frame.n; + masked(si.time, active) = ray.time; - si_out[active] = si; + return si; + } + + std::pair differentiable_position(const SurfaceInteraction3f &si, + Mask /*active*/) const override { + Float phi = si.uv.x() * math::TwoPi, + theta = si.uv.y() * math::Pi; + + auto [s_theta, c_theta] = sincos(theta); + auto [s_phi, c_phi] = sincos(phi); + + Point3f p = Point3f(s_theta * c_phi, s_theta * s_phi, c_theta); + Normal3f n = p; + + return { m_to_world.transform_affine(p), m_to_world.transform_affine(n) }; } std::pair normal_derivative(const SurfaceInteraction3f &si,