diff --git a/.depend b/.depend index de9cdd5bfd80..70c50965806a 100644 --- a/.depend +++ b/.depend @@ -44,7 +44,8 @@ utils/clflags.cmx : \ utils/clflags.cmi utils/clflags.cmi : \ utils/profile.cmi \ - utils/misc.cmi + utils/misc.cmi \ + utils/config.cmi utils/compression.cmo : \ utils/compression.cmi utils/compression.cmx : \ @@ -61,16 +62,6 @@ utils/config.cmo : \ utils/config.cmx : \ utils/config.cmi utils/config.cmi : -utils/config_boot.cmo : \ - utils/config_boot.cmi -utils/config_boot.cmx : \ - utils/config_boot.cmi -utils/config_boot.cmi : -utils/config_main.cmo : \ - utils/config_main.cmi -utils/config_main.cmx : \ - utils/config_main.cmi -utils/config_main.cmi : utils/consistbl.cmo : \ utils/misc.cmi \ utils/consistbl.cmi @@ -220,6 +211,16 @@ utils/warnings.cmx : \ utils/warnings.cmi utils/warnings.cmi : \ utils/format_doc.cmi +utils/config/config_boot.cmo : \ + utils/config/config_boot.cmi +utils/config/config_boot.cmx : \ + utils/config/config_boot.cmi +utils/config/config_boot.cmi : +utils/config/config_main.cmo : \ + utils/config/config_main.cmi +utils/config/config_main.cmx : \ + utils/config/config_main.cmi +utils/config/config_main.cmi : parsing/ast_helper.cmo : \ parsing/syntaxerr.cmi \ parsing/parsetree.cmi \ @@ -2394,6 +2395,7 @@ bytecomp/bytelink.cmo : \ utils/consistbl.cmi \ utils/config.cmi \ utils/compression.cmi \ + driver/compenv.cmi \ file_formats/cmo_format.cmi \ utils/clflags.cmi \ utils/ccomp.cmi \ @@ -2413,6 +2415,7 @@ bytecomp/bytelink.cmx : \ utils/consistbl.cmx \ utils/config.cmx \ utils/compression.cmx \ + driver/compenv.cmx \ file_formats/cmo_format.cmi \ utils/clflags.cmx \ utils/ccomp.cmx \ @@ -2476,6 +2479,17 @@ bytecomp/bytepackager.cmi : \ utils/format_doc.cmi \ typing/env.cmi \ file_formats/cmo_format.cmi +bytecomp/byterntm.cmo : \ + utils/misc.cmi \ + bytecomp/bytesections.cmi \ + bytecomp/byterntm.cmi +bytecomp/byterntm.cmx : \ + utils/misc.cmx \ + bytecomp/bytesections.cmx \ + bytecomp/byterntm.cmi +bytecomp/byterntm.cmi : \ + utils/misc.cmi \ + bytecomp/bytesections.cmi bytecomp/bytesections.cmo : \ utils/config.cmi \ bytecomp/bytesections.cmi @@ -2563,14 +2577,6 @@ bytecomp/instruct.cmi : \ lambda/lambda.cmi \ typing/ident.cmi \ typing/env.cmi -bytecomp/meta.cmo : \ - bytecomp/instruct.cmi \ - bytecomp/meta.cmi -bytecomp/meta.cmx : \ - bytecomp/instruct.cmx \ - bytecomp/meta.cmi -bytecomp/meta.cmi : \ - bytecomp/instruct.cmi bytecomp/opcodes.cmo : \ bytecomp/opcodes.cmi bytecomp/opcodes.cmx : \ @@ -2596,7 +2602,6 @@ bytecomp/symtable.cmo : \ lambda/runtimedef.cmi \ typing/predef.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/location.cmi \ lambda/lambda.cmi \ typing/ident.cmi \ @@ -2610,7 +2615,6 @@ bytecomp/symtable.cmx : \ lambda/runtimedef.cmx \ typing/predef.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/location.cmx \ lambda/lambda.cmx \ typing/ident.cmx \ @@ -2806,6 +2810,7 @@ asmcomp/asmlink.cmo : \ utils/consistbl.cmi \ utils/config.cmi \ middle_end/compilenv.cmi \ + driver/compenv.cmi \ file_formats/cmx_format.cmi \ asmcomp/cmm_helpers.cmi \ asmcomp/cmm.cmi \ @@ -2827,6 +2832,7 @@ asmcomp/asmlink.cmx : \ utils/consistbl.cmx \ utils/config.cmx \ middle_end/compilenv.cmx \ + driver/compenv.cmx \ file_formats/cmx_format.cmi \ asmcomp/cmm_helpers.cmx \ asmcomp/cmm.cmx \ @@ -5230,6 +5236,7 @@ middle_end/flambda/closure_conversion.cmo : \ lambda/debuginfo.cmi \ middle_end/convert_primitives.cmi \ utils/config.cmi \ + middle_end/compilenv.cmi \ middle_end/compilation_unit.cmi \ middle_end/flambda/base_types/closure_origin.cmi \ middle_end/flambda/base_types/closure_id.cmi \ @@ -5259,6 +5266,7 @@ middle_end/flambda/closure_conversion.cmx : \ lambda/debuginfo.cmx \ middle_end/convert_primitives.cmx \ utils/config.cmx \ + middle_end/compilenv.cmx \ middle_end/compilation_unit.cmx \ middle_end/flambda/base_types/closure_origin.cmx \ middle_end/flambda/base_types/closure_id.cmx \ @@ -7613,9 +7621,9 @@ toplevel/byte/topeval.cmo : \ typing/outcometree.cmi \ typing/out_type.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/location.cmi \ utils/load_path.cmi \ + bytecomp/instruct.cmi \ typing/ident.cmi \ typing/env.cmi \ bytecomp/emitcode.cmi \ @@ -7643,9 +7651,9 @@ toplevel/byte/topeval.cmx : \ typing/outcometree.cmi \ typing/out_type.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/location.cmx \ utils/load_path.cmx \ + bytecomp/instruct.cmx \ typing/ident.cmx \ typing/env.cmx \ bytecomp/emitcode.cmx \ @@ -7700,7 +7708,6 @@ toplevel/byte/trace.cmo : \ typing/predef.cmi \ typing/path.cmi \ utils/misc.cmi \ - bytecomp/meta.cmi \ parsing/longident.cmi \ typing/ctype.cmi \ parsing/asttypes.cmi \ @@ -7713,7 +7720,6 @@ toplevel/byte/trace.cmx : \ typing/predef.cmx \ typing/path.cmx \ utils/misc.cmx \ - bytecomp/meta.cmx \ parsing/longident.cmx \ typing/ctype.cmx \ parsing/asttypes.cmx \ @@ -8055,6 +8061,7 @@ tools/objinfo.cmo : \ typing/ident.cmi \ utils/format_doc.cmi \ middle_end/flambda/export_info.cmi \ + utils/config.cmi \ middle_end/compilation_unit.cmi \ file_formats/cmxs_format.cmi \ file_formats/cmx_format.cmi \ @@ -8062,6 +8069,7 @@ tools/objinfo.cmo : \ file_formats/cmo_format.cmi \ file_formats/cmi_format.cmi \ bytecomp/bytesections.cmi \ + bytecomp/byterntm.cmi \ utils/binutils.cmi \ tools/objinfo.cmi tools/objinfo.cmx : \ @@ -8078,6 +8086,7 @@ tools/objinfo.cmx : \ typing/ident.cmx \ utils/format_doc.cmx \ middle_end/flambda/export_info.cmx \ + utils/config.cmx \ middle_end/compilation_unit.cmx \ file_formats/cmxs_format.cmi \ file_formats/cmx_format.cmi \ @@ -8085,6 +8094,7 @@ tools/objinfo.cmx : \ file_formats/cmo_format.cmi \ file_formats/cmi_format.cmx \ bytecomp/bytesections.cmx \ + bytecomp/byterntm.cmx \ utils/binutils.cmx \ tools/objinfo.cmi tools/objinfo.cmi : @@ -9515,6 +9525,7 @@ ocamldoc/odoc_misc.cmo : \ typing/path.cmi \ ocamldoc/odoc_types.cmi \ ocamldoc/odoc_messages.cmi \ + ocamldoc/odoc_global.cmi \ parsing/longident.cmi \ typing/btype.cmi \ ocamldoc/odoc_misc.cmi @@ -9525,6 +9536,7 @@ ocamldoc/odoc_misc.cmx : \ typing/path.cmx \ ocamldoc/odoc_types.cmx \ ocamldoc/odoc_messages.cmx \ + ocamldoc/odoc_global.cmx \ parsing/longident.cmx \ typing/btype.cmx \ ocamldoc/odoc_misc.cmi @@ -10504,9 +10516,11 @@ testsuite/lib/testing.cmx : \ testsuite/lib/testing.cmi : testsuite/tools/cmdline.cmo : \ testsuite/tools/harness.cmi \ + utils/config.cmi \ testsuite/tools/cmdline.cmi testsuite/tools/cmdline.cmx : \ testsuite/tools/harness.cmx \ + utils/config.cmx \ testsuite/tools/cmdline.cmi testsuite/tools/cmdline.cmi : \ testsuite/tools/harness.cmi @@ -10542,6 +10556,7 @@ testsuite/tools/environment.cmo : \ file_formats/cmt_format.cmi \ file_formats/cmo_format.cmi \ bytecomp/bytesections.cmi \ + bytecomp/byterntm.cmi \ testsuite/tools/environment.cmi testsuite/tools/environment.cmx : \ otherlibs/unix/unix.cmx \ @@ -10552,6 +10567,7 @@ testsuite/tools/environment.cmx : \ file_formats/cmt_format.cmx \ file_formats/cmo_format.cmi \ bytecomp/bytesections.cmx \ + bytecomp/byterntm.cmx \ testsuite/tools/environment.cmi testsuite/tools/environment.cmi : \ testsuite/tools/harness.cmi @@ -10595,13 +10611,20 @@ testsuite/tools/expect.cmi : \ parsing/location.cmi testsuite/tools/harness.cmo : \ otherlibs/unix/unix.cmi \ + utils/misc.cmi \ utils/config.cmi \ + bytecomp/byterntm.cmi \ testsuite/tools/harness.cmi testsuite/tools/harness.cmx : \ otherlibs/unix/unix.cmx \ + utils/misc.cmx \ utils/config.cmx \ + bytecomp/byterntm.cmx \ testsuite/tools/harness.cmi -testsuite/tools/harness.cmi : +testsuite/tools/harness.cmi : \ + utils/misc.cmi \ + utils/config.cmi \ + bytecomp/byterntm.cmi testsuite/tools/lexcmm.cmo : \ testsuite/tools/parsecmm.cmi \ utils/misc.cmi \ @@ -10663,12 +10686,14 @@ testsuite/tools/parsecmmaux.cmi : \ middle_end/backend_var.cmi testsuite/tools/testBytecodeBinaries.cmo : \ otherlibs/unix/unix.cmi \ + utils/misc.cmi \ testsuite/tools/harness.cmi \ testsuite/tools/environment.cmi \ utils/config.cmi \ testsuite/tools/testBytecodeBinaries.cmi testsuite/tools/testBytecodeBinaries.cmx : \ otherlibs/unix/unix.cmx \ + utils/misc.cmx \ testsuite/tools/harness.cmx \ testsuite/tools/environment.cmx \ utils/config.cmx \ @@ -10754,7 +10779,6 @@ testsuite/tools/test_in_prefix.cmo : \ driver/compmisc.cmi \ testsuite/tools/cmdline.cmi \ utils/clflags.cmi \ - bytecomp/bytelink.cmi \ testsuite/tools/test_in_prefix.cmi testsuite/tools/test_in_prefix.cmx : \ otherlibs/unix/unix.cmx \ @@ -10772,15 +10796,16 @@ testsuite/tools/test_in_prefix.cmx : \ driver/compmisc.cmx \ testsuite/tools/cmdline.cmx \ utils/clflags.cmx \ - bytecomp/bytelink.cmx \ testsuite/tools/test_in_prefix.cmi testsuite/tools/test_in_prefix.cmi : testsuite/tools/test_ld_conf.cmo : \ + otherlibs/unix/unix.cmi \ testsuite/tools/harness.cmi \ testsuite/tools/environment.cmi \ utils/config.cmi \ testsuite/tools/test_ld_conf.cmi testsuite/tools/test_ld_conf.cmx : \ + otherlibs/unix/unix.cmx \ testsuite/tools/harness.cmx \ testsuite/tools/environment.cmx \ utils/config.cmx \ diff --git a/.gitattributes b/.gitattributes index ebdbbd725558..d9753105e0c4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -44,6 +44,21 @@ # the lines involved in the conflict, which is arguably worse #/Changes merge=union +testsuite/Makefile export-subst + +# Files and directories excluded from git-generated tarballs. +.github export-ignore +manual export-ignore +release-info export-ignore +testsuite/tests export-ignore +tools/ci export-ignore +.gitattributes export-ignore +.gitignore export-ignore +.gitmodules export-ignore +.mailmap export-ignore +ocaml-variants.install export-ignore +ocaml-variants.opam export-ignore + # No header for text and META files (would be too obtrusive). *.md typo.missing-header README* typo.missing-header @@ -92,6 +107,8 @@ build-aux typo.prune /manual typo.prune /manual/** typo.prune +tools/ci/appveyor/parallel linguist-vendored typo.prune + # configure is generated so do not check it configure typo.prune @@ -109,6 +126,7 @@ otherlibs/unix/symlink_win32.c typo.long-line # Some Unicode characters here and there utils/misc.ml typo.non-ascii runtime/sak.c typo.non-ascii +tools/opam/process.sh typo.non-ascii testsuite/tests/** typo.missing-header typo.long-line=may testsuite/tests/lib-bigarray-2/bigarrf.f typo.tab linguist-language=Fortran diff --git a/.github/workflows/build-msvc.yml b/.github/workflows/build-msvc.yml index 0ecffca6d32d..f639109dde7a 100644 --- a/.github/workflows/build-msvc.yml +++ b/.github/workflows/build-msvc.yml @@ -33,10 +33,13 @@ jobs: with: script: | // # Always test cl and clang-cl - let compilers = ['cl', 'clang-cl']; + let compilers = ['clang-cl']; // # Also test i686 MSVC let include = [ - {cc: 'cl', arch: 'i686'}]; + {os: 'windows-latest', cc: 'cl', arch: 'i686', opam: 'false', prefix: '$PROGRAMFILES/Бактріан🐫', libdir: 'relative'}, + {os: 'windows-2025', cc: 'cl', arch: 'x86_64', opam: 'true', prefix: 'C:\\\\Бактріан🐫'}, + {os: 'windows-2025', cc: 'cl', arch: 'i686', opam: 'true', prefix: 'C:\\\\Бактріан🐫'}]; + let libdir = ['absolute']; // # If this is a pull request, see if the PR has the // # 'CI: Full matrix' label. This is done using an API request, // # rather than from context.payload.pull_request.labels, since we @@ -52,10 +55,14 @@ jobs: // # Test Cygwin as well compilers.push('gcc'); // # Test bytecode-only Cygwin - include.push({cc: 'gcc', arch: 'x86_64', config_arg: '--disable-native-toplevel --disable-native-compiler'}); + include.push({os: 'windows-latest', prefix: '$PROGRAMFILES/Бактріан🐫', opam: ['false'], cc: 'gcc', arch: 'x86_64', libdir: 'absolute', config_arg: '--disable-native-toplevel --disable-native-compiler --with-target-sh=sh'}); + // # Test i686 MSVC absolute + include.push({os: 'windows-latest', prefix: '$PROGRAMFILES/Бактріан🐫', opam: ['false'], cc: 'cl', arch: 'i686', libdir: 'absolute'}); + // # Expand the main matrix to include relative testing + libdir.push('relative'); } } - return {config_arg: [''], arch: ['x86_64'], cc: compilers, include: include}; + return {os: ['windows-latest'], prefix: ['$PROGRAMFILES/Бактріан🐫'], opam: ['false'], config_arg: [''], arch: ['x86_64'], cc: compilers, libdir: libdir, include: include}; - name: Determine if the testsuite should be skipped id: skip uses: actions/github-script@v7 @@ -73,17 +80,17 @@ jobs: build: permissions: {} - runs-on: windows-latest + runs-on: ${{ matrix.os }} needs: config timeout-minutes: ${{ matrix.cc == 'gcc' && 90 || 60 }} - name: ${{ matrix.cc == 'cl' && 'MSVC' || matrix.cc == 'gcc' && 'Cygwin' || 'clang-cl' }} ${{ matrix.arch }} ${{ matrix.config_arg != '' && format('({0})', matrix.config_arg) || '' }} + name: ${{ matrix.cc == 'cl' && 'MSVC' || matrix.cc == 'gcc' && 'Cygwin' || 'clang-cl' }} ${{ matrix.arch }} ${{ matrix.libdir }} ${{ matrix.config_arg != '' && format('({0})', matrix.config_arg) || '' }} strategy: matrix: ${{ fromJSON(needs.config.outputs.matrix) }} - fail-fast: true + fail-fast: false steps: @@ -102,7 +109,7 @@ jobs: - name: Install Cygwin uses: cygwin/cygwin-install-action@v3 with: - packages: make,${{ matrix.cc != 'gcc' && 'mingw64-x86_64-' || 'gcc-g++,gcc-fortran,' }}gcc-core + packages: make,${{ matrix.cc != 'gcc' && 'mingw64-x86_64-' || 'gcc-g++,gcc-fortran,' }}gcc-core,rsync,unzip install-dir: 'D:\cygwin' - name: Save Cygwin cache @@ -118,6 +125,13 @@ jobs: arch: ${{ matrix.arch == 'x86_64' && 'x64' || 'x86' }} if: matrix.cc != 'gcc' + - name: Install opam + if: matrix.opam == 'true' + shell: pwsh + run: | + winget install opam --accept-source-agreements + Add-Content -Path $env:GITHUB_PATH -Value "$env:LOCALAPPDATA\Microsoft\WinGet\Links" + - name: Compute a key to cache configure results id: autoconf-cache-key env: @@ -136,11 +150,14 @@ jobs: env: CONFIG_ARGS: >- --cache-file=config.cache - --prefix "${{ matrix.cc != 'gcc' && '$PROGRAMFILES/Бактріан🐫' || '$(cygpath "$PROGRAMFILES/Бактріан🐫")'}}" + --prefix ${{ matrix.cc != 'gcc' && format('"{0}/_opam"', matrix.prefix) || format('"$(cygpath "{0}")"', matrix.prefix) }} + --docdir ${{ format((matrix.cc != 'gcc' && '"{0}/_opam/doc/ocaml"' || '"$(cygpath "{0}/doc/ocaml")"'), matrix.prefix) }} ${{ matrix.cc != 'gcc' && format('--host={0}-pc-windows', matrix.arch) || '' }} ${{ matrix.cc != 'gcc' && format('CC={0}', matrix.cc) || '' }} --enable-ocamltest ${{ endsWith(matrix.arch, '64') && '--enable-native-toplevel' || '--disable-native-toplevel' }} + ${{ matrix.libdir == 'relative' && (matrix.cc == 'gcc' && '--with-relative-libdir=../lib/ocaml' || '--with-relative-libdir=..\\lib\\ocaml') || '--without-relative-libdir' }} + ${{ matrix.libdir == 'relative' && '--enable-runtime-search=always --enable-runtime-search-target' || '--disable-runtime-search --disable-runtime-search-target' }} ${{ matrix.config_arg }} run: | eval $(tools/msvs-promote-path) @@ -193,8 +210,24 @@ jobs: make tests - name: Install the compiler + if: matrix.opam != 'true' run: make install + - name: Create opam switch + if: matrix.opam == 'true' + env: + OPAMSWITCH: ${{ matrix.prefix }} + run: | + make OPAM_PACKAGE_NAME=ocaml-variants INSTALL_MODE=opam install + opam init --bare --yes --disable-sandboxing --auto-setup --cygwin-local-install + opam switch create '${{ env.OPAMSWITCH }}' --empty + opam pin add --no-action --kind=path ocaml-variants . + opam pin add --no-action flexdll flexdll + opam pin add --no-action winpthreads winpthreads + opam install --yes flexdll winpthreads + opam install --yes --assume-built ocaml-variants + opam exec -- ocamlc -v + - name: Test in prefix run: | eval $(tools/msvs-promote-path) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5733285aef07..b90e8cb54590 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,7 +59,7 @@ jobs: '${{ github.event.repository.full_name }}' - name: Configure tree run: | - MAKE_ARG=-j CONFIG_ARG='--enable-flambda --enable-cmm-invariants --enable-codegen-invariants --enable-dependency-generation --enable-native-toplevel' OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh configure + MAKE_ARG=-j CONFIG_ARG='--enable-flambda --enable-cmm-invariants --enable-codegen-invariants --enable-dependency-generation --enable-native-toplevel --with-relative-libdir=../lib/ocaml --enable-runtime-search=always --enable-runtime-search-target' OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh configure - name: Build run: | MAKE_ARG=-j bash -xe tools/ci/actions/runner.sh build @@ -78,6 +78,7 @@ jobs: # debug runtime and minor heap verification. # debug-s4096: select testsuite run with the debug runtime and a small # minor heap. +# opam: constructs an opam local switch from the compiler. normal: name: ${{ matrix.name }} needs: [build, config] @@ -88,11 +89,13 @@ jobs: - id: normal name: normal dependencies: texlive-latex-extra texlive-fonts-recommended texlive-luatex hevea sass gdb lldb + - id: opam + name: opam installation - id: debug name: extra (debug) - id: debug-s4096 name: extra (debug-s4096) - fail-fast: true + fail-fast: false steps: - name: Download Artifact uses: actions/download-artifact@v4 @@ -134,11 +137,19 @@ jobs: - name: Install if: matrix.id == 'normal' run: | - MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh install + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh install + - name: Create opam switch + if: matrix.id == 'opam' + run: | + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh opam - name: Test in prefix - if: matrix.id == 'normal' + if: matrix.id == 'normal' || matrix.id == 'opam' run: | - MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh test-in-prefix + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh test-in-prefix + - name: Test in prefix (alternate configuration) + if: matrix.id == 'normal' && needs.config.outputs.full-matrix == 'true' + run: | + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh re-test-in-prefix - name: Build the manual if: matrix.id == 'normal' && needs.build.outputs.manual_changed == 'true' run: | @@ -152,9 +163,23 @@ jobs: config: runs-on: ubuntu-latest outputs: + full-matrix: ${{ steps.full.outputs.result }} jobs: ${{ steps.jobs.outputs.result }} skip-testsuite: ${{ steps.skip.outputs.result }} steps: + - name: Record if the build matrix is expanded + id: full + uses: actions/github-script@v7 + with: + script: | + let full_matrix = false; + if (context.payload.pull_request) { + const { data: labels } = + await github.rest.issues.listLabelsOnIssue({...context.repo, issue_number: context.payload.pull_request.number}); + full_matrix = labels.some(label => label.name === 'CI: Full matrix'); + } + console.log('Full matrix: ' + full_matrix); + return full_matrix; - name: Compute matrix for the "others" job id: jobs uses: actions/github-script@v7 @@ -165,10 +190,13 @@ jobs: {name: 'linux-O0', os: 'ubuntu-latest', config_arg: "CFLAGS='-O0'"}, {name: 'linux-arm64', os: 'ubuntu-24.04-arm', + config_arg: '--with-target-sh=exe --enable-runtime-search-target', 'test-in-prefix': true}, {name: 'macos-x86_64', os: 'macos-15-intel', + config_arg: '--with-target-sh=exe', 'test-in-prefix': true}, {name: 'macos-arm64', os: 'macos-latest', + config_arg: '--with-relative-libdir=../lib/ocaml --enable-runtime-search=always', 'test-in-prefix': true}]; // # If this is a pull request, see if the PR has the // # 'CI: Full matrix' label. This is done using an API request, @@ -182,13 +210,16 @@ jobs: await github.rest.issues.listLabelsOnIssue({...context.repo, issue_number: context.payload.pull_request.number}); if (labels.some(label => label.name === 'CI: Full matrix')) { console.log('Full matrix requested'); - // # Add "static" and "minimal" jobs + // # Add "static", "minimal" and "unsuffixed" jobs jobs = jobs.concat([ {name: 'static', os: 'ubuntu-latest', config_arg: '--disable-native-toplevel --disable-shared', 'test-in-prefix': true}, {name: 'minimal', os: 'ubuntu-latest', - config_arg: '--disable-native-toplevel --disable-native-compiler --disable-shared --disable-debug-runtime --disable-instrumented-runtime --disable-systhreads --disable-str-lib --disable-unix-lib --disable-ocamldoc'}]); + config_arg: '--disable-native-toplevel --disable-native-compiler --disable-shared --disable-debug-runtime --disable-instrumented-runtime --disable-systhreads --disable-str-lib --disable-unix-lib --disable-ocamldoc'}, + {name: 'unsuffixed', os: 'ubuntu-latest', + config_arg: '--disable-suffixing', + 'test-in-prefix': true}]); } } return jobs; @@ -213,7 +244,7 @@ jobs: strategy: matrix: include: ${{ fromJSON(needs.config.outputs.jobs) }} - fail-fast: true + fail-fast: false steps: - name: Checkout uses: actions/checkout@v5 @@ -258,6 +289,10 @@ jobs: if: ${{ matrix.test-in-prefix }} run: | MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh test-in-prefix + - name: Test in prefix (alternate configuration) + if: ${{ matrix.test-in-prefix && needs.config.outputs.full-matrix == 'true' && !contains(matrix.config_arg, '--disable-suffixing') }} + run: | + MAKE_ARG=-j OCAMLRUNPARAM=b,v=0 bash -xe tools/ci/actions/runner.sh re-test-in-prefix i386: runs-on: ubuntu-latest @@ -277,7 +312,7 @@ jobs: - name: configure tree run: | chown -R ocaml:ocaml . - MAKE_ARG=-j CONFIG_ARG='--disable-native-toplevel' su ocaml -c "bash -xe tools/ci/actions/runner.sh configure" + MAKE_ARG=-j CONFIG_ARG='--disable-native-toplevel --with-target-sh=exe' su ocaml -c "bash -xe tools/ci/actions/runner.sh configure" - name: Build run: | MAKE_ARG=-j su ocaml -c "bash -xe tools/ci/actions/runner.sh build" @@ -290,4 +325,8 @@ jobs: su ocaml -c "bash -xe tools/ci/actions/runner.sh install" - name: Test in prefix run: | - su ocaml -c "bash -xe tools/ci/actions/runner.sh test-in-prefix" + MAKE_ARG=-j su ocaml -c "bash -xe tools/ci/actions/runner.sh test-in-prefix" + - name: Test in prefix (alternate configuration) + if: ${{ needs.config.outputs.full-matrix == 'true' }} + run: | + MAKE_ARG=-j su ocaml -c "bash -xe tools/ci/actions/runner.sh re-test-in-prefix" diff --git a/.gitignore b/.gitignore index 761b059c7204..edb8f02c69a3 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ *.cmx[as] *.cmti *.annot +*.stripped *.exe *.exe.manifest .DS_Store @@ -83,6 +84,7 @@ META /bytecomp/opcodes.ml /bytecomp/opcodes.mli +/bytecomp/byterntm.ml /debugger/debugger_lexer.ml /debugger/debugger_parser.ml @@ -251,13 +253,10 @@ META /runtime/build_config.h /runtime/sak -/stdlib/runtime.info /stdlib/runtime-launch-info /stdlib/labelled-* /stdlib/caml /stdlib/sys.ml -/stdlib/target_runtime.info -/stdlib/target_runtime-launch-info /testsuite/**/*.result /testsuite/**/*.opt_result @@ -271,6 +270,7 @@ META /testsuite/_retries /testsuite/tools/codegen +/testsuite/tools/dummy /testsuite/tools/expect /testsuite/tools/lexcmm.ml /testsuite/tools/parsecmm.ml @@ -321,12 +321,12 @@ META /toplevel/native/trace.mli /toplevel/native/topmain.mli -/utils/config_boot.ml -/utils/config_boot.mli +/utils/config/config_boot.ml +/utils/config/config_boot.mli +/utils/config/config_main.ml +/utils/config/config_main.mli /utils/config.common.ml /utils/config.generated.ml -/utils/config_main.ml -/utils/config_main.mli /utils/config.ml /utils/domainstate.ml /utils/domainstate.mli diff --git a/Changes b/Changes index 318623daed50..8f2a76dc5187 100644 --- a/Changes +++ b/Changes @@ -66,6 +66,37 @@ Working version (Tim McGilchrist, review by Nick Barnes, Sadiq Jaffer and Gabriel Scherer) +* #14243: Explicit relative paths in ld.conf (".", "..", "./", + "../") are interpreted as being relative to the directory ld.conf + was loaded from, and the default ld.conf now uses relative paths, rather than + embedding the absolute path to the Standard Library. The brave may continue to + put implicit paths in ld.conf. The interpretation of CAML_LD_LIBRARY_PATH is + unaltered. Additionally, ld.conf is loaded from all of $OCAMLLIB/ld.conf, + $CAMLLIB/ld.conf and standard_library_default/ld.conf rather than just the + first one found. ld.conf files with CRLF line endings are now consistently on + both Windows and Unix. + (David Allsopp, review by Jonah Beckford, Hugo Heuzard and ???) + +- #14244: Added --with-relative-bindir which allows the runtime and the + compilers to locate the Standard Library relative to where the binaries + themselves are installed, removing the absolute path previously embedded in + caml_standard_library_default. Executables linked with `ocamlc -custom` now + always attempt to load bytecode from the executable itself, rather than first + trying `argv[0]`. + (David Allsopp, review by Jonah Beckford, Antonin Décimo, Samuel Hym, + Vincent Laviron and ???) + +- #14245: Introduce Runtime IDs for use in filename mangling to allow different + configurations and different versions of the runtime system to coexist + harmoniously on a single system. The IDs are used, along with the host + triplet, to provide mangled names for the ocamlrun executable and its variants + and the DLL versions of both the bytecode and native runtimes, with symlinks + created for the original names. They are also used to mangle the names of stub + libraries so that stub libraries compiled for a given configuration of the + runtime will only be sought by that runtime. The behaviour is disabled by + configuring with --disable-suffixing. + (David Allsopp, review by Samuel Hym and ???) + - #12269, #12410, #13063: Fix unsafety, deadlocks, and/or leaks should rare errors happen during domain creation and thread creation/registration. @@ -84,6 +115,39 @@ Working version continuations without calling `caml_continuation_use`. (Max Slater, review by Nick Barnes and Stephen Dolan) +- #14???: The Windows version of the executable launcher (stdlib/header.c) no + longer assumes that paths are limited to 260 (MAX_PATH) characters. + (David Allsopp, review by ???) + +- #14???: Executables linked with `ocamlc -custom` now always attempt to load + bytecode from the executable itself, rather than first trying `argv[0]`. + (David Allsopp, review by ???) + +- #14???: Change semantics of the internal caml_attempt_open bytecode runtime + function to require the caller to perform PATH-resolution prior to calling the + function. Affects the resolution of CAML_DEBUG_FILE, which is still searched + in PATH, but at program startup instead of when the debug information is first + loaded. + (David Allsopp, review by ???) + +- #14???: The CAML_DEBUG_FILE environment variable is now ignored if it set to + the empty string. + (David Allsopp, review by ???) + +* #14???: When set to the empty string, OCAMLRUNPARAM, OCAMLLIB and CAMLLIB are + now ignored (i.e. treated as if they were unset). In particular, this means + that a blank value for OCAMLLIB no longer prevents the compiler's default + Standard Library from being used. CAML_LD_LIBRARY_PATH now ignores empty + segments (instead of interpreting them as being ".") which also means that + CAML_LD_LIBRARY_PATH is ignored if it is just set to the empty string. Blank + lines in ld.conf are now likewise ignored. + (David Allsopp, review by ???) + +* #14???: Windows and Unix. When configured with --disable-shared, the runtime + now rejects bytecode executables which require DLLs to be loaded earlier, and + with a less unclear error message. + (David Allsopp, review by ???) + ### Code generation and optimizations: ### Standard library: @@ -125,6 +189,11 @@ Working version (Émile Trotignon, review by Nicolás Ojeda Bär, Jan Midtgaard and Damien Doligez) +- #14???: On Unix, the Filename module now treats TMPDIR='' (i.e. TMPDIR set, + but to an empty string) as TMPDIR=/tmp (as it already does if TMPDIR is not + set at all). + (David Allsopp, review by ???) + ### Type system: - #13781: Set scope of internal type nodes during abbreviation expansion @@ -182,6 +251,21 @@ Working version and socklen_param_type for socklen_t. (Antonin Décimo, review by Nicolás Ojeda Bär, David Allsopp and Samuel Hym) +- #14???: Improved the error message when bytecode Dynlink is asked to load an + archive which needs dynamic loading on a runtime which doesn't support it. + Previously, the full machinery to search for DLLs was invoked, and the error + confusingly included the name of a non-existent DLL. The entire process is now + skipped, with a general message that dynamic loading is needed and is + unavailable. + (David Allsopp, review by ???) + +- #14???: Remove duplicated linker options from unix.cma and unix.cmxa on + Windows. + (David Allsopp, review by ???) + +- #14???: Build and install threads.cmxs. + (David Allsopp, review by ???) + ### Tools: - #14055: Invert BUILD_PATH_PREFIX_MAP in directories loaded at startup @@ -217,6 +301,21 @@ Working version - #14239: Fix `#show_constructor` when printing non-GADT type parameters (Takafumi Saikawa, Jacques Garrigue, review by Gabriel Scherer) +- #14245: ocamlobjinfo now displays the runtime invoked by a bytecode + executable (either from the RNTM section or by analysing the shebang lines) + (David Allsopp, review by Samuel Hym and ???) + +- #14???: ocamlyacc treats TMPDIR='' (i.e. set, but to the empty string) as + TMPDIR=/tmp (as it already does if TMPDIR is not set at all). + (David Allsopp, review by ???) + +### Toplevel: + +- #14???: Empty segments in OCAMLTOP_INCLUDE_PATH are no longer interpreted as + adding the current working directory to the search path. The current working + directory can of course still be included in the search path by adding ".". + (David Allsopp, review by ???) + ### Manual and documentation: - #14293: Improve documentation of Runtime_events.Timestamp (Raphaël Proust, @@ -290,6 +389,23 @@ Working version (Gabriel Scherer, review by Nicolás Ojeda Bär and Florian Angeletti, report by Kate Deplaix) +- #14244: Add -set-runtime-default option to the compiler, allowing the default + value of the Standard Library location used by the runtime to be overridden. + (Antonin Décimo, review by David Allsopp, Jonah Beckford, Antonin Décimo, + Samuel Hym and ???) + +- #14245: New option -launch-method for ocamlc allows the method used by a + tendered bytecode executable to locate the interpreter to be given explicitly. + In particular, it makes it easier to specify the use of the executable + launcher on Unix. New option -runtime-search extends the bytecode executable + header to be able to search for the runtime interpreter in the directory + containing the executable and in PATH rather than relying on a single + hard-coded path. + (David Allsopp, review by Samuel Hym and ???) + +- #14???: Extend -set-runtime-default to allow the default values of all runtime + parameters to be overridden for any given binary. + (David Allsopp and Antonin Décimo, review by ???) ### Internal/compiler-libs changes: @@ -336,6 +452,10 @@ Working version - #14198 Constraints on module unpacking are not ghost (Thomas Refis, review by Nicolás Ojeda Bär) +- #14243: ocamlc now uses the same code as the runtime to parse ld.conf (via a + C primitive), eliminating some highly obscure corner cases. + (David Allsopp, review by Jonah Beckford, Hugo Heuzard and ???) + - #14260: Refactor Lambda.structured_constant to avoid duplicate representations for string constants (Vincent Laviron, review by Nicolás Ojeda Bär and Gabriel Scherer) @@ -354,11 +474,36 @@ Working version - #14331: Enforce current_level <= generic_level, and explain create_scope (Jacques Garrigue and Takafumi Saikawa, review by Gabriel Scherer) +- #14???: `-l` libraries are no longer given to the partial linker (for + `-output-complete-obj`) if `ocamlc`/`ocamlopt` cannot resolve them in the + current search path. Allows system libraries (e.g. `-lzstd`) to be specified + in .cma or .cmxa files without breaking `-output-complete-obj`. + (David Allsopp, review by ???) + ### Build system: - #13810: Support build of cross compilers to native freestanding targets (Samuel Hym, review by Antonin Décimo and Romain Calascibetta) +- #14243: New configure option --with-stublibs allows an additional directory + to be added to the start of ld.conf. Additionally, the stublibs subdirectory + is no longer created, nor added to ld.conf, when building OCaml with + --disable-shared. + (David Allsopp, review by Jonah Beckford, Hugo Heuzard and ???) + +- #14244: When targeting native Windows on Cygwin or MSYS2, preserve + backslashes in the supplied `--prefix` (in particular, backslashes instead of + slashes will then be displayed by `ocamlopt -config-var standard_library`). + If the supplied prefix contains a slash, then it is normalised, as + previously. + (David Allsopp, review by Jonah Beckford, Antonin Décimo, Samuel Hym and ???) + +- #14245: New --enable-runtime-search configure option controls the + -runtime-search option used to build the bytecode binaries in the compiler + distribution. --enable-runtime-search-target controls the default value of + -runtime-search used for bytecode executables produced by the compiler. + (David Allsopp, review by Samuel Hym and ???) + ### Bug fixes: - #14036: Fix nontermination of cycle printing in recursive modules with @@ -418,6 +563,11 @@ Working version - #14332: Fix missing TSan instrumentation in subexpressions (Vincent Laviron, review by Gabriel Scherer and Olivier Nicole) +- #14???: Fix the hand-off of the bytecode image to the runtime from the + executable header. In particular, running `ocamlc.byte` rather than + `ocamlc.byte.exe` now invokes the compiler instead of ocamlrun itself! + (David Allsopp, review by ???) + OCaml 5.4.0 (9 October 2025) ---------------------------- diff --git a/Makefile b/Makefile index 4491fd49a99d..6731f1dcc988 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,8 @@ # The main Makefile ROOTDIR = . +SUBDIR_NAME = + # NOTE: it is important that the OCAMLDEP and OCAMLLEX variables # are defined *before* Makefile.common gets included, so that # their local definitions here take precedence over their @@ -52,8 +54,6 @@ PERVASIVES=$(STDLIB_MODULES) outcometree topprinters topdirs toploop LIBFILES=stdlib.cma std_exit.cmo *.cmi $(HEADER_NAME) -COMPLIBDIR=$(LIBDIR)/compiler-libs - TOPINCLUDES=$(addprefix -I otherlibs/,$(filter-out %threads,$(OTHERLIBRARIES))) expunge := expunge$(EXE) @@ -188,7 +188,6 @@ comp_SOURCES = \ file_formats/cmo_format.mli \ file_formats/cmx_format.mli \ file_formats/cmxs_format.mli \ - bytecomp/meta.mli bytecomp/meta.ml \ bytecomp/opcodes.mli bytecomp/opcodes.ml \ bytecomp/bytesections.mli bytecomp/bytesections.ml \ bytecomp/dll.mli bytecomp/dll.ml \ @@ -207,6 +206,7 @@ ocamlcommon_SOURCES = \ $(lambda_SOURCES) $(comp_SOURCES) ocamlbytecomp_SOURCES = \ + bytecomp/byterntm.mll \ bytecomp/instruct.mli bytecomp/instruct.ml \ bytecomp/bytegen.mli bytecomp/bytegen.ml \ bytecomp/printinstr.mli bytecomp/printinstr.ml \ @@ -413,12 +413,12 @@ partialclean:: cd toplevel/byte ; rm -f $(TOPLEVEL_SHARED_ARTEFACTS) cd toplevel/native ; rm -f $(TOPLEVEL_SHARED_ARTEFACTS) -ALL_CONFIG_CMO = utils/config_main.cmo utils/config_boot.cmo +ALL_CONFIG_CMO = utils/config/config_main.cmo utils/config/config_boot.cmo -utils/config_%.mli: utils/config.mli +utils/config/config_%.mli: utils/config.mli | utils/config cp $^ $@ -beforedepend:: utils/config_main.mli utils/config_boot.mli +beforedepend:: utils/config/config_main.mli utils/config/config_boot.mli $(addprefix compilerlibs/ocamlcommon., cma cmxa): \ OC_COMMON_LINKFLAGS += -linkall @@ -475,17 +475,22 @@ partialclean:: # The configuration file utils/config.ml: \ - utils/config_$(if $(filter true,$(IN_COREBOOT_CYCLE)),boot,main).ml + utils/config/config_$(if $(filter true,$(IN_COREBOOT_CYCLE)),boot,main).ml $(V_GEN)cp $< $@ -utils/config_boot.ml: utils/config.fixed.ml utils/config.common.ml +utils/config: + $(MKDIR) $@ +utils/config/config_boot.ml: \ + utils/config.fixed.ml utils/config.common.ml | utils/config $(V_GEN)cat $^ > $@ - -utils/config_main.ml: utils/config.generated.ml utils/config.common.ml +utils/config/config_main.ml: \ + utils/config.generated.ml utils/config.common.ml | utils/config $(V_GEN)cat $^ > $@ +ADDITIONAL_CONFIGURE_ARGS ?= .PHONY: reconfigure reconfigure: - ac_read_git_config=true ./configure $(CONFIGURE_ARGS) + ac_read_git_config=true ./configure $(CONFIGURE_ARGS) \ + $(ADDITIONAL_CONFIGURE_ARGS) utils/domainstate.ml: utils/domainstate.ml.c runtime/caml/domain_state.tbl $(V_GEN)$(CPP) -I runtime/caml $< > $@ @@ -499,13 +504,13 @@ configure: tools/autogen configure.ac aclocal.m4 build-aux/ocaml_version.m4 .PHONY: partialclean partialclean:: rm -f utils/config.ml \ - utils/config_main.ml utils/config_main.mli \ - utils/config_boot.ml utils/config_boot.mli \ + utils/config/config_main.ml utils/config/config_main.mli \ + utils/config/config_boot.ml utils/config/config_boot.mli \ utils/domainstate.ml utils/domainstate.mli .PHONY: beforedepend beforedepend:: \ - utils/config.ml utils/config_boot.ml utils/config_main.ml \ + utils/config.ml utils/config/config_boot.ml utils/config/config_main.ml \ utils/domainstate.ml utils/domainstate.mli ocamllex_PROGRAMS = $(addprefix lex/,ocamllex ocamllex.opt) @@ -631,7 +636,7 @@ USE_STDLIB = -nostdlib -I ../stdlib FLEXDLL_OBJECTS = \ flexdll_$(FLEXDLL_CHAIN).$(O) flexdll_initer_$(FLEXDLL_CHAIN).$(O) FLEXLINK_BUILD_ENV = \ - MSVCC_ROOT= \ + MSVCC_ROOT= MSVCC=$(CC) MSVCC64=$(CC) \ MSVC_DETECT=0 OCAML_CONFIG_FILE=../Makefile.config \ CHAINS=$(FLEXDLL_CHAIN) ROOTDIR=.. ifneq ($(RC),) @@ -659,7 +664,10 @@ flexlink.byte$(EXE): $(FLEXDLL_SOURCES) rm -f $(FLEXDLL_SOURCE_DIR)/flexlink.exe $(MAKE) -C $(FLEXDLL_SOURCE_DIR) $(FLEXLINK_BUILD_ENV) \ OCAMLRUN='$$(ROOTDIR)/boot/ocamlrun$(EXE)' NATDYNLINK=false \ - OCAMLOPT='$(value BOOT_OCAMLC) $(USE_RUNTIME_PRIMS) $(USE_STDLIB)' \ + OCAMLOPT=$(call QUOTE_SINGLE,$(value BOOT_OCAMLC) \ + $(USE_RUNTIME_PRIMS) \ + $(BYTECODE_LAUNCHER_FLAGS) \ + $(USE_STDLIB)) \ flexlink.exe support cp $(FLEXDLL_SOURCE_DIR)/flexlink.exe $@ cp $(addprefix $(FLEXDLL_SOURCE_DIR)/, $(FLEXDLL_OBJECTS)) $(ROOTDIR) @@ -706,7 +714,7 @@ boot/ocamlrun$(EXE): # Start up the system from the distribution compiler .PHONY: coldstart -coldstart: boot/ocamlrun$(EXE) runtime/libcamlrun.$(A) +coldstart: boot/ocamlrun$(EXE) stdlib/libcamlrun.$(A) $(MAKE) -C stdlib OCAMLRUN='$$(ROOTDIR)/$<' USE_BOOT_OCAMLC=true all rm -f $(addprefix boot/, libcamlrun.$(A) $(LIBFILES)) cp $(addprefix stdlib/, $(LIBFILES)) boot @@ -731,13 +739,6 @@ CMPCMD ?= $(OCAMLRUN) tools/cmpbyt$(EXE) .PHONY: compare compare: -# The core system has to be rebuilt after bootstrap anyway, so strip ocamlc -# and ocamllex, which means the artefacts should be identical. - mv ocamlc$(EXE) ocamlc.tmp - $(OCAMLRUN) tools/stripdebug$(EXE) -all ocamlc.tmp ocamlc$(EXE) - mv lex/ocamllex$(EXE) ocamllex.tmp - $(OCAMLRUN) tools/stripdebug$(EXE) -all ocamllex.tmp lex/ocamllex$(EXE) - rm -f ocamllex.tmp ocamlc.tmp @if $(CMPCMD) boot/ocamlc ocamlc$(EXE) \ && $(CMPCMD) boot/ocamllex lex/ocamllex$(EXE); \ then echo "Fixpoint reached, bootstrap succeeded."; \ @@ -748,12 +749,10 @@ compare: # Promote a compiler -PROMOTE ?= cp - .PHONY: promote-common promote-common: - $(PROMOTE) ocamlc$(EXE) boot/ocamlc - $(PROMOTE) lex/ocamllex$(EXE) boot/ocamllex + cp ocamlc$(EXE) boot/ocamlc + cp lex/ocamllex$(EXE) boot/ocamllex cd stdlib; cp $(LIBFILES) ../boot # Promote the newly compiled system to the rank of cross compiler @@ -764,7 +763,6 @@ promote-cross: promote-common # Promote the newly compiled system to the rank of bootstrap compiler # (Runs on the new runtime, produces code for the new runtime) .PHONY: promote -promote: PROMOTE = $(OCAMLRUN) tools/stripdebug$(EXE) -all promote: promote-common rm -f boot/ocamlrun$(EXE) cp runtime/ocamlrun$(EXE) boot/ocamlrun$(EXE) @@ -899,7 +897,7 @@ flexlink.opt$(EXE): \ $(FLEXDLL_SOURCES) | $(BYTE_BINDIR)/flexlink$(EXE) $(OPT_BINDIR) rm -f $(FLEXDLL_SOURCE_DIR)/flexlink.exe $(MAKE) -C $(FLEXDLL_SOURCE_DIR) $(FLEXLINK_BUILD_ENV) \ - OCAMLOPT='$(FLEXLINK_OCAMLOPT) -nostdlib -I ../stdlib' flexlink.exe + OCAMLOPT='$(FLEXLINK_OCAMLOPT) -nostdlib -I ../stdlib $(SET_RELATIVE_STDLIB)' flexlink.exe cp $(FLEXDLL_SOURCE_DIR)/flexlink.exe $@ rm -f $(OPT_BINDIR)/flexlink$(EXE) cd $(OPT_BINDIR); $(LN) $(call ROOT_FROM, $(OPT_BINDIR))/$@ flexlink$(EXE) @@ -928,8 +926,6 @@ partialclean:: rm -f flexlink.opt flexlink.opt.exe \ $(OPT_BINDIR)/flexlink $(OPT_BINDIR)/flexlink.exe -INSTALL_COMPLIBDIR = $(DESTDIR)$(COMPLIBDIR) -INSTALL_FLEXDLLDIR = $(INSTALL_LIBDIR)/flexdll FLEXDLL_MANIFEST = default$(filter-out _i386,_$(ARCH)).manifest DOC_FILES=\ @@ -953,11 +949,14 @@ clean:: # Build the manual latex files from the etex source files # (see manual/README.md) .PHONY: manual-pregen -manual-pregen: opt.opt - cd manual; $(MAKE) clean && $(MAKE) pregen-etex +manual-pregen: opt.opt | manual + $(MAKE) -C manual clean + $(MAKE) -C manual pregen-etex +ifneq "$(wildcard manual)" "" clean:: $(MAKE) -C manual clean +endif # The clean target clean:: partialclean @@ -976,8 +975,13 @@ ocamlc_SOURCES = driver/main.mli driver/main.ml ocamlc_BYTECODE_LINKFLAGS = -compat-32 -g +ifeq "$(IN_COREBOOT_CYCLE)" "true" +ocamlc_BYTECODE_LINKFLAGS += \ +-no-g -without-runtime -set-runtime-default standard_library_default=. +endif + partialclean:: - rm -f ocamlc ocamlc.exe ocamlc.opt ocamlc.opt.exe + rm -f ocamlc ocamlc.exe ocamlc.opt ocamlc.opt.exe ocamlc*.stripped # The native-code compiler @@ -988,7 +992,7 @@ ocamlopt_SOURCES = driver/optmain.mli driver/optmain.ml ocamlopt_BYTECODE_LINKFLAGS = -g partialclean:: - rm -f ocamlopt ocamlopt.exe ocamlopt.opt ocamlopt.opt.exe + rm -f ocamlopt ocamlopt.exe ocamlopt.opt ocamlopt.opt.exe ocamlopt*.stripped # The toplevel @@ -1115,12 +1119,12 @@ otherlibs/dynlink.depend: beforedepend otherlibs/dynlink/native/dynlink.ml \ >> $@ -# Cleanup the lexer +# Cleanup the lexers partialclean:: - rm -f parsing/lexer.ml + rm -f bytecomp/byterntm.ml parsing/lexer.ml -beforedepend:: parsing/lexer.ml +beforedepend:: bytecomp/byterntm.ml parsing/lexer.ml # The predefined exceptions and primitives @@ -1253,6 +1257,10 @@ runtime_COMMON_C_SOURCES = \ $(UNIX_OR_WIN32) \ weak +ifeq "$(UNIX_OR_WIN32)" "unix" +runtime_COMMON_C_SOURCES += unix_executable +endif + runtime_BYTECODE_ONLY_C_SOURCES = \ backtrace_byt \ fail_byt \ @@ -1287,22 +1295,21 @@ runtime_BUILT_HEADERS = $(addprefix runtime/, \ ## Targets to build and install -runtime_PROGRAMS = runtime/ocamlrun$(EXE) -runtime_BYTECODE_STATIC_LIBRARIES = $(addprefix runtime/, \ - ld.conf libcamlrun.$(A)) +runtime_PROGRAMS = ocamlrun +runtime_BYTECODE_STATIC_LIBRARIES = runtime/libcamlrun.$(A) runtime_BYTECODE_SHARED_LIBRARIES = runtime_NATIVE_STATIC_LIBRARIES = \ runtime/libasmrun.$(A) runtime/libcomprmarsh.$(A) runtime_NATIVE_SHARED_LIBRARIES = ifeq "$(RUNTIMED)" "true" -runtime_PROGRAMS += runtime/ocamlrund$(EXE) +runtime_PROGRAMS += ocamlrund runtime_BYTECODE_STATIC_LIBRARIES += runtime/libcamlrund.$(A) runtime_NATIVE_STATIC_LIBRARIES += runtime/libasmrund.$(A) endif ifeq "$(INSTRUMENTED_RUNTIME)" "true" -runtime_PROGRAMS += runtime/ocamlruni$(EXE) +runtime_PROGRAMS += ocamlruni runtime_BYTECODE_STATIC_LIBRARIES += runtime/libcamlruni.$(A) runtime_NATIVE_STATIC_LIBRARIES += runtime/libasmruni.$(A) endif @@ -1310,9 +1317,9 @@ endif ifeq "$(UNIX_OR_WIN32)" "unix" ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" runtime_BYTECODE_STATIC_LIBRARIES += runtime/libcamlrun_pic.$(A) -runtime_BYTECODE_SHARED_LIBRARIES += runtime/libcamlrun_shared.$(SO) +runtime_BYTECODE_SHARED_LIBRARIES += camlrun runtime_NATIVE_STATIC_LIBRARIES += runtime/libasmrun_pic.$(A) -runtime_NATIVE_SHARED_LIBRARIES += runtime/libasmrun_shared.$(SO) +runtime_NATIVE_SHARED_LIBRARIES += asmrun endif endif @@ -1363,13 +1370,15 @@ ocamlruni_CPPFLAGS = $(runtime_CPPFLAGS) -DCAML_INSTR .PHONY: runtime-all runtime-all: \ - $(runtime_BYTECODE_STATIC_LIBRARIES) $(runtime_BYTECODE_SHARED_LIBRARIES) \ - $(runtime_PROGRAMS) $(SAK) + $(runtime_BYTECODE_STATIC_LIBRARIES) \ + $(runtime_BYTECODE_SHARED_LIBRARIES:%=runtime/lib%_shared$(EXT_DLL)) \ + $(runtime_PROGRAMS:%=runtime/%$(EXE)) $(SAK) .PHONY: runtime-allopt ifeq "$(NATIVE_COMPILER)" "true" runtime-allopt: \ - $(runtime_NATIVE_STATIC_LIBRARIES) $(runtime_NATIVE_SHARED_LIBRARIES) + $(runtime_NATIVE_STATIC_LIBRARIES) \ + $(runtime_NATIVE_SHARED_LIBRARIES:%=runtime/lib%_shared$(EXT_DLL)) else runtime-allopt: $(error The build has been configured with --disable-native-compiler) @@ -1377,10 +1386,6 @@ endif ## Generated non-object files -runtime/ld.conf: $(ROOTDIR)/Makefile.config - $(V_GEN)echo "$(STUBLIBDIR)" > $@ && \ - echo "$(LIBDIR)" >> $@ - runtime/primitives: runtime/gen_primitives.sh $(runtime_BYTECODE_C_SOURCES) $(V_GEN)runtime/gen_primitives.sh $@ $(runtime_BYTECODE_C_SOURCES) @@ -1407,16 +1412,20 @@ runtime/caml/jumptbl.h : runtime/caml/instruct.h $(SAK): runtime/sak.c runtime/caml/misc.h runtime/caml/config.h $(V_MKEXE)$(call SAK_BUILD,$@,$<) -C_LITERAL = $(shell $(SAK) $(ENCODE_C_LITERAL) '$(1)') +C_LITERAL = $(shell $(SAK) $(ENCODE_C_LITERAL) $(call QUOTE_SINGLE,$(1))) -runtime/build_config.h: $(ROOTDIR)/Makefile.config $(SAK) +runtime/build_config.h: $(ROOTDIR)/Makefile.config \ + $(ROOTDIR)/Makefile.build_config $(SAK) $(V_GEN){ \ echo '/* This file is generated from $(ROOTDIR)/Makefile.config */'; \ printf '#define OCAML_STDLIB_DIR %s\n' \ - '$(call C_LITERAL,$(TARGET_LIBDIR))'; \ + $(call QUOTE_SINGLE,$(call C_LITERAL,$(TARGET_LIBDIR))); \ echo '#define HOST "$(HOST)"'; \ + echo '#define BYTECODE_RUNTIME_ID "$(BYTECODE_RUNTIME_ID)"'; \ } > $@ +runtime/prims.$(O): runtime/build_config.h + ## Runtime libraries and programs runtime/ocamlrun$(EXE): runtime/prims.$(O) runtime/libcamlrun.$(A) @@ -1619,14 +1628,14 @@ include $(addprefix $(DEPDIR)/, $(runtime_DEP_FILES)) endif .PHONY: runtime -runtime: stdlib/libcamlrun.$(A) +runtime: stdlib/libcamlrun.$(A) runtime-all .PHONY: makeruntime makeruntime: runtime-all -stdlib/libcamlrun.$(A): runtime-all +stdlib/libcamlrun.$(A): runtime/libcamlrun.$(A) cd stdlib; $(LN) ../runtime/libcamlrun.$(A) . clean:: - rm -f $(addprefix runtime/, *.o *.obj *.a *.lib *.so *.dll ld.conf) + rm -f $(addprefix runtime/, *.o *.obj *.a *.lib *.so *.dll) rm -f $(addprefix runtime/, ocamlrun ocamlrund ocamlruni ocamlruns sak) rm -f $(addprefix runtime/, \ ocamlrun.exe ocamlrund.exe ocamlruni.exe ocamlruns.exe sak.exe) @@ -1673,7 +1682,7 @@ library: ocamlc $(MAKE) -C stdlib all .PHONY: library-cross -library-cross: +library-cross: stdlib/libcamlrun.$(A) $(MAKE) -C stdlib OCAMLRUN=../runtime/ocamlrun$(EXE) all .PHONY: libraryopt @@ -1716,6 +1725,11 @@ ocamllex.opt: ocamlopt ocamllex_BYTECODE_LINKFLAGS = -compat-32 +ifeq "$(IN_COREBOOT_CYCLE)" "true" +ocamllex_BYTECODE_LINKFLAGS += \ +-no-g -without-runtime -set-runtime-default standard_library_default=. +endif + partialclean:: rm -f lex/*.cm* lex/*.o lex/*.obj \ $(ocamllex_PROGRAMS) $(ocamllex_PROGRAMS:=.exe) \ @@ -1875,6 +1889,13 @@ OCAMLDOC_LIBCMTS=$(OCAMLDOC_LIBMLIS:.mli=.cmt) $(OCAMLDOC_LIBMLIS:.mli=.cmti) ocamldoc/%: CAMLC = $(BEST_OCAMLC) $(STDLIBFLAGS) ocamldoc/%: CAMLOPT = $(BEST_OCAMLOPT) $(STDLIBFLAGS) +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "false" +# ocamldoc needs a custom runtime when building statically owing to the C stubs +# in unix.cma and str.cma. This is specified explicitly to suppress the default +# linking flags (see $(MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS) in Makefile.common) +ocamldoc/ocamldoc$(EXE): ocamldoc_BYTECODE_LINKFLAGS += -custom +endif + .PHONY: ocamldoc ocamldoc: ocamldoc/ocamldoc$(EXE) ocamldoc/odoc_test.cmo \ ocamlc ocamlyacc ocamllex @@ -2022,8 +2043,20 @@ testsuite/tools/test_in_prefi%: CAMLC = $(BEST_OCAMLC) $(STDLIBFLAGS) test_in_prefix_BYTECODE_LINKFLAGS += -custom +ifeq "$(TARGET_LIBDIR_IS_RELATIVE)" "true" +# testsuite/tools/test_in_prefix cannot use a relative stdlib because it is run +# from testsuite/tools, not from the installation tree (the alternative would be +# to compile it directly with the installed compiler) +test_in_prefix_NATIVE_LINKFLAGS = +test_in_prefix_COMMON_LINKFLAGS = \ + -set-runtime-default 'standard_library_default=$(LIBDIR)' +endif + testsuite/tools/test_in_prefi%: CAMLOPT = $(BEST_OCAMLOPT) $(STDLIBFLAGS) +testsuite/tools/dummy$(EXE): testsuite/tools/dummy.$(O) + $(V_MKEXE)$(call MKEXE_VIA_CC,$@,$^) + ocamltest_BYTECODE_LINKFLAGS = -custom -g ocamltest/ocamltest$(EXE): ocamlc ocamlyacc ocamllex @@ -2075,6 +2108,7 @@ partialclean:: rm -f $(addprefix testsuite/lib/*.,cm* o obj a lib) rm -f $(addprefix testsuite/tools/*.,cm* o obj a lib) rm -f testsuite/tools/codegen testsuite/tools/codegen.exe + rm -f testsuite/tools/dummy testsuite/tools/dummy.exe rm -f testsuite/tools/expect testsuite/tools/expect.exe rm -f testsuite/tools/test_in_prefix testsuite/tools/test_in_prefix.exe rm -f testsuite/tools/test_in_prefix.opt \ @@ -2233,6 +2267,13 @@ debugger/ocamldebug.cmo: $(ocamldebug_DEBUGGER_OBJECTS) debugger/ocamldebug_entry.cmo: debugger/ocamldebug.cmo +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "false" +# ocamldebug needs a custom runtime when building statically owing to the +# C stubs in unix.cma. This is specified explicitly to suppress the default +# linking flags (see $(MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS) in Makefile.common) +debugger/ocamldebug$(EXE): ocamldebug_BYTECODE_LINKFLAGS += -custom +endif + clean:: rm -f debugger/ocamldebug debugger/ocamldebug.exe rm -f debugger/debugger_lexer.ml @@ -2501,6 +2542,13 @@ $(ocamltex): VPATH += $(addprefix otherlibs/,str unix) tools/ocamltex.cmo: OC_COMMON_COMPFLAGS += -no-alias-deps +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "false" +# ocamltex needs a custom runtime when building statically owing to the C stubs +# in unix.cma and str.cma. This is specified explicitly to suppress the default +# linking flags (see $(MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS) in Makefile.common) +tools/ocamltex$(EXE): ocamltex_BYTECODE_LINKFLAGS += -custom +endif + # we need str and unix which depend on the bytecode version of other tools # thus we use the othertools target ## Test compilation of backend-specific parts @@ -2596,8 +2644,8 @@ endif $(V_OCAMLOPT)$(COMPILE_NATIVE_MODULE) -c $< partialclean:: - for d in utils parsing typing bytecomp asmcomp middle_end file_formats \ - lambda middle_end/closure middle_end/flambda \ + for d in utils utils/config parsing typing bytecomp asmcomp middle_end \ + file_formats lambda middle_end/closure middle_end/flambda \ middle_end/flambda/base_types \ driver toplevel toplevel/byte toplevel/native tools debugger; do \ rm -f $$d/*.cm[ioxt] $$d/*.cmti $$d/*.annot $$d/*.s $$d/*.asm \ @@ -2658,10 +2706,11 @@ $(foreach file, asmcomp/emit.ml $(ARCH_SPECIFIC),\ $(eval $(call MV_FILE,$(file).depend,$(file)))) DEP_DIRS = \ - utils parsing typing bytecomp asmcomp middle_end lambda file_formats \ - middle_end/closure middle_end/flambda middle_end/flambda/base_types driver \ - toplevel toplevel/byte toplevel/native lex tools debugger ocamldoc ocamltest \ - testsuite/lib testsuite/tools otherlibs/dynlink + utils utils/config parsing typing bytecomp asmcomp middle_end lambda \ + file_formats middle_end/closure middle_end/flambda \ + middle_end/flambda/base_types driver toplevel toplevel/byte toplevel/native \ + lex tools debugger ocamldoc ocamltest testsuite/lib testsuite/tools \ + otherlibs/dynlink DEP_FILES = $(addsuffix .depend, $(DEP_DIRS)) @@ -2677,7 +2726,9 @@ distclean: clean ifneq "$(FLEXDLL_SUBMODULE_PRESENT)" "" $(MAKE) -C flexdll distclean MSVC_DETECT=0 endif +ifneq "$(wildcard manual)" "" $(MAKE) -C manual distclean +endif rm -f ocamldoc/META rm -f $(addprefix ocamltest/,ocamltest_config.ml ocamltest_unix.ml) rm -f otherlibs/dynlink/META otherlibs/dynlink/dynlink_config.ml \ @@ -2685,11 +2736,12 @@ endif otherlibs/dynlink/dynlink_cmxs_format.mli \ otherlibs/dynlink/dynlink_platform_intf.mli $(MAKE) -C otherlibs distclean - rm -f $(runtime_CONFIGURED_HEADERS) + rm -f $(runtime_CONFIGURED_HEADERS) runtime/ld.conf $(MAKE) -C stdlib distclean $(MAKE) -C testsuite distclean rm -f tools/eventlog_metadata tools/*.bak rm -f utils/config.common.ml utils/config.generated.ml + rm -rf utils/config rm -f compilerlibs/META rm -f boot/ocamlrun boot/ocamlrun.exe boot/$(HEADER_NAME) \ boot/flexdll_*.o boot/flexdll_*.obj \ @@ -2699,319 +2751,336 @@ endif $(BYTE_BUILD_TREE) $(OPT_BUILD_TREE) rm -f config.log config.status libtool -INSTALL_LIBDIR_DYNLINK = $(INSTALL_LIBDIR)/dynlink +# COMPILER_ARTEFACT_DIRS adds the common compiler-libs directories as prefixes +# to a sequence of patterns in the first argument, e.g. +# $(call COMPILER_ARTEFACT_DIRS, *.cmi) expands to utils/*.cmi, parsing/*.cmi, +# and so forth. Multiple wildcard patterns may be supplied. An optional second +# argument includes additional directories beyond the common ones (e.g. asmcomp, +# etc.) +COMPILER_ARTEFACT_DIRS = \ + $(foreach dir, \ + utils parsing typing bytecomp file_formats lambda driver toplevel \ + $(if $(filter-out undefined, $(origin 2)), $(2)), \ + $(addprefix $(dir)/, $(1))) +NATIVE_ARTEFACT_DIRS = \ + asmcomp toplevel/native \ + middle_end middle_end/closure middle_end/flambda middle_end/flambda/base_types # Installation -.PHONY: install -install: - $(MKDIR) "$(INSTALL_BINDIR)" - $(MKDIR) "$(INSTALL_LIBDIR)" - $(MKDIR) "$(INSTALL_STUBLIBDIR)" - $(MKDIR) "$(INSTALL_COMPLIBDIR)" - $(MKDIR) "$(INSTALL_DOCDIR)" - $(MKDIR) "$(INSTALL_INCDIR)" - $(MKDIR) "$(INSTALL_LIBDIR_PROFILING)" - $(INSTALL_PROG) $(runtime_PROGRAMS) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) $(runtime_BYTECODE_STATIC_LIBRARIES) \ - "$(INSTALL_LIBDIR)" -ifneq "$(runtime_BYTECODE_SHARED_LIBRARIES)" "" - $(INSTALL_PROG) $(runtime_BYTECODE_SHARED_LIBRARIES) \ - "$(INSTALL_LIBDIR)" -endif - $(INSTALL_DATA) runtime/caml/domain_state.tbl runtime/caml/*.h \ - "$(INSTALL_INCDIR)" - $(INSTALL_PROG) ocaml$(EXE) "$(INSTALL_BINDIR)" +# Historically, the install target dynamically installed what had been built, +# for example, if only world had been built then make install simply didn't +# install the native tools. That infrastructure is potentially convenient when +# working on the compiler, but potentially masks bugs. It is better to have the +# installation targets require everything configure mandated to have built. +# There are three entry points to installation: +# install - installs everything +# installopt - installs the native code compiler _and_ the extra .opt tools +# installoptopt - intalls just the extra .opt tools +# The installopt targets have been maintained for now, but may be removed in the +# future. + +ifeq "$(NATIVE_COMPILER)" "true" +install: full-installoptopt + $(call INSTALL_END) +else +install: common-install + $(call INSTALL_END) +endif + +# These three targets are the slightly esoteric special sauce that avoid +# recursive make invocations in the install targets. +# There are three basic install recipies: +# - The old install target is available to common-install, but never recurses to +# - The old installopt target is available as both full-installopt and +# native-install +# - The old installoptopt target is also available as full-installoptopt and +# installopt +# These sets of recipies are then welded together by these three dependency +# specifications +# - When configured with --disable-native-compiler, the install target simply +# depends on common-install (see above) +# - Otherwise, install depends on full-installoptopt (see above) +# - The recipe for full-installoptopt installs the .opt versions of the tools, +# but it _depends on_ full-installopt. +# - full-installopt installs the native compiler, but it _depends on_ +# common-install +installopt: native-install + +full-installopt:: common-install + +full-installoptopt: full-installopt + +.PHONY: common-install +common-install:: + $(call INSTALL_BEGIN) + +ifeq "$(SUFFIXING)" "true" +MANGLE_RUNTIME_NAME = $(TARGET)-$(1)-$(BYTECODE_RUNTIME_ID)$(EXE) +MANGLE_RUNTIME_DLL_NAME = lib$(1)-$(TARGET)-$($(2)_RUNTIME_ID)$(EXT_DLL) +else +MANGLE_RUNTIME_NAME = $(1)$(EXE) +MANGLE_RUNTIME_DLL_NAME = lib$(1)_shared$(EXT_DLL) +endif + +define INSTALL_RUNTIME +common-install:: + $$(call INSTALL_ITEM, runtime/$(1)$(EXE), bin, , \ + $(call MANGLE_RUNTIME_NAME,$(1)), $(if $(filter true, $(SUFFIXING)), \ + $(1)$(EXE) $(1)-$(ZINC_RUNTIME_ID)$(EXE))) +endef +define INSTALL_RUNTIME_LIB +ifeq "$(2)" "BYTECODE" +common-install:: +else +full-installopt native-install:: +endif + $$(call INSTALL_ITEM, runtime/lib$(1)_shared$(EXT_DLL), libexec, , \ + $(call MANGLE_RUNTIME_DLL_NAME,$(1),$(2)), \ + $(if $(filter true, $(SUFFIXING)), lib$(1)_shared$(EXT_DLL))) +endef + +$(foreach runtime, $(runtime_PROGRAMS), \ + $(eval $(call INSTALL_RUNTIME,$(runtime)))) + +common-install:: + $(call INSTALL_ITEMS, runtime/ld.conf $(runtime_BYTECODE_STATIC_LIBRARIES), \ + lib) + +$(foreach shared_runtime, $(runtime_BYTECODE_SHARED_LIBRARIES), \ + $(eval $(call INSTALL_RUNTIME_LIB,$(shared_runtime),BYTECODE))) + +common-install:: + $(call INSTALL_ITEMS, \ + runtime/caml/domain_state.tbl runtime/caml/*.h, \ + lib, $(INSTALL_LIBDIR_CAML)) + $(call INSTALL_ITEMS, ocaml$(EXE), bin) ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(call INSTALL_STRIPPED_BYTE_PROG,\ - ocamlc$(EXE),"$(INSTALL_BINDIR)/ocamlc.byte$(EXE)") + $(call STRIP_BYTE_PROG, ocamlc$(EXE)) +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEM, \ + ocamlc$(EXE).stripped, bin, , ocamlc.byte$(EXE)) +else + $(call INSTALL_ITEM, \ + ocamlc$(EXE).stripped, bin, , ocamlc.byte$(EXE), ocamlc$(EXE)) +endif endif $(MAKE) -C stdlib install + +define INSTALL_ONE_NAT_TOOL +common-install:: +ifeq "$(NATIVE_COMPILER)" "true" ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(INSTALL_PROG) lex/ocamllex$(EXE) \ - "$(INSTALL_BINDIR)/ocamllex.byte$(EXE)" - for i in $(TOOLS_TO_INSTALL_NAT); \ - do \ - $(INSTALL_PROG) "tools/$$i$(EXE)" "$(INSTALL_BINDIR)/$$i.byte$(EXE)";\ - if test -f "tools/$$i".opt$(EXE); then \ - $(INSTALL_PROG) "tools/$$i.opt$(EXE)" "$(INSTALL_BINDIR)" && \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.opt$(EXE)" "$$i$(EXE)"); \ - else \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.byte$(EXE)" "$$i$(EXE)"); \ - fi; \ - done + $$(call INSTALL_ITEM, tools/$(1)$(EXE), bin, , $(1).byte$(EXE)) +endif + $$(call INSTALL_ITEM, tools/$(1).opt$(EXE), bin, , , $(1)$(EXE)) else - for i in $(TOOLS_TO_INSTALL_NAT); \ - do \ - if test -f "tools/$$i".opt$(EXE); then \ - $(INSTALL_PROG) "tools/$$i.opt$(EXE)" "$(INSTALL_BINDIR)"; \ - (cd "$(INSTALL_BINDIR)" && $(LN) "$$i.opt$(EXE)" "$$i$(EXE)"); \ - fi; \ - done + $$(call INSTALL_ITEM, tools/$(1)$(EXE), bin, , $(1).byte$(EXE), $(1)$(EXE)) endif - for i in $(TOOLS_TO_INSTALL_BYT); \ - do \ - $(INSTALL_PROG) "tools/$$i$(EXE)" "$(INSTALL_BINDIR)";\ - done - $(INSTALL_PROG) $(ocamlyacc_PROGRAM)$(EXE) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - utils/*.cmi \ - parsing/*.cmi \ - typing/*.cmi \ - bytecomp/*.cmi \ - file_formats/*.cmi \ - lambda/*.cmi \ - driver/*.cmi \ - toplevel/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - toplevel/byte/*.cmi \ - "$(INSTALL_COMPLIBDIR)" +endef + +ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" +common-install:: +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEM, \ + lex/ocamllex$(EXE), bin, , ocamllex.byte$(EXE)) +else + $(call INSTALL_ITEM, \ + lex/ocamllex$(EXE), bin, , ocamllex.byte$(EXE), ocamllex$(EXE)) +endif +endif + +$(foreach tool, $(TOOLS_TO_INSTALL_NAT), \ + $(eval $(call INSTALL_ONE_NAT_TOOL,$(tool)))) + +define INSTALL_ONE_BYT_TOOL +common-install:: + $$(call INSTALL_ITEMS, tools/$(1)$(EXE), bin) +endef + +$(foreach tool, $(TOOLS_TO_INSTALL_BYT), \ + $(eval $(call INSTALL_ONE_BYT_TOOL,$(tool)))) + +common-install:: + $(call INSTALL_ITEMS, $(ocamlyacc_PROGRAM)$(EXE), bin) + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmi), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - utils/*.cmt utils/*.cmti utils/*.mli \ - parsing/*.cmt parsing/*.cmti parsing/*.mli \ - typing/*.cmt typing/*.cmti typing/*.mli \ - file_formats/*.cmt file_formats/*.cmti file_formats/*.mli \ - lambda/*.cmt lambda/*.cmti lambda/*.mli \ - bytecomp/*.cmt bytecomp/*.cmti bytecomp/*.mli \ - driver/*.cmt driver/*.cmti driver/*.mli \ - toplevel/*.cmt toplevel/*.cmti toplevel/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - toplevel/byte/*.cmt \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - tools/profiling.cmt tools/profiling.cmti \ - "$(INSTALL_LIBDIR_PROFILING)" -endif - $(INSTALL_DATA) \ - compilerlibs/*.cma compilerlibs/META \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - $(ocamlc_CMO_FILES) $(ocaml_CMO_FILES) \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_PROG) $(expunge) "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, toplevel/byte/*.cmt, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, tools/profiling.cmt tools/profiling.cmti, \ + lib, $(INSTALL_LIBDIR_PROFILING)) +endif + $(call INSTALL_ITEMS, compilerlibs/*.cma compilerlibs/META, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, $(ocamlc_CMO_FILES) $(ocaml_CMO_FILES), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, $(expunge), libexec) # If installing over a previous OCaml version, ensure some modules are removed # from the previous installation. - rm -f "$(INSTALL_LIBDIR)"/topdirs.cm* "$(INSTALL_LIBDIR)/topdirs.mli" - rm -f "$(INSTALL_LIBDIR)"/profiling.cm* "$(INSTALL_LIBDIR)/profiling.$(O)" - $(INSTALL_DATA) \ - tools/profiling.cmi tools/profiling.cmo \ - "$(INSTALL_LIBDIR_PROFILING)" + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/topdirs.cm* "$(INSTALL_LIBDIR)/topdirs.mli") + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/profiling.cm* "$(INSTALL_LIBDIR)/profiling.$(O)") + $(call INSTALL_ITEMS, tools/profiling.cmi tools/profiling.cmo, \ + lib, $(INSTALL_LIBDIR_PROFILING)) ifeq "$(UNIX_OR_WIN32)" "unix" # Install manual pages only on Unix $(MAKE) -C man install endif # For dynlink, if installing over a previous OCaml version, ensure # dynlink is removed from the previous installation. - rm -f "$(INSTALL_LIBDIR)"/dynlink.cm* "$(INSTALL_LIBDIR)/dynlink.mli" \ - "$(INSTALL_LIBDIR)/dynlink.$(A)" \ - $(addprefix "$(INSTALL_LIBDIR)/", $(notdir $(dynlink_CMX_FILES))) - $(MKDIR) "$(INSTALL_LIBDIR_DYNLINK)" - $(INSTALL_DATA) \ - otherlibs/dynlink/dynlink.cmi otherlibs/dynlink/dynlink.cma \ - otherlibs/dynlink/META \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_RM, \ + "$(INSTALL_LIBDIR)"/dynlink.cm* \ + "$(INSTALL_LIBDIR)/dynlink.mli" \ + "$(INSTALL_LIBDIR)/dynlink.$(A)" \ + $(addprefix "$(INSTALL_LIBDIR)/", $(notdir $(dynlink_CMX_FILES)))) + $(call INSTALL_ITEMS, \ + otherlibs/dynlink/dynlink.cmi otherlibs/dynlink/dynlink.cma \ + otherlibs/dynlink/META, \ + lib, $(INSTALL_LIBDIR_DYNLINK)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - otherlibs/dynlink/dynlink.cmti otherlibs/dynlink/dynlink.mli \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_ITEMS, \ + otherlibs/dynlink/dynlink.cmti otherlibs/dynlink/dynlink.mli, \ + lib, $(INSTALL_LIBDIR_DYNLINK)) endif for i in $(OTHERLIBS); do \ $(MAKE) -C otherlibs/$$i install || exit $$?; \ done ifeq "$(build_ocamldoc)" "true" - $(MKDIR) "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_PROG) $(OCAMLDOC) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - ocamldoc/ocamldoc.hva ocamldoc/*.cmi ocamldoc/odoc_info.cma \ - ocamldoc/META \ - "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBCMIS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, ocamldoc/ocamldoc$(EXE), bin) + $(call INSTALL_ITEMS, \ + ocamldoc/ocamldoc.hva ocamldoc/*.cmi ocamldoc/odoc_info.cma \ + ocamldoc/META, \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS), \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) endif endif ifeq "$(build_libraries_manpages)" "true" $(MAKE) -C api_docgen install endif - if test -n "$(WITH_DEBUGGER)"; then \ - $(INSTALL_PROG) debugger/ocamldebug$(EXE) "$(INSTALL_BINDIR)"; \ - fi +ifneq "$(WITH_DEBUGGER)" "" + $(call INSTALL_ITEMS, debugger/ocamldebug$(EXE), bin) +endif ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" ifeq "$(TOOLCHAIN)" "msvc" - $(INSTALL_DATA) $(FLEXDLL_SOURCE_DIR)/$(FLEXDLL_MANIFEST) \ - "$(INSTALL_BINDIR)/" + # Technically this should not be installed with "executable" + # permissions, but in practice that request will be ignored. + $(call INSTALL_ITEMS, $(FLEXDLL_SOURCE_DIR)/$(FLEXDLL_MANIFEST), bin) endif ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(INSTALL_PROG) \ - flexlink.byte$(EXE) "$(INSTALL_BINDIR)" -endif # ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(MKDIR) "$(INSTALL_FLEXDLLDIR)" - $(INSTALL_DATA) $(FLEXDLL_OBJECTS) "$(INSTALL_FLEXDLLDIR)" -endif # ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" - $(INSTALL_DATA) Makefile.config "$(INSTALL_LIBDIR)" - $(INSTALL_DATA) $(DOC_FILES) "$(INSTALL_DOCDIR)" -ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - if test -f ocamlopt$(EXE); then $(MAKE) installopt; else \ - cd "$(INSTALL_BINDIR)"; \ - $(LN) ocamlc.byte$(EXE) ocamlc$(EXE); \ - $(LN) ocamllex.byte$(EXE) ocamllex$(EXE); \ - (test -f flexlink.byte$(EXE) && \ - $(LN) flexlink.byte$(EXE) flexlink$(EXE)) || true; \ - fi +ifeq "$(NATIVE_COMPILER)" "true" + $(call INSTALL_ITEMS, flexlink.byte$(EXE), bin) else - if test -f ocamlopt$(EXE); then $(MAKE) installopt; fi + $(call INSTALL_ITEM, flexlink.byte$(EXE), bin, , , flexlink$(EXE)) endif +endif # ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" + $(call INSTALL_ITEMS, $(FLEXDLL_OBJECTS), lib, $(INSTALL_LIBDIR_FLEXDLL)) +endif # ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" + $(call INSTALL_ITEMS, Makefile.config, lib) + $(call INSTALL_ITEMS, $(DOC_FILES), doc) # Installation of the native-code compiler -.PHONY: installopt -installopt: - $(INSTALL_DATA) $(runtime_NATIVE_STATIC_LIBRARIES) "$(INSTALL_LIBDIR)" -ifneq "$(runtime_NATIVE_SHARED_LIBRARIES)" "" - $(INSTALL_PROG) $(runtime_NATIVE_SHARED_LIBRARIES) "$(INSTALL_LIBDIR)" -endif +.PHONY: full-installopt native-install +full-installopt native-install:: + $(call INSTALL_ITEMS, $(runtime_NATIVE_STATIC_LIBRARIES), lib) + +$(foreach shared_runtime, $(runtime_NATIVE_SHARED_LIBRARIES), \ + $(eval $(call INSTALL_RUNTIME_LIB,$(shared_runtime),NATIVE))) + +full-installopt native-install:: ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - $(call INSTALL_STRIPPED_BYTE_PROG,\ - ocamlopt$(EXE),"$(INSTALL_BINDIR)/ocamlopt.byte$(EXE)") + $(call STRIP_BYTE_PROG, ocamlopt$(EXE)) + $(call INSTALL_ITEM, ocamlopt$(EXE).stripped, bin, , ocamlopt.byte$(EXE)) endif $(MAKE) -C stdlib installopt - $(INSTALL_DATA) \ - middle_end/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/closure/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/base_types/*.cmi \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - asmcomp/*.cmi \ - "$(INSTALL_COMPLIBDIR)" -ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - middle_end/*.cmt middle_end/*.cmti \ - middle_end/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/closure/*.cmt middle_end/closure/*.cmti \ - middle_end/closure/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/*.cmt middle_end/flambda/*.cmti \ - middle_end/flambda/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - middle_end/flambda/base_types/*.cmt \ - middle_end/flambda/base_types/*.cmti \ - middle_end/flambda/base_types/*.mli \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - asmcomp/*.cmt asmcomp/*.cmti \ - asmcomp/*.mli \ - "$(INSTALL_COMPLIBDIR)" -endif - $(INSTALL_DATA) \ - $(ocamlopt_CMO_FILES) \ - "$(INSTALL_COMPLIBDIR)" -ifeq "$(build_ocamldoc)" "true" - $(MKDIR) "$(INSTALL_LIBDIR)/ocamldoc" - $(INSTALL_PROG) $(OCAMLDOC_OPT) "$(INSTALL_BINDIR)" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBCMIS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, \ + middle_end/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/closure/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/flambda/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + middle_end/flambda/base_types/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + asmcomp/*.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(OCAMLDOC_LIBMLIS) $(OCAMLDOC_LIBCMTS) \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/closure/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/flambda/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix middle_end/flambda/base_types/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(addprefix asmcomp/, *.cmt *.cmti *.mli), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) endif - $(INSTALL_DATA) \ - ocamldoc/ocamldoc.hva ocamldoc/*.cmx ocamldoc/odoc_info.$(A) \ - ocamldoc/odoc_info.cmxa \ - "$(INSTALL_LIBDIR)/ocamldoc" + $(call INSTALL_ITEMS, $(ocamlopt_CMO_FILES), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) +ifeq "$(build_ocamldoc)" "true" + $(call INSTALL_ITEMS, ocamldoc/ocamldoc.opt$(EXE), bin) + $(call INSTALL_ITEMS, \ + ocamldoc/*.cmx ocamldoc/odoc_info.$(A) ocamldoc/odoc_info.cmxa, \ + lib, $(INSTALL_LIBDIR_OCAMLDOC)) endif ifeq "$(strip $(NATDYNLINK))" "true" - $(INSTALL_DATA) \ - $(dynlink_CMX_FILES) otherlibs/dynlink/dynlink.cmxa \ - otherlibs/dynlink/dynlink.$(A) \ - "$(INSTALL_LIBDIR_DYNLINK)" + $(call INSTALL_ITEMS, \ + $(dynlink_CMX_FILES) otherlibs/dynlink/dynlink.cmxa \ + otherlibs/dynlink/dynlink.$(A), \ + lib, $(INSTALL_LIBDIR_DYNLINK)) endif for i in $(OTHERLIBS); do \ $(MAKE) -C otherlibs/$$i installopt || exit $$?; \ done -ifeq "$(INSTALL_BYTECODE_PROGRAMS)" "true" - if test -f ocamlopt.opt$(EXE); then $(MAKE) installoptopt; else \ - cd "$(INSTALL_BINDIR)"; \ - $(LN) ocamlc.byte$(EXE) ocamlc$(EXE); \ - $(LN) ocamlopt.byte$(EXE) ocamlopt$(EXE); \ - $(LN) ocamllex.byte$(EXE) ocamllex$(EXE); \ - (test -f flexlink.byte$(EXE) && \ - $(LN) flexlink.byte$(EXE) flexlink$(EXE)) || true; \ - fi -else - if test -f ocamlopt.opt$(EXE); then $(MAKE) installoptopt; fi -endif - $(INSTALL_DATA) \ - tools/profiling.cmx tools/profiling.$(O) \ - "$(INSTALL_LIBDIR_PROFILING)" - -.PHONY: installoptopt -installoptopt: - $(INSTALL_PROG) ocamlc.opt$(EXE) "$(INSTALL_BINDIR)" - $(INSTALL_PROG) ocamlopt.opt$(EXE) "$(INSTALL_BINDIR)" - $(INSTALL_PROG) lex/ocamllex.opt$(EXE) "$(INSTALL_BINDIR)" - cd "$(INSTALL_BINDIR)"; \ - $(LN) ocamlc.opt$(EXE) ocamlc$(EXE); \ - $(LN) ocamlopt.opt$(EXE) ocamlopt$(EXE); \ - $(LN) ocamllex.opt$(EXE) ocamllex$(EXE) + $(call INSTALL_ITEMS, tools/profiling.cmx tools/profiling.$(O), \ + lib, $(INSTALL_LIBDIR_PROFILING)) + +.PHONY: full-installoptopt installopt installoptopt +full-installoptopt installopt installoptopt: + $(call INSTALL_ITEM, ocamlc.opt$(EXE), bin, , , ocamlc$(EXE)) + $(call INSTALL_ITEM, ocamlopt.opt$(EXE), bin, , , ocamlopt$(EXE)) + $(call INSTALL_ITEM, lex/ocamllex.opt$(EXE), bin, , , ocamllex$(EXE)) ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" - $(INSTALL_PROG) flexlink.opt$(EXE) "$(INSTALL_BINDIR)" - cd "$(INSTALL_BINDIR)"; \ - $(LN) flexlink.opt$(EXE) flexlink$(EXE) -endif - $(INSTALL_DATA) \ - utils/*.cmx parsing/*.cmx typing/*.cmx bytecomp/*.cmx \ - toplevel/*.cmx toplevel/native/*.cmx \ - toplevel/native/tophooks.cmi \ - file_formats/*.cmx \ - lambda/*.cmx \ - driver/*.cmx asmcomp/*.cmx middle_end/*.cmx \ - middle_end/closure/*.cmx \ - middle_end/flambda/*.cmx \ - middle_end/flambda/base_types/*.cmx \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - compilerlibs/*.cmxa compilerlibs/*.$(A) \ - "$(INSTALL_COMPLIBDIR)" - $(INSTALL_DATA) \ - $(ocamlc_CMX_FILES) $(ocamlc_CMX_FILES:.cmx=.$(O)) \ - $(ocamlopt_CMX_FILES) $(ocamlopt_CMX_FILES:.cmx=.$(O)) \ - $(ocamlnat_CMX_FILES:.cmx=.$(O)) \ - "$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEM, flexlink.opt$(EXE), bin, , , flexlink$(EXE)) +endif + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.cmx, $(NATIVE_ARTEFACT_DIRS)) \ + toplevel/native/tophooks.cmi, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, compilerlibs/*.cmxa compilerlibs/*.$(A), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) + $(call INSTALL_ITEMS, \ + $(ocamlc_CMX_FILES:.cmx=.$(O)) \ + $(ocamlopt_CMX_FILES:.cmx=.$(O)) \ + $(ocamlnat_CMX_FILES:.cmx=.$(O)), \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) ifeq "$(INSTALL_OCAMLNAT)" "true" - $(INSTALL_PROG) ocamlnat$(EXE) "$(INSTALL_BINDIR)" + $(call INSTALL_ITEMS, ocamlnat$(EXE), bin) endif # Installation of the *.ml sources of compiler-libs .PHONY: install-compiler-sources install-compiler-sources: ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - utils/*.ml parsing/*.ml typing/*.ml bytecomp/*.ml driver/*.ml \ - file_formats/*.ml \ - lambda/*.ml \ - toplevel/*.ml toplevel/byte/*.ml \ - middle_end/*.ml middle_end/closure/*.ml \ - middle_end/flambda/*.ml middle_end/flambda/base_types/*.ml \ - asmcomp/*.ml \ - asmcmp/debug/*.ml \ - "$(INSTALL_COMPLIBDIR)" + $(call INSTALL_ITEMS, \ + $(call COMPILER_ARTEFACT_DIRS, *.ml, $(NATIVE_ARTEFACT_DIRS)) \ + toplevel/byte/*.ml, \ + lib, $(INSTALL_LIBDIR_COMPILERLIBS)) endif include .depend diff --git a/Makefile.build_config.in b/Makefile.build_config.in index fe9782c20ce2..f159ccad6ab8 100644 --- a/Makefile.build_config.in +++ b/Makefile.build_config.in @@ -75,14 +75,22 @@ INSTALL_OCAMLNAT = @install_ocamlnat@ DEP_CC=@DEP_CC@ -MM COMPUTE_DEPS=@compute_deps@ +BUILD_PATH_LOGICAL = @srcdir_abs@ +BUILD_PATH_PHYSICAL = @srcdir_abs_real@ +BUILD_MAP_FLAGS = @build_map_flags@ +BUILD_MAP_CFLAGS = $(foreach flag, $(BUILD_MAP_FLAGS), \ + $(call QUOTE_SINGLE,$(flag)$(BUILD_PATH_LOGICAL)=+build) \ + $(if $(BUILD_PATH_PHYSICAL), \ + $(call $(QUOTE_SINGLE),$(flag)$(BUILD_PATH_PHYSICAL)=+build))) + # Default flags to use to compile C files -OC_CFLAGS = @oc_cflags@ +OC_CFLAGS = @oc_cflags@ $(BUILD_MAP_CFLAGS) # Flags to use when compiling C files to be linked with bytecode -OC_BYTECODE_CFLAGS = @oc_bytecode_cflags@ +OC_BYTECODE_CFLAGS = @oc_bytecode_cflags@ $(BUILD_MAP_CFLAGS) # Flags to use when compiling C files to be linked with native code -OC_NATIVE_CFLAGS = @oc_native_cflags@ +OC_NATIVE_CFLAGS = @oc_native_cflags@ $(BUILD_MAP_CFLAGS) # The submodules should be searched *before* any other external -I paths OC_INCLUDES = $(addprefix -I $(ROOTDIR)/, \ @@ -101,6 +109,9 @@ OC_DLL_LDFLAGS=@oc_dll_ldflags@ MKEXE_VIA_CC=$(CC) @mkexe_via_cc_ldflags@ @mkexe_via_cc_extra_cmd@ +LAUNCH_METHOD = @launch_method@ +SUFFIXING = @suffixing@ + # How to build sak SAK_BUILD=@SAK_BUILD@ # How to invoke sak @@ -143,6 +154,7 @@ DOCDIR=@docdir@ ### Where to look for the standard library on target TARGET_LIBDIR=@TARGET_LIBDIR@ +TARGET_LIBDIR_IS_RELATIVE=@target_libdir_is_relative@ unix_directory = @unix_directory@ unix_library = @unix_library@ @@ -184,6 +196,10 @@ OC_NATIVE_LINKFLAGS = -g BUILD_TRIPLET = @build@ +# Zinc Runtime ID is needed for installation only +ZINC_RUNTIME_ID_HI = @zinc_runtime_id_hi@ +ZINC_RUNTIME_ID = @zinc_runtime_id_lo@$(ZINC_RUNTIME_ID_HI) + # Platform-dependent command to create symbolic links LN = @ln@ @@ -205,3 +221,5 @@ TSAN=@tsan@ # Contains TSan-specific runtime files, or nothing if TSan support is # disabled TSAN_NATIVE_RUNTIME_C_SOURCES = @tsan_native_runtime_c_sources@ + +RUNTIME_SEARCH = @runtime_search@ diff --git a/Makefile.common b/Makefile.common index c77a3a4aadfe..5f0cdd3bf15d 100644 --- a/Makefile.common +++ b/Makefile.common @@ -29,6 +29,7 @@ EMPTY := SPACE := $(EMPTY) $(EMPTY) # $( ) suppresses warning from the alignments in the V_ macros below $(SPACE) := +HASH := \# ifeq "$(UNIX_OR_WIN32)" "win32" DIR_SEP := \$ # There must a space following the $ @@ -38,6 +39,8 @@ DIR_SEP = / CONVERT_PATH = $(strip $(1)) endif +QUOTE_SINGLE = '$(subst ','\'',$(1))' + V ?= 0 ifeq "$(V)" "0" @@ -83,15 +86,334 @@ V_ODOC = endif DESTDIR ?= -INSTALL_BINDIR := $(DESTDIR)$(BINDIR) -INSTALL_LIBDIR := $(DESTDIR)$(LIBDIR) -INSTALL_INCDIR=$(INSTALL_LIBDIR)/caml -INSTALL_STUBLIBDIR := $(DESTDIR)$(STUBLIBDIR) -INSTALL_LIBDIR_PROFILING = $(INSTALL_LIBDIR)/profiling -INSTALL_MANDIR := $(DESTDIR)$(MANDIR) -INSTALL_PROGRAMS_MAN_DIR := $(DESTDIR)$(PROGRAMS_MAN_DIR) -INSTALL_LIBRARIES_MAN_DIR := $(DESTDIR)$(LIBRARIES_MAN_DIR) -INSTALL_DOCDIR := $(DESTDIR)$(DOCDIR) + +# Augment directories from Makefile.config / Makefile.build_config with +# $(DESTDIR). i.e. each of these 5 directories may be overridden by the user, +# and the compiler distribution makes no assumptions about where they are +# relative to each other. +INSTALL_BINDIR = $(DESTDIR)$(BINDIR) +INSTALL_DOCDIR = $(DESTDIR)$(DOCDIR) +INSTALL_LIBDIR = $(DESTDIR)$(LIBDIR) +INSTALL_MANDIR = $(DESTDIR)$(MANDIR) +INSTALL_STUBLIBDIR = $(DESTDIR)$(STUBLIBDIR) + +# Library subdirectories. The compiler distribution does make assumptions about +# these, and they cannot be freely overridden by the user. +INSTALL_LIBDIR_CAML = caml +INSTALL_LIBDIR_COMPILERLIBS = compiler-libs +INSTALL_LIBDIR_DYNLINK = dynlink +INSTALL_LIBDIR_FLEXDLL = flexdll +INSTALL_LIBDIR_OCAMLDOC = ocamldoc +INSTALL_LIBDIR_PROFILING = profiling +INSTALL_LIBDIR_STDLIB = stdlib +INSTALL_LIBDIR_SYSTHREADS = threads + +INSTALL_MANDIR_PROGRAMS = man1 +INSTALL_MANDIR_LIBRARIES = man3 + +INSTALL_MODE ?= install + +# INSTALL_ITEM installs a single file, possibly with a different name and +# possibly creating additional symlinks/copies +# $1 = source file (may include directories) +# $2 = section (bin, doc, lib, libexec, man, stublibs) +# $3 = directory within section (may be empty) +# $4 = target basename (may be empty) +# $5 = additional basenames (either symlinked or copied, depending on what the +# platform supports) +# The $(origin n) dance is necessary to suppress warnings about undefined +# variables. +INSTALL_ITEM = \ + $(INSTALL_$(INSTALL_MODE)_PREFIX)$(call INSTALL_ENSURE_DIR,$\ + $(strip $(2)),$(if $(filter-out undefined,$(origin 3)),$(strip $(3))))$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEM,$\ + $(strip $(1)),$\ + $(strip $(2)),$\ + $(if $(filter-out undefined,$(origin 3)),$(strip $(3))),$\ + $(if $(filter-out undefined,$(origin 4)),$(strip $(4))),$\ + $(if $(filter-out undefined,$(origin 5)),$(strip $(5)))) + +# INSTALL_ITEMS installs a series of files to a single directory +# $1 = source file(s) (may include directories and glob patterns) +# $2 = section (as for INSTALL_ITEM) +# $3 = directory within section (may be omitted) +# INSTALL_ITEMS is sometimes an alias for INSTALL_ITEM. For simplicity with +# undefined variable warnings, INSTALL_DESPATCH_foo_ITEMS is passed 5 parameters +# but $4 and $5 are always empty. +INSTALL_ITEMS = \ + $(INSTALL_$(INSTALL_MODE)_PREFIX)$(call INSTALL_ENSURE_DIR,$\ + $(strip $(2)),$(if $(filter-out undefined,$(origin 3)),$(strip $(3))))$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEMS,$\ + $(strip $(1)),$\ + $(strip $(2)),$\ + $(if $(filter-out undefined,$(origin 3)),$(strip $(3))),,) + +# INSTALL_ITEMS_OPT is INSTALL_ITEMS, but does nothing if the source file(s) do +# not exist +INSTALL_ITEMS_OPT = \ + $(if $(wildcard $(1)),$(call INSTALL_ITEMS, \ + $(1), $(2), $(if $(filter-out undefined,$(origin 3)), $(3)))) + +INSTALL_ENSURE_DIR = \ + $(if $(filter undefined,$(origin DIR_CREATED_$(subst exec,,$(1))_$(2))),$\ + $(eval DIR_CREATED_$(1)_$(2):=)$\ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_MKDIR,$\ + $(subst exec,,$(1)),$(2))) + +# INSTALL_RM takes a single argument which may include glob patterns of files to +# be removed when performing a physical install. +INSTALL_RM = $(call INSTALL_DESPATCH_$(INSTALL_MODE)_RM,$(strip $(1))) + +# INSTALL_BEGIN and INSTALL_END are used in the root Makefile's install target +INSTALL_BEGIN = $(INSTALL_DESPATCH_$(INSTALL_MODE)_BEGIN) +INSTALL_END = $(INSTALL_DESPATCH_$(INSTALL_MODE)_END) + +# Normal installation +INSTALL_CMD_bin = $(INSTALL_PROG) +INSTALL_CMD_doc = $(INSTALL_DATA) +INSTALL_CMD_lib = $(INSTALL_DATA) +INSTALL_CMD_libexec = $(INSTALL_PROG) +INSTALL_CMD_man = $(INSTALL_DATA) +INSTALL_CMD_stublibs = $(INSTALL_PROG) + +INSTALL_SECTION_bin = $(INSTALL_BINDIR) +INSTALL_SECTION_doc = $(INSTALL_DOCDIR) +INSTALL_SECTION_lib = $(INSTALL_LIBDIR) +INSTALL_SECTION_libexec = $(INSTALL_LIBDIR) +INSTALL_SECTION_man = $(INSTALL_MANDIR) +INSTALL_SECTION_stublibs = $(INSTALL_STUBLIBDIR) + +QUOTE_SINGLE = '$(subst ','\'',$(1))' + +define NEWLINE + + +endef +SH_AND = && \$(NEWLINE) + +INSTALL_install_PREFIX = + +INSTALL_DESPATCH_install_RM = rm -f $(1) + +INSTALL_DESPATCH_install_MKDIR = \ + $(MKDIR) $(call QUOTE_SINGLE,$(INSTALL_SECTION_$(1))$(addprefix /,$(2))) \ + $(SH_AND) + +MK_LINK = \ + (cd "$(INSTALL_SECTION_$(2))$(addprefix /,$(3))" && \ + $(LN) $(call QUOTE_SINGLE,$(1)) $(call QUOTE_SINGLE,$(4))) + +INSTALL_DESPATCH_install_ITEM = \ + $(INSTALL_CMD_$(2)) $(1) \ + $(call QUOTE_SINGLE,$\ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))$(addprefix /,$(4))) \ + $(foreach link, $(5),$(SH_AND)$\ + $(call MK_LINK,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_install_ITEMS = $(INSTALL_DESPATCH_install_ITEM) + +INSTALL_DESPATCH_install_BEGIN = @ +INSTALL_DESPATCH_install_END = @ + +INSTALL_display_PREFIX = @ + +INSTALL_DESPATCH_display_RM = @ + +INSTALL_DESPATCH_display_MKDIR = \ + echo $(call QUOTE_SINGLE,$\ + -> MKDIR $(INSTALL_SECTION_$(1))$(addprefix /,$(2))) $(SH_AND) + +MKLINK_display = \ + echo $(call QUOTE_SINGLE,-> LN \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$(1)) -> \ + $(if $(4),$(4),$(notdir $(1)))) + +INSTALL_DESPATCH_display_ITEM = \ + echo $(call QUOTE_SINGLE,-> INSTALL $(1) \ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))$(addprefix /,$(4))) \ + $(foreach link, $(5), && \ + $(call MKLINK_display,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_display_ITEMS = $(INSTALL_DESPATCH_display_ITEM) + +INSTALL_DESPATCH_display_BEGIN = @ +INSTALL_DESPATCH_display_END = @ + +INSTALL_list_PREFIX = @ + +INSTALL_DESPATCH_list_RM = @ + +INSTALL_DESPATCH_list_MKDIR = + +MKLINK_list = \ + echo $(call QUOTE_SINGLE,-> \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$\ + $(if $(4),$(4),$(notdir $(1))))) + +INSTALL_DESPATCH_list_ITEM = \ + echo $(call QUOTE_SINGLE,-> \ + $(abspath $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$\ + $(if $(4),$(4),$(notdir $(1))))) \ + $(foreach link, $(5), && \ + $(call MKLINK_list,$(if $(4),$(4),$(notdir $(1))),$(2),$(3),$(link))) + +INSTALL_DESPATCH_list_ITEMS = \ + $(foreach file, $(wildcard $(1)), \ + echo $(call QUOTE_SINGLE,-> \ + $(INSTALL_SECTION_$(2))$(addprefix /,$(3))/$(notdir $(file)));) \ + true + +INSTALL_DESPATCH_list_BEGIN = @ +INSTALL_DESPATCH_list_END = @ + +OPAM_PACKAGE_NAME ?= ocaml-compiler + +# Generate $(OPAM_PACKAGE_NAME).install and $(OPAM_PACKAGE_NAME)-fixup.sh +# (INSTALL_MODE=opam) +# opam's .install format isn't quite rich enough at present to express the +# installation of the compiler. In particular, we can't install the doc files to +# doc/ocaml using a .install and we can't create symlinks. The things which +# can't be handled by the .install file are dealt with by the fixup script +# instead. + +INVOKE = $(strip $(1)) $(call QUOTE_SINGLE,$(strip $(2))) +ADD_LINE = $(call INVOKE, echo, $(2)) >> $(1) + +# RECORD_SYMLINK_TO_INSTALL +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = section +# $3 = subdirectory within $2 (may be empty) +# $4 = name to install $1 (must be specified) +# $5 = single name of symlink +# If symlinks are supported, $1 is ignored and the three pieces of information +# are recorded in create-symlinks: the directory, implicitly relative to the +# prefix, in which the symlink is to be created, the source file and name of the +# symlink. +# These can then be munged to a cd + ln combination in the fixup script. +# If symlinks are not supported, $1 is instead used to create an additional copy +# of the file, using the .install file. +ifeq "$(firstword $(LN))" "ln" +RECORD_SYMLINK_TO_INSTALL = \ + $(call ADD_LINE, $(ROOTDIR)/create-symlinks, \ + $(patsubst lib%,lib,$(2))$(addprefix /,$(3)) $(4) $(5)) +else +# Symlinks aren't available, so copy the file again using the target name +RECORD_SYMLINK_TO_INSTALL = \ + $(call RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL,$(1),$(2),$(3),$(5)) +endif + +# Process the arguments to pass to RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL: +# - Items installed to the stublibs section need to be remapped to the stublibs +# subdirectory of libexec (since we install to lib/ocaml/stublibs rather than +# opam's default lib/stublibs) +# - Source files must be given implicitly relative to $(ROOTDIR), so prefix with +# $(SUBDIR_NAME) if necessary +# - Items installed to the lib/libexec sections will in fact be installed to +# lib_root/libexec_root, so remap the installation directory to ocaml (i.e. to +# install to lib/ocaml rather than lib) +# - If no target basename has been explictly given, use the source's basename +RECORD_ITEM_TO_INSTALL = \ + $(if $(filter stublibs,$(2)),\ + $(call RECORD_ITEM_TO_INSTALL,$\ + $(1),libexec,stublibs$(addprefix /,$(3)),$(4),$(5)),\ + $(call RECORD_$(INSTALL_MODE)_ITEM_TO_INSTALL,$\ + $(addsuffix /,$(SUBDIR_NAME))$(1),$\ + $(2),$\ + $(if $(filter doc lib%,$(2)),ocaml$(addprefix /,$(3)),$(3)),$\ + $(if $(4),$(4),$(notdir $(1))),$\ + $(5))) + +# All files must be explicitly installed, so evaluate the wildcards and call +# INSTALL_DESPATCH_opam_ITEM for each file. +INSTALL_EVALUATE_GLOBS = \ + $(foreach file, $(wildcard $(1)), \ + $(call INSTALL_DESPATCH_$(INSTALL_MODE)_ITEM,$(file),$(2),$(3));) \ + true + +# RECORD_FILE_TO_INSTALL +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = bin/lib/libexec/man +# $3 = subdirectory within $2 (may be empty, but otherwise must end with "/") +# $4 = name to install $1 (must be specified) +# Writes an opam .install line to the section file for $(2). Each line consists +# of a double-quoted implicit filename relative to $(ROOTDIR) and optionally a +# second double-quoted implicit filename relative to the $(2) for the name to +# install the file under. +# e.g. "lex/ocamllex" {"ocamllex.byte"} or "expunge" {"ocaml/expunge"} +RECORD_FILE_TO_INSTALL = \ + $(call ADD_LINE, $(ROOTDIR)/opam-$(2), \ + "$(1)" $(if $(3)$(filter-out $(notdir $(1)),$(4)), {"$(3)$(4)"})) + +# RECORD_FILE_TO_CLONE +# $1 = file to install, implicitly relative to $(ROOTDIR) +# $2 = subdirectory (may be empty, but otherwise must end with "/") +# $3 = name to install $1 (must be specified) +# The compiler is installed as the ocaml package in opam, but the actual files +# are installed from other packages (typically ocaml-compiler). For the the lib +# directory, the lib_root and libexec_root sections allow files to be installed +# to lib/ocaml, but there's no equivalent mechanism for the doc directory. These +# files are recorded to be copied manually in the fixup script. +RECORD_FILE_TO_CLONE = \ + $(call ADD_LINE, $(ROOTDIR)/clone-$(subst /,@,$(2)), $(1) $(3)) + +# RECORD_opam_ITEM_TO_INSTALL despatches the processed arguments of +# INSTALL_DESPATCH_opam_ITEM to the appropriate RECORD_ macro. +RECORD_opam_ITEM_TO_INSTALL = \ + $(if $(filter doc,$(2)),\ + $(call RECORD_FILE_TO_CLONE,$(1),doc/$(3),$(4)), \ + $(call RECORD_FILE_TO_INSTALL,$(1),$(2),$(addsuffix /,$(3)),$(4))) \ + $(foreach link, $(5), && \ + $(call RECORD_SYMLINK_TO_INSTALL,$(1),$(2),$(3),$(4),$(link))) + +INSTALL_DESPATCH_opam_ITEM = $(RECORD_ITEM_TO_INSTALL) + +INSTALL_DESPATCH_opam_ITEMS = $(INSTALL_EVALUATE_GLOBS) + +INSTALL_opam_PREFIX = @ + +INSTALL_DESPATCH_opam_RM = @ + +# INSTALL_MKDIR is ignored (opam creates them when executing the .install file) +INSTALL_DESPATCH_opam_MKDIR = + +INSTALL_DESPATCH_opam_BEGIN = \ + rm -f opam-bin clone-* opam-lib opam-libexec opam-man create-symlinks + +# Munge opam-bin, opam-lib, opam-libexec and opam-man into a .install file and +# then munge clone-* and create-symlinks into the fixup script. +INSTALL_DESPATCH_opam_END = \ + $(OCAMLRUN) ./ocaml$(EXE) $(STDLIBFLAGS) \ + tools/opam/generate.ml $(INSTALL_MODE) $(OPAM_PACKAGE_NAME) '$(LN)' + +# Generate $(OPAM_PACKAGE_NAME)-clone.sh (INSTALL_MODE=clone) + +# ld.conf is explicitly copied, rather than cloned, to allow (in principle, if +# not in practice) the cloning installation to edit it. +RECORD_clone_ITEM_TO_INSTALL = \ + $(if $(filter runtime/ld.conf Makefile.config, $(1)), true, \ + $(if $(filter libexec,$(2)), \ + $(call RECORD_clone_ITEM_TO_INSTALL,$(1),lib,$(3),$(4),$(5)), \ + $(call ADD_LINE, \ + $(ROOTDIR)/clone-$(2)$(addprefix @,$(subst /,@,$(3))), \ + $(2)$(addprefix /,$(3))/$(if $(4),$(4),$(notdir $(1)))) \ + $(foreach link, $(5), && \ + $(call RECORD_SYMLINK_TO_INSTALL,$(1),$(2),$(3),$(4),$(link))))) + +INSTALL_DESPATCH_clone_ITEM = $(RECORD_ITEM_TO_INSTALL) + +INSTALL_DESPATCH_clone_ITEMS = $(INSTALL_EVALUATE_GLOBS) + +INSTALL_clone_PREFIX = @ + +INSTALL_DESPATCH_clone_RM = @ + +# INSTALL_MKDIR is ignored - INSTALL_DESPATCH_clone_END automatically creates +# directories for each cp file. +INSTALL_DESPATCH_clone_MKDIR = + +INSTALL_DESPATCH_clone_BEGIN = rm -f clone-* create-symlinks + +INSTALL_DESPATCH_clone_END = $(INSTALL_DESPATCH_opam_END) FLEXDLL_SUBMODULE_PRESENT := $(wildcard $(ROOTDIR)/flexdll/Makefile) @@ -176,6 +498,26 @@ ifeq "$(FUNCTION_SECTIONS)" "true" OPTCOMPFLAGS += -function-sections endif +ifeq "$(TARGET_LIBDIR_IS_RELATIVE)" "true" + SRCDIR_ENCODED = $(subst =,%+,$(subst :,%.,$(subst %,%$(HASH),$(SRCDIR_ABS)))) + SRCDIR_ABS_REAL := $(shell realpath $(SRCDIR_ABS) 2>/dev/null) + SRCDIR_REAL_ENCODED = \ + $(subst =,%+,$(subst :,%.,$(subst %,%$(HASH),$(SRCDIR_ABS_REAL)))) + BUILD_PATH_PREFIX_MAP ?= + export BUILD_PATH_PREFIX_MAP := \ + $(BUILD_PATH_PREFIX_MAP)$\ + :.=$(SRCDIR_ENCODED)$\ + $(if $(SRCDIR_REAL_ENCODED),:.=$(SRCDIR_REAL_ENCODED)) +endif # ifeq "$(TARGET_LIBDIR_IS_RELATIVE)" "true" + +# Allow Makefile.cross to override the Standard Library default for the compiler +# itself. +HOST_LIBDIR ?= $(TARGET_LIBDIR) + +OC_COMMON_LINKFLAGS += \ + -set-runtime-default \ + $(call QUOTE_SINGLE,standard_library_default=$(HOST_LIBDIR)) + # The rule to compile C files # This rule is similar to GNU make's implicit rule, except that it is more @@ -319,8 +661,20 @@ $(eval $(call _OCAML_COMMON_BASE,$(1))) $(basename $(notdir $(1)))_COMMON_LINKFLAGS = endef # _OCAML_PROGRAM_BASE +# $(ROOTDIR)/ocamlc needs -launch-method to be given explicitly as its default +# values are those for the target (cf. --with-target-sh and TARGET_BINDIR). +BYTECODE_LAUNCHER_FLAGS = \ + -launch-method $(call QUOTE_SINGLE,$(LAUNCH_METHOD) $(BINDIR)) \ + -runtime-search $(if $(RUNTIME_SEARCH),$(RUNTIME_SEARCH),disable) + +MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS = \ + $(if $(filter -custom, $(1)),,\ + -use-prims $(ROOTDIR)/runtime/primitives $(BYTECODE_LAUNCHER_FLAGS)) + LINK_BYTECODE_PROGRAM =\ - $(CAMLC) $(OC_COMMON_LINKFLAGS) $(OC_BYTECODE_LINKFLAGS) + $(CAMLC) $(OC_COMMON_LINKFLAGS) \ + $(call MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS, \ + $(OC_COMMON_LINKFLAGS) $(OC_BYTECODE_LINKFLAGS)) $(OC_BYTECODE_LINKFLAGS) # The _OCAML_BYTECODE_PROGRAM macro defines a bytecode program but assuming # that _OCAML_PROGRAM_BASE has already been called. Its public counterpart @@ -344,8 +698,12 @@ $(basename $(notdir $(1)))_BYTECODE_LINKFLAGS = $(basename $(notdir $(1)))_BYTECODE_LINKCMD = \ $(strip \ - $$(CAMLC) $$(OC_COMMON_LINKFLAGS) $$(OC_BYTECODE_LINKFLAGS) \ - $$($(basename $(notdir $(1)))_COMMON_LINKFLAGS) \ + $$(CAMLC) $$(OC_COMMON_LINKFLAGS) \ + $$(call MAYBE_ADD_BYTECODE_LAUNCHER_FLAGS, \ + $$(OC_COMMON_LINKFLAGS) $$(OC_BYTECODE_LINKFLAGS) \ + $$($(basename $(notdir $(1)))_COMMON_LINKFLAGS) \ + $$($(basename $(notdir $(1)))_BYTECODE_LINKFLAGS)) \ + $$(OC_BYTECODE_LINKFLAGS) $$($(basename $(notdir $(1)))_COMMON_LINKFLAGS) \ $$($(basename $(notdir $(1)))_BYTECODE_LINKFLAGS)) $(1)$(EXE): $$$$($(basename $(notdir $(1)))_BCOBJS) @@ -471,12 +829,11 @@ $(eval $(call _OCAML_BYTECODE_LIBRARY,$(1))) $(eval $(call _OCAML_NATIVE_LIBRARY,$(1))) endef # OCAML_LIBRARY -# Installing a bytecode executable, with debug information removed -define INSTALL_STRIPPED_BYTE_PROG -$(OCAMLRUN) $(ROOTDIR)/tools/stripdebug$(EXE) $(1) $(1).tmp \ -&& $(INSTALL_PROG) $(1).tmp $(2) \ -&& rm $(1).tmp -endef # INSTALL_STRIPPED_BYTE_PROG +# Strip debug information from a bytecode executable +define STRIP_BYTE_PROG +$(OCAMLRUN) $(ROOTDIR)/tools/stripdebug$(EXE) \ + $(strip $(1)) $(strip $(1)).stripped +endef # STRIP_BYTE_PROG # ocamlc has several mechanisms for linking a bytecode image to the runtime # which executes it. The exact mechanism depends on the platform and the precise diff --git a/Makefile.config.in b/Makefile.config.in index f6473a490bdb..cc7d9134f6ee 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -220,6 +220,10 @@ FUNCTION_SECTIONS=@function_sections@ AWK=@AWK@ NAKED_POINTERS=false +# Runtime ID values +BYTECODE_RUNTIME_ID=@bytecode_runtime_id@ +NATIVE_RUNTIME_ID=@native_runtime_id@ + # Deprecated variables ## Variables deprecated since OCaml 5.3 diff --git a/Makefile.cross b/Makefile.cross index df9b242ceabd..793a18ff674a 100644 --- a/Makefile.cross +++ b/Makefile.cross @@ -40,8 +40,15 @@ VPATH := + $(VPATH) CROSS_OVERRIDES=OCAMLRUN=ocamlrun NEW_OCAMLRUN=ocamlrun \ BOOT_OCAMLLEX=ocamllex OCAMLYACC=ocamlyacc +# The cross-compiler is linked as build/bin/ocamlopt -o cross/bin/ocamlopt +# Config.standard_library_default for cross/bin/ocamlopt would be default be the +# value for build/bin/ocamlopt (i.e. build/lib/ocaml), which is not what is +# wanted. When linking the cross-compiler itself, therefore, this default must +# be overridden with -set-runtime-default so that cross/bin/ocamlopt instead has +# cross/lib/ocaml for Config.standard_library_default CROSS_COMPILER_OVERRIDES=$(CROSS_OVERRIDES) CAMLC=ocamlc CAMLOPT=ocamlopt \ - BEST_OCAMLC=ocamlc BEST_OCAMLOPT=ocamlopt BEST_OCAMLLEX=ocamllex + BEST_OCAMLC=ocamlc BEST_OCAMLOPT=ocamlopt BEST_OCAMLLEX=ocamllex \ + HOST_LIBDIR="$(LIBDIR)" CROSS_COMPILERLIBS_OVERRIDES=$(CROSS_OVERRIDES) CAMLC=ocamlc \ CAMLOPT="$(ROOTDIR)/ocamlopt.opt$(EXE) $(STDLIBFLAGS)" @@ -62,10 +69,10 @@ CROSSCOMPILERLIBS := $(addprefix compilerlibs/,$(addsuffix .cmxa,\ .PHONY: crossopt ifeq "$(BOOTSTRAPPING_FLEXDLL)" "true" crossopt: cross-flexdll - $(MAKE) runtime-all $(OLDS) + $(MAKE) runtime $(OLDS) else # In that case, $(OLDS) is empty, we can depend directly on runtime-all -crossopt: runtime-all +crossopt: runtime endif $(MAKE) ocamlc $(TOOLS_BYTECODE_TARGETS) expunge$(EXE) \ $(CROSS_COMPILER_OVERRIDES) $(OLDS) diff --git a/api_docgen/Makefile b/api_docgen/Makefile index 07254645812a..4ad898ea6bfd 100644 --- a/api_docgen/Makefile +++ b/api_docgen/Makefile @@ -14,6 +14,7 @@ #************************************************************************** # Used by included Makefiles ROOTDIR = .. +SUBDIR_NAME = api_docgen -include ../Makefile.build_config odoc-%: diff --git a/api_docgen/ocamldoc/Makefile b/api_docgen/ocamldoc/Makefile index 058f88c26d95..f173b7690001 100644 --- a/api_docgen/ocamldoc/Makefile +++ b/api_docgen/ocamldoc/Makefile @@ -14,6 +14,7 @@ #************************************************************************** # Used by included Makefiles ROOTDIR = ../.. +SUBDIR_NAME = api_docgen/ocamldoc include ../Makefile.common vpath %.mli ../../stdlib $(DOC_COMPILERLIBS_DIRS) $(DOC_STDLIB_DIRS) @@ -121,7 +122,4 @@ build/latex/compilerlibs_input.tex: | build/latex .PHONY: install install: - $(MKDIR) "$(INSTALL_LIBRARIES_MAN_DIR)" - if test -d build/man; then \ - $(INSTALL_DATA) build/man/*.3o "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi + $(call INSTALL_ITEMS_OPT, build/man/*.3o, man, $(INSTALL_MANDIR_LIBRARIES)) diff --git a/api_docgen/odoc/Makefile b/api_docgen/odoc/Makefile index c40ed778c41e..8f2c0195571a 100644 --- a/api_docgen/odoc/Makefile +++ b/api_docgen/odoc/Makefile @@ -15,6 +15,7 @@ # Used by included Makefiles ROOTDIR = ../.. +SUBDIR_NAME = api_docgen/odoc include ../Makefile.common @@ -191,13 +192,10 @@ $(ALL_PAGED_DOC:%=build/%.3o.stamp):build/%.3o.stamp:build/%.odocl | build/ # Man pages are the only installed documentation .PHONY: install install: - $(MKDIR) "$(INSTALL_LIBRARIES_MAN_DIR)" - if test -d build/man/libref ; then \ - $(INSTALL_DATA) build/man/libref/* "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi - if test -d build/man/compilerlibref ; then \ - $(INSTALL_DATA) build/man/libref/* "$(INSTALL_LIBRARIES_MAN_DIR)"; \ - fi + $(call INSTALL_ITEMS_OPT, \ + build/man/libref/*, man, $(INSTALL_MANDIR_LIBRARIES)) + $(call INSTALL_ITEMS_OPT, \ + build/man/compilerlibref/*, man, $(INSTALL_MANDIR_LIBRARIES)) # Dependencies for stdlib modules. # Use the same dependencies used for compiling .cmx files. diff --git a/appveyor.yml b/appveyor.yml index 10b984728fa6..bfef4674d4af 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,6 +17,13 @@ branches: - /4\.*/ - /5\.*/ - trunk + - relocatable-base-trunk + - relocatable-testing + - installation-tests + - installation-tests-unified-header + +# Do not build feature branch with open Pull Requests +skip_branch_with_pr: true # Compile the 64 bits version platform: @@ -41,6 +48,7 @@ environment: matrix: - PORT: mingw64 BOOTSTRAP_FLEXDLL: true + RELOCATABLE: true # OCaml 5.0 does not yet support MSVC # - PORT: msvc64 # BOOTSTRAP_FLEXDLL: false @@ -52,7 +60,7 @@ environment: # "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 matrix: - fast_finish: true + fast_finish: false cache: - C:\cygwin64\var\cache\setup diff --git a/asmcomp/asmlink.ml b/asmcomp/asmlink.ml index 498a8e0da7ca..e8febce846ce 100644 --- a/asmcomp/asmlink.ml +++ b/asmcomp/asmlink.ml @@ -105,12 +105,18 @@ let add_ccobjs origin l = end let runtime_lib () = - let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in - try - if !Clflags.nopervasives || not !Clflags.with_runtime then [] - else [ Load_path.find libname ] - with Not_found -> - raise(Error(File_not_found libname)) + if !Clflags.runtime_variant = "_shared" then + if Config.suffixing then + [Misc.RuntimeID.shared_runtime Sys.Native] + else + ["-lasmrun_shared"] + else + let libname = "libasmrun" ^ !Clflags.runtime_variant ^ ext_lib in + try + if !Clflags.nopervasives || not !Clflags.with_runtime then [] + else [ Load_path.find libname ] + with Not_found -> + raise(Error(File_not_found libname)) (* First pass: determine which units are needed *) @@ -198,6 +204,13 @@ let make_globals_map units_list ~crc_interfaces = crc_interfaces defined let make_startup_file ~ppf_dump units_list ~crc_interfaces = + let need_stdlib = + let needs_stdlib = function + | ({ui_need_stdlib = true; _}, _, _) -> true + | _ -> false + in + List.exists needs_stdlib units_list + in let compile_phrase p = Asmgen.compile_phrase ~ppf_dump p in Location.input_name := "caml_startup"; (* set name of "current" input *) Compilenv.reset "_startup"; @@ -224,6 +237,17 @@ let make_startup_file ~ppf_dump units_list ~crc_interfaces = Array.iteri (fun i name -> compile_phrase (Cmm_helpers.predef_exception i name)) Runtimedef.builtin_exceptions; + compile_phrase (Cmm_helpers.emit_global_char_os_constant + "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ())); + if need_stdlib then begin + let standard_library_default = + Option.value ~default:Config.standard_library_effective + !Clflags.standard_library_default in + compile_phrase + (Cmm_helpers.emit_global_string_constant + "caml_standard_library_nat" standard_library_default) + end; compile_phrase (Cmm_helpers.global_table name_list); let globals_map = make_globals_map units_list ~crc_interfaces in compile_phrase (Cmm_helpers.globals_map globals_map); diff --git a/asmcomp/asmpackager.ml b/asmcomp/asmpackager.ml index f0e2148f447c..6ea19f9f79b9 100644 --- a/asmcomp/asmpackager.ml +++ b/asmcomp/asmpackager.ml @@ -217,6 +217,9 @@ let build_package_cmx members cmxfile = else Clambda (get_approx ui) in + let ui_need_stdlib = + List.fold_left (fun acc unit -> acc || unit.ui_need_stdlib) false units + in Export_info_for_pack.clear_import_state (); let pkg_infos = { ui_name = ui.ui_name; @@ -239,6 +242,7 @@ let build_package_cmx members cmxfile = List.exists (fun info -> info.ui_force_link) units; ui_export_info; ui_for_pack = None; + ui_need_stdlib; } in Compilenv.write_unit_info pkg_infos cmxfile diff --git a/asmcomp/cmm_helpers.ml b/asmcomp/cmm_helpers.ml index f1c89ca3367f..1b1b9f0cb3ce 100644 --- a/asmcomp/cmm_helpers.ml +++ b/asmcomp/cmm_helpers.ml @@ -2660,6 +2660,40 @@ let predef_exception i name = in Cdata data_items +let emit_global_string_constant name value = + Cdata (emit_string_constant (name, Global) value []) + +module String = Misc.Stdlib.String + +let os_bytes_of_string = + if Config.target_win32 then + fun s -> + let b = Buffer.create (String.length s * 2) in + Seq.iter (Buffer.add_utf_16le_uchar b) (String.to_utf_8_seq s); + Buffer.add_utf_16le_uchar b (Uchar.of_int 0); + Buffer.contents b + else + fun s -> s ^ "\000" + +let emit_global_char_os_constant name value = + let data = + Cglobal_symbol name + :: Cdefine_symbol name + :: match value with + | Some s -> + if String.is_valid_utf_8 s then + let value = os_bytes_of_string s in + let value_sym = Compilenv.new_const_symbol () in + [Csymbol_address value_sym; + Cdefine_symbol value_sym; + Cstring value] + else + invalid_arg "Cmm_helpers.emit_global_string_constant" + | None -> + [cint_zero] + in + Cdata data + (* Header for a plugin *) let plugin_header units = diff --git a/asmcomp/cmm_helpers.mli b/asmcomp/cmm_helpers.mli index 1f5ac3da4d30..d6a255cce906 100644 --- a/asmcomp/cmm_helpers.mli +++ b/asmcomp/cmm_helpers.mli @@ -628,6 +628,13 @@ val code_segment_table: string list -> phrase (** Generate data for a predefined exception *) val predef_exception: int -> string -> phrase +(** Generate data for a global NUL-terminated string represented using [char_os] + (i.e. [wchar_t] on Windows; [char] on Unix). Must be valid UTF-8. *) +val emit_global_char_os_constant: string -> string option -> phrase + +(** Generate data for a global string constant *) +val emit_global_string_constant: string -> string -> phrase + val plugin_header: (Cmx_format.unit_infos * Digest.BLAKE128.t) list -> phrase (** Emit constant symbols *) diff --git a/boot/ocamlc b/boot/ocamlc index 144ac0e5655e..1a2618264725 100755 Binary files a/boot/ocamlc and b/boot/ocamlc differ diff --git a/boot/ocamllex b/boot/ocamllex index 92aa2d4b48b8..bd0aa01f1da4 100755 Binary files a/boot/ocamllex and b/boot/ocamllex differ diff --git a/build-aux/ocaml_version.m4 b/build-aux/ocaml_version.m4 index 96c17028ea04..e754178b5374 100644 --- a/build-aux/ocaml_version.m4 +++ b/build-aux/ocaml_version.m4 @@ -29,11 +29,14 @@ m4_define([OCAML__DEVELOPMENT_VERSION], [true]) # The three following components (major, minor and patch level) MUST be # integers. They MUST NOT be left-padded with zeros and all of them, -# including the patchlevel, are mandatory. +# including the patchlevel, are mandatory. OCAML__RELEASE_NUMBER must be +# incremented with each minor release, and likewise must be an unpadded integer. m4_define([OCAML__VERSION_MAJOR], [5]) m4_define([OCAML__VERSION_MINOR], [5]) +m4_define([OCAML__RELEASE_NUMBER], [21]) m4_define([OCAML__VERSION_PATCHLEVEL], [0]) + # Note that the OCAML__VERSION_EXTRA string defined below is always empty # for officially-released versions of OCaml. m4_define([OCAML__VERSION_EXTRA], [dev0-2025-04-28]) diff --git a/bytecomp/bytegen.ml b/bytecomp/bytegen.ml index b173ae0507c4..bba38494db4b 100644 --- a/bytecomp/bytegen.ml +++ b/bytecomp/bytegen.ml @@ -439,7 +439,9 @@ let comp_primitive stack_info p sz args = | Ostype_unix -> "ostype_unix" | Ostype_win32 -> "ostype_win32" | Ostype_cygwin -> "ostype_cygwin" - | Backend_type -> "backend_type" in + | Backend_type -> "backend_type" + | Standard_library_default -> "standard_library_default" + | Shared_libraries -> "shared_libraries" in Kccall(Printf.sprintf "caml_sys_const_%s" const_name, 1) | Pisint -> Kisint | Pisout -> Kisout diff --git a/bytecomp/bytelink.ml b/bytecomp/bytelink.ml index ff491aea5a5f..285ba36652e9 100644 --- a/bytecomp/bytelink.ml +++ b/bytecomp/bytelink.ml @@ -19,6 +19,7 @@ open Misc open Config open Cmo_format +module String = Misc.Stdlib.String module Compunit = Symtable.Compunit module Dep = struct @@ -201,7 +202,7 @@ let debug_info = ref ([] : (int * Instruct.debug_event list * string list) list) (* Link in a compilation unit *) -let link_compunit output_fun currpos_fun inchan file_name compunit = +let link_compunit accu output_fun currpos_fun inchan file_name compunit = check_consistency file_name compunit; seek_in inchan compunit.cu_pos; let code_block = @@ -227,46 +228,45 @@ let link_compunit output_fun currpos_fun inchan file_name compunit = debug_info := (currpos_fun(), debug_event_list, debug_dirs) :: !debug_info end; output_fun code_block; - if !Clflags.link_everything then - List.iter Symtable.require_primitive compunit.cu_primitives + let fold_primitive (needs_stdlib, uses_dynlink) name = + if !Clflags.link_everything then + Symtable.require_primitive name; + (needs_stdlib || name = "%standard_library_default", + uses_dynlink || name = "caml_reify_bytecode") + in + List.fold_left fold_primitive accu compunit.cu_primitives (* Link in a .cmo file *) -let link_object output_fun currpos_fun file_name compunit = - let inchan = open_in_bin file_name in - try - link_compunit output_fun currpos_fun inchan file_name compunit; - close_in inchan - with - Symtable.Error msg -> - close_in inchan; raise(Error(Symbol_error(file_name, msg))) - | x -> - close_in inchan; raise x +let link_object accu output_fun currpos_fun file_name compunit = + In_channel.with_open_bin file_name @@ fun inchan -> + try link_compunit accu output_fun currpos_fun inchan file_name compunit + with Symtable.Error msg -> raise(Error(Symbol_error(file_name, msg))) (* Link in a .cma file *) -let link_archive output_fun currpos_fun file_name units_required = - let inchan = open_in_bin file_name in - try - List.iter - (fun cu -> +let link_archive accu output_fun currpos_fun file_name units_required = + In_channel.with_open_bin file_name @@ fun inchan -> + List.fold_left + (fun accu cu -> let n = Compunit.name cu.cu_name in let name = file_name ^ "(" ^ n ^ ")" in try - link_compunit output_fun currpos_fun inchan name cu + link_compunit accu output_fun currpos_fun inchan name cu with Symtable.Error msg -> raise(Error(Symbol_error(name, msg)))) - units_required; - close_in inchan - with x -> close_in inchan; raise x + accu units_required (* Link in a .cmo or .cma file *) -let link_file output_fun currpos_fun = function +let link_file output_fun currpos_fun accu = function Link_object(file_name, unit) -> - link_object output_fun currpos_fun file_name unit + link_object accu output_fun currpos_fun file_name unit | Link_archive(file_name, units) -> - link_archive output_fun currpos_fun file_name units + link_archive accu output_fun currpos_fun file_name units + +let link_files output_fun currpos_fun = + List.fold_left (link_file output_fun currpos_fun) (false, false) (* Output the debugging information *) (* Format is: @@ -294,13 +294,6 @@ type launch_method = | Shebang_runtime | Executable -type runtime_launch_info = { - buffer : string; - bindir : string; - launcher : launch_method; - executable_offset : int -} - (* See https://www.in-ulm.de/~mascheck/various/shebang/#origin for a deep dive into shebangs. - Whitespace (space or horizontal tab) delimits the interpreter from an @@ -313,60 +306,23 @@ let invalid_for_shebang_line path = let invalid_char = function ' ' | '\t' | '\n' -> true | _ -> false in String.length path > 125 || String.exists invalid_char path -(* The runtime-launch-info file consists of two "lines" followed by binary data. - The file is _always_ LF-formatted, even on Windows. The sequence of bytes up - to the first '\n' is interpreted: - - "sh" - use a shebang-style launcher. If sh is needed, determine its - location from [command -p -v sh] - - "exe" - use the executable launcher contained in this runtime-launch-info - file. - - "/" ^ path - use a shebang-style launcher. If sh is needed, path is the - absolute location of sh. path must be valid for a shebang - line. - The second "line" is interpreted as the next "\000\n"-terminated sequence and - is the directory containing the default runtimes (ocamlrun, ocamlrund, etc.). - The null terminator is used since '\n' is valid in a nefarious installation - prefix but Posix forbids filenames including the nul character. - The remainder of the file is then the executable launcher for bytecode - programs (see stdlib/header{,nt}.c). *) - -let read_runtime_launch_info file = - let buffer = - try - In_channel.with_open_bin file In_channel.input_all - with Sys_error msg -> raise (Error (Camlheader (msg, file))) - in - try - let bindir_start = String.index buffer '\n' + 1 in - let bindir_end = String.index_from buffer bindir_start '\000' in - let bindir = String.sub buffer bindir_start (bindir_end - bindir_start) in - let executable_offset = bindir_end + 2 in - let launcher = - let kind = String.sub buffer 0 (bindir_start - 1) in - if kind = "exe" then - Executable - else if kind <> "" && (kind.[0] = '/' || kind = "sh") then - Shebang_bin_sh kind - else - raise Not_found in - if String.length buffer < executable_offset - || buffer.[executable_offset - 1] <> '\n' then - raise Not_found - else - {bindir; launcher; buffer; executable_offset} - with Not_found -> - raise (Error (Camlheader ("corrupt header", file))) - let find_bin_sh () = let output_file = Filename.temp_file "caml_bin_sh" "" in let result = try - let cmd = - Filename.quote_command ~stdout:output_file "command" ["-p"; "-v"; "sh"] + let run command args = + let cmd = + Filename.quote_command ~stdout:output_file command args + in + if !Clflags.verbose then + Printf.eprintf "+ %s\n" cmd; + (Sys.command cmd = 0) in - if !Clflags.verbose then - Printf.eprintf "+ %s\n" cmd; - if Sys.command cmd = 0 then + (* While [command -v] and [command -p] are long-standing Posix commands, + the ability to combine them as [command -p -v] is actually Posix Issue 7 + and so of course Solaris does not support it *) + if run "command" ["-p"; "-v"; "sh"] || + run "sh" ["-c"; "PATH=\"`getconf PATH`\" command -v sh"] then In_channel.with_open_text output_file input_line else "" @@ -376,45 +332,149 @@ let find_bin_sh () = remove_file output_file; result +(* Writes the shell script version of the bytecode launcher to outchan *) +let write_sh_launcher outchan bin_sh bindir search runtime = + let open struct type tag = D | A | E end in + let l tag fmt = + let output s = + if tag = D || tag = A && search <> Config.Absolute + || tag = E && search = Config.Absolute_then_search then begin + output_string outchan (String.trim s); + output_char outchan '\n' + end + in + Printf.ksprintf output fmt + in + let runtime = Filename.quote runtime in + let bin = Filename.quote (Filename.concat bindir "") in + let exec = + if search = Config.Absolute then + runtime + else + {|"$c"|} + in + let release = + Printf.sprintf "%d.%d" Sys.ocaml_release.major Sys.ocaml_release.minor + in + (* Each of the three search modes requires a slightly different shell script. + However, these shell scripts do have one very useful property: the script + for Absolute_then_search adds lines to the script for Search which adds + lines to the script for Absolute, but none of them change lines (apart from + a trivial tweak to the exec line for the Absolute script). + The lines below are laid out to reflect this, with the tag letters + D(isable) for the lines in the Absolute script, A(lways) for the lines in + Search script and E(nable) for the Absolute_then_search script. If a line + is emitted, it is first passed to String.trim, which allows indentation and + a column-based layout to be used. + + The Absolute script just needs to exec the runtime. The two searching modes + do a few more calculations and will ultimately exec the contents of $c + (which is why exec_arg above is set to the literal string {v "$c" v}). + + In the script itself: + - $r is the name of the runtime ('ocamlrun', 'ocamlrund', etc.) + - $d is calculated in the script as $(dirname "$0") - i.e. the directory + containing the bytecode executable itself + - $c will ultimately be the runtime to exec. If it is empty, then the + script displays an error message. In Absolute_then_search, $c will be the + first runtime to try (i.e. the runtime in bindir), and the bindir passed + must end with a separator (which is ensured by Filename.concat above) + + The script tries up to three options: + - exec $c, if it exists (prefer the runtime in bindir) + - exec $d/$r, if it exists (prefer a runtime in the same directory + as the bytecode executable) + - otherwise try $(command -v "$r") (search PATH for the runtime) + + If the script fails to find an interpreter, $c will always be empty + (since [command -v] will have returned an empty string) and an + error message can be displayed. *) + l D {|#!%s |} bin_sh; + l A {|r=%s |} runtime; + l E {|c=%s"$r" |} bin; + l E {|if ! test -f "$c"; then |}; + l A {| d="$(dirname "$0" 2>/dev/null)" |}; + l A {| test -z "$d" || d="${d%%/}/" |}; + l A {| c="$(command -v "$d$r")" |}; + l A {| test -n "$c" || c="$(command -v "$r")" |}; + l E {|fi |}; + l A {|if test -z "$c"; then |}; + l A {| echo 'This program requires an OCaml %s interpreter'>&2|} release; + l A {| echo "$r not found either with $0 or in \$PATH">&2 |}; + l A {|else |}; + l D {| exec %s "$0" "$@" |} exec; + l A {|fi |}; + l A {|exit 126 |} + (* Writes the executable header to outchan and writes the RNTM section, if needed. Returns a toc_writer (i.e. Bytesections.init_record is always called) *) let write_header outchan = - let use_runtime, runtime = - if String.length !Clflags.use_runtime > 0 then - (* Do not use BUILD_PATH_PREFIX_MAP mapping for this. *) - let make_absolute file = - if Filename.is_relative file then Filename.concat (Sys.getcwd()) file - else file in - (true, make_absolute !Clflags.use_runtime) - else - (false, "ocamlrun" ^ !Clflags.runtime_variant) - in - (* Write the header *) - let runtime_info = - let header = "runtime-launch-info" in - try read_runtime_launch_info (Load_path.find header) - with Not_found -> raise (Error (File_not_found header)) - in - let runtime = - (* Historically, the native Windows ports are assumed to be finding - ocamlrun using a PATH search. *) - if use_runtime || Sys.win32 then - runtime - else - Filename.concat runtime_info.bindir runtime + let zinc_runtime_id, write_exe_launcher = + let header = + let header = "runtime-launch-info" in + try Load_path.find header + with Not_found -> raise (Error (File_not_found header)) + in + let data = + try In_channel.with_open_bin header In_channel.input_all + with Sys_error msg -> raise (Error (Camlheader (msg, header))) + in + let zinc_runtime_id, offset = + if String.length data < 2 then + raise (Error (Camlheader ("corrupt header", header))) + else if data.[0] = '\000' then + None, 1 + else + let zinc = Misc.RuntimeID.of_zinc_hi (String.sub data 0 2) in + if Option.fold ~none:false ~some:Misc.RuntimeID.is_zinc zinc then + zinc, 2 + else + raise (Error (Camlheader ("corrupt header", header))) + in + let write_exe_header outchan = + let len = String.length data in + Out_channel.output_substring outchan data offset (len - offset) + in + zinc_runtime_id, write_exe_header in (* Determine which method will be used for launching the executable: Executable: concatenate the bytecode image to the executable stub Shebang_runtime: #! line with the required runtime Shebang_bin_sh: #! for a shell script calling exec *) + let launcher, bindir = + match !Clflags.launch_method with + | Config.Executable, bindir -> + Executable, bindir + | Config.Shebang sh, bindir -> + Shebang_bin_sh (Option.value ~default:"sh" sh), bindir + in + let runtime, search = + if String.length !Clflags.use_runtime > 0 then + (* Do not use BUILD_PATH_PREFIX_MAP mapping for this. *) + let runtime = !Clflags.use_runtime in + if Filename.is_relative runtime then + Filename.concat (Sys.getcwd ()) runtime, Config.Absolute + else + runtime, Config.Absolute + else + let runtime = + let runtime = "ocamlrun" ^ !Clflags.runtime_variant in + let some = Misc.RuntimeID.ocamlrun !Clflags.runtime_variant in + Option.fold ~none:runtime ~some zinc_runtime_id + in + if !Clflags.search_method <> Config.Absolute then + runtime, !Clflags.search_method + else + Filename.concat bindir runtime, Config.Absolute + in let launcher = - if runtime_info.launcher = Executable then + if launcher = Executable then Executable else - if invalid_for_shebang_line runtime then - match runtime_info.launcher with + if search <> Config.Absolute || invalid_for_shebang_line runtime then + match launcher with | Shebang_bin_sh sh -> let sh = if sh = "sh" then @@ -430,25 +490,37 @@ let write_header outchan = else Shebang_runtime in + (* Write the header *) match launcher with | Shebang_runtime -> + assert (search = Config.Absolute); (* Use the runtime directly *) Printf.fprintf outchan "#!%s\n" runtime; Bytesections.init_record outchan | Shebang_bin_sh bin_sh -> - (* exec the runtime using sh *) - Printf.fprintf outchan "\ - #!%s\n\ - exec %s \"$0\" \"$@\"\n" bin_sh (Filename.quote runtime); + (* Use the shebang launcher *) + write_sh_launcher outchan bin_sh bindir search runtime; Bytesections.init_record outchan | Executable -> (* Use the executable stub launcher *) - let pos = runtime_info.executable_offset in - let len = String.length runtime_info.buffer - pos in - Out_channel.output_substring outchan runtime_info.buffer pos len; + write_exe_launcher outchan; (* The runtime name needs recording in RNTM *) let toc_writer = Bytesections.init_record outchan in - Printf.fprintf outchan "%s\000" runtime; + (* stdlib/header.c determines which mode is needed based on whether the + RNTM section contains an embedded NUL character. For Absolute, the path + is written verbatim (no extra NUL), otherwise the directory separator + just before the basename is effectively turned into a NUL (for Search, + there is no dirname, so the string "begins" with a NUL character). *) + if search = Absolute then + output_string outchan runtime + else begin + if search = Absolute_then_search then + (* Ensure bindir does _not_ end up with a separator *) + output_string outchan + (Filename.(dirname (concat bindir current_dir_name))); + output_char outchan '\000'; + output_string outchan runtime + end; Bytesections.record toc_writer RNTM; toc_writer @@ -484,19 +556,39 @@ let link_bytecode ?final_name tolink exec_name standalone = let start_code = pos_out outchan in Symtable.init(); clear_crc_interfaces (); - let sharedobjs = List.map Dll.extract_dll_name !Clflags.dllibs in + let (tocheck, sharedobjs) = + let process_dllib ((suffixed, name) as dllib) (tocheck, sharedobjs) = + let resolved_name = Dll.extract_dll_name dllib in + let partial_name = + if suffixed then + if String.starts_with ~prefix:"-l" name then + (suffixed, "dll" ^ String.sub name 2 (String.length name - 2)) + else + dllib + else + (false, resolved_name) + in + (resolved_name::tocheck, partial_name::sharedobjs) + in + List.fold_right process_dllib !Clflags.dllibs ([], []) + in let check_dlls = standalone && Config.target = Config.host in if check_dlls then begin (* Initialize the DLL machinery *) Dll.init_compile !Clflags.no_std_include; Dll.add_path (Load_path.get_path_list ()); - try Dll.open_dlls Dll.For_checking sharedobjs + try Dll.open_dlls Dll.For_checking tocheck with Failure reason -> raise(Error(Cannot_open_dll reason)) end; let output_fun buf = Out_channel.output_bigarray outchan buf 0 (Bigarray.Array1.dim buf) and currpos_fun () = pos_out outchan - start_code in - List.iter (link_file output_fun currpos_fun) tolink; + (* link_files returns true if any module refers to caml_reify_bytecode, + which is used solely by the toplevel and dynlink libraries and is used + to control whether we included the CRCS section. *) + let needs_stdlib, uses_dynlink = + link_files output_fun currpos_fun tolink + in if check_dlls then Dll.close_all_dlls(); (* The final STOP instruction *) output_byte outchan Opcodes.opSTOP; @@ -505,11 +597,20 @@ let link_bytecode ?final_name tolink exec_name standalone = (* DLL stuff *) if standalone then begin (* The extra search path for DLLs *) - output_string outchan (concat_null_terminated !Clflags.dllpaths); - Bytesections.record toc_writer DLPT; + if !Clflags.dllpaths <> [] then begin + output_string outchan (concat_null_terminated !Clflags.dllpaths); + Bytesections.record toc_writer DLPT + end; (* The names of the DLLs *) - output_string outchan (concat_null_terminated sharedobjs); - Bytesections.record toc_writer DLLS + if sharedobjs <> [] then begin + let output_sharedobj (suffixed, name) = + output_char outchan (if suffixed then '-' else ':'); + output_string outchan name; + output_byte outchan 0 + in + List.iter output_sharedobj sharedobjs; + Bytesections.record toc_writer DLLS + end end; (* The names of all primitives *) Symtable.output_primitive_names outchan; @@ -519,12 +620,40 @@ let link_bytecode ?final_name tolink exec_name standalone = ~filename:final_name ~kind:"bytecode executable" outchan (Symtable.initial_global_table()); Bytesections.record toc_writer DATA; + let standard_library_default = + if standalone && needs_stdlib then + (* -set-runtime-default *) + if !Clflags.standard_library_default = None then + Some Config.standard_library_effective + else + !Clflags.standard_library_default + else + (* -custom executables don't need OSLD sections - the correct value + is already included in the runtime. *) + None + in + begin match standard_library_default with + | Some value -> + (* OCaml Standard Library Default location *) + output_string outchan value; + Bytesections.record toc_writer OSLD + | None -> () + end; + begin match Compenv.overridden_runtime_parameters () with + | Some ocamlrunparam when standalone -> + (* Embedded runtime defaults *) + output_string outchan ocamlrunparam; + Bytesections.record toc_writer ORUN; + | _ -> () + end; (* The map of global identifiers *) Symtable.output_global_map outchan; Bytesections.record toc_writer SYMB; (* CRCs for modules *) - output_value outchan (extract_crc_interfaces()); - Bytesections.record toc_writer CRCS; + if uses_dynlink then begin + output_value outchan (extract_crc_interfaces()); + Bytesections.record toc_writer CRCS + end; (* Debug info *) if !Clflags.debug then begin output_debug_info outchan; @@ -590,6 +719,54 @@ let output_cds_file outfile = Bytesections.write_toc_and_trailer toc_writer; ) +(* [c_string_literal_of_string s] returns the C literal string representation of + [s], suitable for embedding in a C source file with type [char_os *]. The + result includes the quote markers. *) +let c_string_literal_of_string s = + let b = Buffer.create (String.length s * 2) in + let utf16le = Bytes.create 4 in + let iter u = + match Uchar.to_int u with + (* Characters with C escape sequences *) + | 000 (* '\0' *) -> Buffer.add_string b "\\0" + | 009 (* '\t' *) -> Buffer.add_string b "\\t" + | 010 (* '\n' *) -> Buffer.add_string b "\\n" + | 013 (* '\r' *) -> Buffer.add_string b "\\r" + | 034 (* '\"' *) -> Buffer.add_string b "\\\"" + | 092 (* '\\' *) -> Buffer.add_string b "\\\\" + (* Most C compilers will have no problem processing UTF-8 in the strings + with the characters above converted to their C representations. On + Windows, where the string is [wchar_t *], all characters for which + iswprint returns 0 are escaped using the extended [\x] notation. *) + | c when Config.target_win32 && (c < 32 (* ' ' *) || c >= 127) -> + (* Convert u to UTF-16LE, allowing for surrogate pairs *) + let len = Bytes.set_utf_16le_uchar utf16le 0 u in + for i = 1 to len / 2 do + Printf.bprintf b "\\x%04x" (Bytes.get_uint16_le utf16le ((i - 1) * 2)) + done + | _ -> + Buffer.add_utf_8_uchar b u + in + if Config.target_win32 then + Buffer.add_char b 'L'; + Buffer.add_char b '"'; + Seq.iter iter (String.to_utf_8_seq s); + Buffer.add_char b '"'; + Buffer.contents b + +let emit_global_constant outchan name value = + let value = Option.fold ~none:"NULL" ~some:c_string_literal_of_string value in + Printf.fprintf outchan "const char_os * %s = %s;\n" name value + +let emit_runtime_standard_library_default outchan = + let stdlib = + if !Clflags.standard_library_default = None then + Some Config.standard_library_effective + else + !Clflags.standard_library_default + in + emit_global_constant outchan "caml_runtime_standard_library_default" stdlib + (* Output a bytecode executable as a C file *) let link_bytecode_as_c tolink outfile with_main = @@ -613,6 +790,8 @@ extern "C" { #include #include +enum caml_byte_program_mode caml_byte_program_mode = EMBEDDED; + static int caml_code[] = { |}; Symtable.init(); @@ -622,7 +801,7 @@ static int caml_code[] = { output_code_string outchan code; currpos := !currpos + (Bigarray.Array1.dim code) and currpos_fun () = !currpos in - List.iter (link_file output_fun currpos_fun) tolink; + let _, uses_dynlink = link_files output_fun currpos_fun tolink in (* The final STOP instruction *) Printf.fprintf outchan "\n0x%x};\n" Opcodes.opSTOP; (* The table of global data *) @@ -635,21 +814,26 @@ static char caml_data[] = { }; |}; (* The sections *) - let sections : (string * Obj.t) array = - [| Bytesections.Name.to_string SYMB, - Symtable.data_global_map(); - Bytesections.Name.to_string CRCS, - Obj.repr(extract_crc_interfaces()) |] + let sections : (string * Obj.t) list = + (Bytesections.Name.to_string SYMB, Symtable.data_global_map()) :: + if uses_dynlink then + [ Bytesections.Name.to_string CRCS, + Obj.repr(extract_crc_interfaces()) ] + else + [] in output_string outchan {| static char caml_sections[] = { |}; output_data_string outchan - (Marshal.to_string sections []); + (Marshal.to_string (Array.of_list sections) []); output_string outchan {| }; |}; + emit_global_constant outchan "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ()); + emit_runtime_standard_library_default outchan; (* The table of primitives *) Symtable.output_primitive_table outchan; (* The entry point *) @@ -657,7 +841,6 @@ static char caml_sections[] = { output_string outchan {| int main_os(int argc, char_os **argv) { - caml_byte_program_mode = COMPLETE_EXE; caml_startup_code(caml_code, sizeof(caml_code), caml_data, sizeof(caml_data), caml_sections, sizeof(caml_sections), @@ -715,13 +898,20 @@ value caml_startup_pooled_exn(char_os ** argv) if not with_main && !Clflags.debug then output_cds_file ((Filename.chop_extension outfile) ^ ".cds") +let runtime_library_name runtime_variant = + if runtime_variant = "_shared" && Config.suffixing then + Misc.RuntimeID.shared_runtime Sys.Bytecode + else + "-lcamlrun" ^ runtime_variant + (* Build a custom runtime *) let build_custom_runtime prim_name exec_name = let runtime_lib = if not !Clflags.with_runtime then "" - else "-lcamlrun" ^ !Clflags.runtime_variant in + else runtime_library_name !Clflags.runtime_variant + in let stable_name = if not !Clflags.keep_camlprimc_file then Some "camlprim.c" @@ -799,11 +989,19 @@ let link objfiles output_name = extern "C" { #endif +#define CAML_INTERNALS #define CAML_INTERNALS_NO_PRIM_DECLARATIONS + #include +#include + +enum caml_byte_program_mode caml_byte_program_mode = APPENDED; |}; Symtable.output_primitive_table poc; + emit_global_constant poc "caml_executable_ocamlrunparam" + (Compenv.overridden_runtime_parameters ()); + emit_runtime_standard_library_default poc; output_string poc {| #ifdef __cplusplus } @@ -858,7 +1056,8 @@ extern "C" { let runtime_lib = if not !Clflags.with_runtime then "" - else "-lcamlrun" ^ !Clflags.runtime_variant in + else runtime_library_name !Clflags.runtime_variant + in Ccomp.call_linker mode output_name ([obj_file] @ List.rev !Clflags.ccobjs @ [runtime_lib]) c_libs = 0 diff --git a/bytecomp/bytelink.mli b/bytecomp/bytelink.mli index 062137cd0470..d9ef6e62bcb4 100644 --- a/bytecomp/bytelink.mli +++ b/bytecomp/bytelink.mli @@ -30,27 +30,6 @@ val linkdeps_unit : val extract_crc_interfaces: unit -> crcs -(** Ways of starting a bytecode executable *) -type launch_method = -| Shebang_bin_sh of string (** Use a shell script *) -| Shebang_runtime (** Invoke the runtime directly *) -| Executable (** Use the executable stub *) - -(** runtime-launch-info files *) -type runtime_launch_info = { - buffer : string; (** Content of the file *) - bindir : string; (** Directory containing runtime executables *) - launcher : launch_method; (** Default launch method (this is never - {!Shebang_runtime}) *) - executable_offset : int (** Offset in the buffer field at which the - executable stub data begins *) -} - -val read_runtime_launch_info : string -> runtime_launch_info -(** [read_runtime_launch_info file] loads the {!runtime_launch_info} from [file] - - @raise Error if the file cannot be parsed *) - type error = | File_not_found of filepath | Not_an_object_file of filepath diff --git a/bytecomp/byterntm.mli b/bytecomp/byterntm.mli new file mode 100644 index 000000000000..ba4ea8e26a52 --- /dev/null +++ b/bytecomp/byterntm.mli @@ -0,0 +1,32 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* David Allsopp, University of Cambridge & Tarides *) +(* *) +(* Copyright 2025 David Allsopp Ltd. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(** Parser for RNTM in bytecode executables. Parses both the RNTM section and + the shebang launcher produced by {!Bytelink}. *) + +(** Search methods used by a tendered bytecode image to find a runtime. *) +type search_method = +| Absolute of string + (** Check fixed location only *) +| Absolute_then_search of string + (** Check given location first then search for the interpreter *) +| Search + (** Always search for the interpreter *) + +val read_runtime : + Bytesections.section_table -> in_channel + -> (string * Misc.RuntimeID.t option * search_method) option +(** Returns the runtime used by this tendered/standalone image. If the runtime + used cannot be parsed, or the image was linked using -without-runtime, then + [None] is returned. *) diff --git a/bytecomp/byterntm.mll b/bytecomp/byterntm.mll new file mode 100644 index 000000000000..d91cd4b92cd7 --- /dev/null +++ b/bytecomp/byterntm.mll @@ -0,0 +1,129 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* David Allsopp, University of Cambridge & Tarides *) +(* *) +(* Copyright 2025 David Allsopp Ltd. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +{ +type search_method = +| Absolute of string +| Absolute_then_search of string +| Search + +type state = Exec | R | C of string + +let cut_runtime_id search name = + let len = String.length name in + let id = + if len < 6 || name.[len - 5] <> '-' then + None + else + Misc.RuntimeID.of_string (String.sub name (len - 4) 4) + in + let name = + if id = None then + name + else + String.sub name 0 (len - 5) + in + Some (name, id, search) +} + +rule analyze = parse +(* RNTM section for -runtime-search absolute or shebang directly to the + runtime *) + | "#!" ([^ ' ' '\n']* as dir) ('/' as sep) ([^ '/' ' ' '\n']+ as runtime) '\n' + | ([^ '\000']* as dir) (['/' '\\' '\000'] as sep) (* Directory portion *) + ([^ '\\' '/' '\000']+ as runtime) eof (* Runtime portion *) + { if sep = '\000' then + if dir = "" then + cut_runtime_id Search runtime + else + let dir = Filename.concat dir "" in + cut_runtime_id (Absolute_then_search dir) runtime + else + let dir = dir ^ String.make 1 sep in + cut_runtime_id (Absolute dir) runtime } + +(* Legacy RNTM (remove after bootstrap) *) + | (([^ '\000']* ['/' '\\']) as dir) + ([^ '\\' '/' '\000']+ as runtime) '\000' eof + { if dir = "" then + Some (runtime, None, Search) + else + Some (runtime, None, Absolute dir) } + +(* Shell script launcher (if it matches, this always matches more than the above + regexp) *) + | "#!" [^ ' ' '\n']+ "/sh\n" (("exec '" | "r='") as next) + { let state = if next.[0] = 'r' then R else Exec in + analyze_sh_launcher state (Buffer.create 1024) lexbuf } + + | _ | eof + { None } + +and analyze_sh_launcher state b = parse +(* An embedded single quote *) + | "'\\''" + { analyze_sh_launcher state (Buffer.add_char b '\''; b) lexbuf } + + | [^ '\'' ]+ as s + { analyze_sh_launcher state (Buffer.add_string b s; b) lexbuf } + +(* exec line for -runtime-search disable *) + | "' \"$0\" \"$@\"\n" + { if state = Exec then + let name = Buffer.contents b in + let runtime = Filename.basename name in + let dir = + String.sub name 0 (String.length name - String.length runtime) + in + cut_runtime_id (Absolute dir) runtime + else + None } + +(* r= line for -runtime-search {always,enable} *) + | "'\n" ("c='" as c)? + { if state = R then + let runtime = Buffer.contents b in + if c = None then + cut_runtime_id Search runtime + else + analyze_sh_launcher (C runtime) (Buffer.clear b; b) lexbuf + else + None } + +(* c= line for -runtime-search enable *) + | "'\"$r\"\n" + { match state with + | C runtime -> + cut_runtime_id (Absolute_then_search (Buffer.contents b)) runtime + | _ -> + None } + + | _ | eof + { None } + +{ +let read_runtime t ic = + seek_in ic 0; + let lexbuf = + try + if really_input_string ic 2 = "#!" then + let () = seek_in ic 0 in + Some (Lexing.from_channel ic) + else + let rntm = Bytesections.(read_section_string t ic Name.RNTM) in + Some (Lexing.from_string rntm) + with End_of_file | Not_found -> None + in + Option.bind lexbuf analyze +} diff --git a/bytecomp/bytesections.ml b/bytecomp/bytesections.ml index 30a1c0fbc9ec..64939d545ff6 100644 --- a/bytecomp/bytesections.ml +++ b/bytecomp/bytesections.ml @@ -26,6 +26,8 @@ module Name = struct | DBUG (** debug info *) | DLLS (** dll names *) | DLPT (** dll paths *) + | ORUN (** embedded runtime parameters *) + | OSLD (** OCaml Standard Library Default location *) | PRIM (** primitives names *) | RNTM (** The path to the bytecode interpreter (use_runtime mode) *) | SYMB (** global identifiers *) @@ -37,6 +39,8 @@ module Name = struct | "DLPT" -> DLPT | "DLLS" -> DLLS | "DATA" -> DATA + | "ORUN" -> ORUN + | "OSLD" -> OSLD | "PRIM" -> PRIM | "SYMB" -> SYMB | "DBUG" -> DBUG @@ -52,6 +56,8 @@ module Name = struct | DLPT -> "DLPT" | DLLS -> "DLLS" | DATA -> "DATA" + | ORUN -> "ORUN" + | OSLD -> "OSLD" | PRIM -> "PRIM" | SYMB -> "SYMB" | DBUG -> "DBUG" diff --git a/bytecomp/bytesections.mli b/bytecomp/bytesections.mli index 3d287932ac29..b4dc3ea6dbf5 100644 --- a/bytecomp/bytesections.mli +++ b/bytecomp/bytesections.mli @@ -27,6 +27,8 @@ module Name : sig | DBUG (** debug info *) | DLLS (** dll names *) | DLPT (** dll paths *) + | ORUN (** embedded runtime parameters *) + | OSLD (** OCaml Standard Library Default location *) | PRIM (** primitives names *) | RNTM (** The path to the bytecode interpreter (use_runtime mode) *) | SYMB (** global identifiers *) diff --git a/bytecomp/dll.ml b/bytecomp/dll.ml index f86efc6e93be..7fc684ee12a9 100644 --- a/bytecomp/dll.ml +++ b/bytecomp/dll.ml @@ -51,13 +51,20 @@ let remove_path dirs = (* Extract the name of a DLLs from its external name (xxx.so or -lxxx) *) -let extract_dll_name file = - if Filename.check_suffix file Config.ext_dll then +let extract_dll_name (suffixed, file) = + if not suffixed && Filename.check_suffix file Config.ext_dll then Filename.chop_suffix file Config.ext_dll - else if String.length file >= 2 && String.sub file 0 2 = "-l" then - "dll" ^ String.sub file 2 (String.length file - 2) else - file (* will cause error later *) + let file = + if String.starts_with ~prefix:"-l" file then + "dll" ^ String.sub file 2 (String.length file - 2) + else + file + in + if suffixed then + Misc.RuntimeID.stubslib file + else + file (* Open a list of DLLs, adding them to opened_dlls. Raise [Failure msg] in case of error. *) @@ -138,22 +145,7 @@ let synchronize_primitive num symb = assert (actual_num = num) end -(* Read the [ld.conf] file and return the corresponding list of directories *) - -let ld_conf_contents () = - let path = ref [] in - begin try - let ic = open_in (Filename.concat Config.standard_library "ld.conf") in - begin try - while true do - path := input_line ic :: !path - done - with End_of_file -> () - end; - close_in ic - with Sys_error _ -> () - end; - List.rev !path +external ld_conf_contents : string -> string list = "caml_dynlink_parse_ld_conf" (* Split the CAML_LD_LIBRARY_PATH environment variable and return the corresponding list of directories. *) @@ -162,6 +154,7 @@ let ld_library_path_contents () = | exception Not_found -> [] | s -> + (* NB: Misc.split_path_contents "" = [] *) Misc.split_path_contents s (* Initialization for separate compilation *) @@ -169,7 +162,8 @@ let ld_library_path_contents () = let init_compile nostdlib = search_path := ld_library_path_contents() @ - (if nostdlib then [] else ld_conf_contents()) + (if nostdlib then [] else + ld_conf_contents Config.standard_library_effective) (* Initialization for linking in core (dynlink or toplevel) *) diff --git a/bytecomp/dll.mli b/bytecomp/dll.mli index 216fea828014..dc9e99c041d2 100644 --- a/bytecomp/dll.mli +++ b/bytecomp/dll.mli @@ -16,7 +16,7 @@ (* Handling of dynamically-linked libraries *) (* Extract the name of a DLLs from its external name (xxx.so or -lxxx) *) -val extract_dll_name: string -> string +val extract_dll_name: bool * string -> string type dll_mode = | For_checking (* will just check existence of symbols; diff --git a/bytecomp/meta.ml b/bytecomp/meta.ml deleted file mode 100644 index c1ec15f3db9a..000000000000 --- a/bytecomp/meta.ml +++ /dev/null @@ -1,28 +0,0 @@ -(**************************************************************************) -(* *) -(* OCaml *) -(* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) -(* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) -(* en Automatique. *) -(* *) -(* All rights reserved. This file is distributed under the terms of *) -(* the GNU Lesser General Public License version 2.1, with the *) -(* special exception on linking described in the file LICENSE. *) -(* *) -(**************************************************************************) - -external global_data : unit -> Obj.t array = "caml_get_global_data" -external realloc_global_data : int -> unit = "caml_realloc_global" -type closure = unit -> Obj.t -type bytecode -external reify_bytecode : - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> - Instruct.debug_event list array -> string option -> - bytecode * closure - = "caml_reify_bytecode" -external release_bytecode : bytecode -> unit - = "caml_static_release_bytecode" -external invoke_traced_function : Obj.raw_data -> Obj.t -> Obj.t -> Obj.t - = "caml_invoke_traced_function" diff --git a/bytecomp/meta.mli b/bytecomp/meta.mli deleted file mode 100644 index e48898b2bd76..000000000000 --- a/bytecomp/meta.mli +++ /dev/null @@ -1,30 +0,0 @@ -(**************************************************************************) -(* *) -(* OCaml *) -(* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) -(* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) -(* en Automatique. *) -(* *) -(* All rights reserved. This file is distributed under the terms of *) -(* the GNU Lesser General Public License version 2.1, with the *) -(* special exception on linking described in the file LICENSE. *) -(* *) -(**************************************************************************) - -(* To control the runtime system and bytecode interpreter *) - -external global_data : unit -> Obj.t array = "caml_get_global_data" -external realloc_global_data : int -> unit = "caml_realloc_global" -type closure = unit -> Obj.t -type bytecode -external reify_bytecode : - (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> - Instruct.debug_event list array -> string option -> - bytecode * closure - = "caml_reify_bytecode" -external release_bytecode : bytecode -> unit - = "caml_static_release_bytecode" -external invoke_traced_function : Obj.raw_data -> Obj.t -> Obj.t -> Obj.t - = "caml_invoke_traced_function" diff --git a/bytecomp/symtable.ml b/bytecomp/symtable.ml index 0c3a7481b703..2f2fb66ac609 100644 --- a/bytecomp/symtable.ml +++ b/bytecomp/symtable.ml @@ -326,12 +326,15 @@ let data_global_map () = (* Functions for toplevel use *) +external global_data : unit -> Obj.t array = "caml_get_global_data" +external realloc_global_data : int -> unit = "caml_realloc_global" + (* Update the in-core table of globals *) let update_global_table () = let ng = !global_table.cnt in - if ng > Array.length(Meta.global_data()) then Meta.realloc_global_data ng; - let glob = Meta.global_data() in + if ng > Array.length(global_data()) then realloc_global_data ng; + let glob = global_data() in List.iter (fun (slot, cst) -> glob.(slot) <- cst) !literal_table; @@ -361,9 +364,9 @@ let init_toplevel () = let get_global_position = slot_for_getglobal let get_global_value global = - (Meta.global_data()).(slot_for_getglobal global) + (global_data()).(slot_for_getglobal global) let assign_global_value global v = - (Meta.global_data()).(slot_for_getglobal global) <- v + (global_data()).(slot_for_getglobal global) <- v (* Check that all compilation units referenced in the given patch list have already been initialized *) diff --git a/configure b/configure index 0644670ac301..f14632831fcf 100755 --- a/configure +++ b/configure @@ -800,11 +800,23 @@ build_os build_vendor build_cpu build +runtime_search_target +runtime_search +suffixing +native_runtime_id +bytecode_runtime_id +zinc_runtime_id_hi +zinc_runtime_id_lo +build_map_flags +srcdir_abs_real +srcdir_abs +target_libdir_is_relative ar_supports_response_files QS TARGET_LIBDIR ocaml_libdir ocaml_bindir +ocaml_prefix compute_deps build_libraries_manpages PACKLD @@ -851,6 +863,7 @@ build_ocamldoc build_ocamltex build_ocamldebug with_debugger +as_is_cc as_has_debug_prefix_map cc_has_debug_prefix_map unix_directory @@ -878,6 +891,8 @@ natdynlink supports_shared_libraries mklib AR +target_launch_method +launch_method shebangscripts winpthreads_source_include_dir winpthreads_source_dir @@ -952,6 +967,7 @@ CMO_MAGIC_NUMBER CMI_MAGIC_NUMBER EXEC_MAGIC_NUMBER MAGIC_LENGTH +OCAML_RELEASE_NUMBER OCAML_VERSION_SHORT OCAML_VERSION_EXTRA OCAML_VERSION_PATCHLEVEL @@ -1036,6 +1052,7 @@ enable_flambda enable_flambda_invariants enable_cmm_invariants with_target_sh +with_stublibs enable_reserved_header_bits enable_stdlib_manpages enable_warn_error @@ -1043,6 +1060,10 @@ enable_force_safe_string enable_flat_float_array enable_function_sections enable_mmap_map_stack +with_relative_libdir +enable_suffixing +enable_runtime_search +enable_runtime_search_target with_afl with_flexdll with_winpthreads_msvc @@ -1754,6 +1775,13 @@ Optional Features: --disable-function-sections do not emit each function in a separate section --enable-mmap-map-stack use mmap to allocate stacks instead of malloc + --disable-suffixing disable suffixing of runtime executables and shared + libraries + --enable-runtime-search allow the distribution's bytecode executables to + search for ocamlrun + --enable-runtime-search-target + allow bytecode executables produced by ocamlc to + search for ocamlrun --enable-shared[=PKGS] build shared libraries [default=yes] --enable-static[=PKGS] build static libraries [default=yes] --enable-pic[=PKGS] try to use only PIC/non-PIC objects [default=use @@ -1770,6 +1798,10 @@ Optional Packages: --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-odoc build documentation with odoc --with-target-sh location of Posix sh on the target system + --with-stublibs additional directory for searching for bytecode stub + libraries + --with-relative-libdir location of the Standard Library, specified relative + to --bindir --with-afl use the AFL fuzzer --with-flexdll bootstrap FlexDLL from the given sources --with-winpthreads-msvc build winpthreads (only for the MSVC port) from the @@ -3367,6 +3399,12 @@ ocamltest_unix_impl="dummy" unix_library="" unix_directory="" diff_supports_color=false +target_libdir_is_relative=false +srcdir_abs='' +srcdir_abs_real='' +build_map_flags='' +runtime_search='' +runtime_search_target='' # Information about the package @@ -3397,6 +3435,8 @@ OCAML_VERSION_EXTRA=dev0-2025-04-28 OCAML_VERSION_SHORT=5.5 +OCAML_RELEASE_NUMBER=21 + printf "%s\n" "#define MAGIC_NUMBER_PREFIX \"Caml1999\"" >>confdefs.h printf "%s\n" "#define MAGIC_NUMBER_VERSION \"037\"" >>confdefs.h @@ -3517,6 +3557,9 @@ LINEAR_MAGIC_NUMBER=Caml1999L037 + + + @@ -3560,6 +3603,18 @@ LINEAR_MAGIC_NUMBER=Caml1999L037 + + + + + + + + + + + + @@ -3581,10 +3636,6 @@ ac_config_files="$ac_config_files Makefile.config" ac_config_files="$ac_config_files stdlib/sys.ml" -ac_config_files="$ac_config_files manual/src/version.tex" - -ac_config_files="$ac_config_files manual/src/html_processing/src/common.ml" - ac_config_files="$ac_config_files otherlibs/dynlink/dynlink_config.ml" ac_config_files="$ac_config_files utils/config.common.ml" @@ -3607,6 +3658,13 @@ ac_config_files="$ac_config_files otherlibs/runtime_events/META" ac_config_files="$ac_config_files stdlib/META" +if test -d manual +then : + ac_config_files="$ac_config_files manual/src/version.tex" + + ac_config_files="$ac_config_files manual/src/html_processing/src/common.ml" + +fi # Definitions related to the version of OCaml printf "%s\n" "#define OCAML_VERSION_MAJOR 5" >>confdefs.h @@ -3623,6 +3681,8 @@ printf "%s\n" "#define OCAML_VERSION 50500" >>confdefs.h printf "%s\n" "#define OCAML_VERSION_STRING \"5.5.0+dev0-2025-04-28\"" >>confdefs.h +printf "%s\n" "#define OCAML_RELEASE_NUMBER 21" >>confdefs.h + # Works out how many "o"s are needed in quoted strings @@ -3782,12 +3842,46 @@ then : ac_tool_prefix=$target_alias- fi +# $cygwin_build_env=true if the build is taking place in any kind of Cygwin-like +# environment (which may include cross-compiling _from_ Cygwin) +# All patterns end with * (cf. build-aux/config.sub) +# +# In Cygwin itself, the mingw-w64 compilers are cross-compilers +# (host=x86_64-pc-cygwin; target=*-w64-mingw32) and $build when running from +# within Cygwin is always *-pc-cygwin. +# +# In MSYS2, the mingw-w64 compiler are normal host compilers, which MSYS2 makes +# available through different Environments (in a similar to the Microsoft Visual +# Studio Tools Command Prompts; see https://www.msys2.org/docs/environments/). +# It is possible to use MSYS2's "Cygwin" gcc (the equivalent of compiling native +# Cygwin), in which case $build is *-*-msys*. +# The mingw-w64 Environments manually set $build to *-w64-mingw32, but the +# _native_ value inferred by config.guess (which uses uname -s) will be +# *-pc-mingw32 (for the 32-bit _target_ environments, even though MSYS2 is a +# 64-bit build environment) and *-pc-mingw64 (for the 64-bit _target_ +# environments). +# +# This leads to the four patterns below, all of which imply that the build is +# taking place on a system where Cygwin's utilities (cygpath, etc.) can be +# expected to be found and semantics (CYGWIN=winsymlinks:native, etc.) expected +# to apply. +# +# Note that although build=x86_64-pc-mingw64 will be accepted here, it is highly +# likely that that's a misconfigured environment, and the script will +# subsequently fail if host has not been altered to x86_64-w64-mingw32. +case $build in #( + *-*-cygwin*|*-*-msys*|*-*-mingw32*|*-*-mingw64*) : + cygwin_build_env=true ;; #( + *) : + cygwin_build_env=false ;; +esac + # Ensure that AC_CONFIG_LINKS will either create symlinks which are compatible # with native Windows (i.e. NTFS symlinks, not WSL or Cygwin-emulated ones) or # use its fallback mechanisms. Native Windows versions of ocamlc/ocamlopt cannot # interpret either WSL or Cygwin-emulated symlinks. -case $host in #( - *-pc-windows|*-w64-mingw32*) : +case $cygwin_build_env,$host in #( + true,*-pc-windows|true,*-w64-mingw32*) : ac_config_commands="$ac_config_commands native-symlinks" ;; #( *) : @@ -4220,6 +4314,25 @@ else $as_nop fi + +# Check whether --with-stublibs was given. +if test ${with_stublibs+y} +then : + withval=$with_stublibs; if test x"$withval" = 'xno' +then : + ocaml_additional_stublibs_dir='' +else $as_nop + if test x"$withval" = 'xyes' +then : + as_fn_error $? "--with-stublibs needs an argument" "$LINENO" 5 +fi + ocaml_additional_stublibs_dir="$withval" +fi +else $as_nop + ocaml_additional_stublibs_dir='' +fi + + # Check whether --enable-reserved-header-bits was given. if test ${enable_reserved_header_bits+y} then : @@ -4290,6 +4403,76 @@ fi +# Check whether --with-relative-libdir was given. +if test ${with_relative_libdir+y} +then : + withval=$with_relative_libdir; if test x"$withval" = 'xno' +then : + bindir_to_libdir='' +else $as_nop + bindir_to_libdir="$withval" +fi +else $as_nop + bindir_to_libdir='' +fi + + +# Check whether --enable-suffixing was given. +if test ${enable_suffixing+y} +then : + enableval=$enable_suffixing; if test "x$enableval" = 'xno' +then : + suffixing=false +else $as_nop + suffixing=true +fi +else $as_nop + suffixing=true +fi + + +# Check whether --enable-runtime-search was given. +if test ${enable_runtime_search+y} +then : + enableval=$enable_runtime_search; case $enableval in #( + no) : + ;; #( + yes) : + runtime_search='enable' ;; #( + always) : + runtime_search='always' ;; #( + *) : + as_fn_error $? "valid values are yes, no or always for --enable-runtime-search" "$LINENO" 5 ;; +esac +fi + + +# Check whether --enable-runtime-search-target was given. +if test ${enable_runtime_search_target+y} +then : + enableval=$enable_runtime_search_target; case $enableval in #( + no) : + ;; #( + yes) : + runtime_search_target='enable' ;; #( + always) : + runtime_search_target='always' ;; #( + *) : + as_fn_error $? "valid values are yes, no or always for --enable-runtime-search-target" "$LINENO" 5 ;; +esac +fi + + +case $suffixing,$runtime_search,$runtime_search_target in #( + true,*,*|false,,) : + ;; #( + false,*,*) : + as_fn_error $? "--disable-suffixed cannot be used with --enable-runtime-search or --enable-runtime-search-target" "$LINENO" 5 ;; #( + *) : + ;; +esac + + # Check whether --with-afl was given. if test ${with_afl+y} then : @@ -15084,11 +15267,11 @@ esac # See https://lists.gnu.org/archive/html/autoconf/2019-07/msg00002.html # for the detailed explanation. -ocamlsrcdir=$(unset CDPATH; cd -- "$srcdir" && printf %sX "$PWD") || fail -ocamlsrcdir=${ocamlsrcdir%X} +srcdir_abs=$(unset CDPATH; cd -- "$srcdir" && printf %sX "$PWD") || fail +srcdir_abs=${srcdir_abs%X} -case $host in #( - *-w64-mingw32*|*-pc-windows) : +case $cygwin_build_env,$host in #( + true,*-w64-mingw32*|true,*-pc-windows) : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a workable solution for ln -sf" >&5 printf %s "checking for a workable solution for ln -sf... " >&6; } @@ -15102,9 +15285,10 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ln" >&5 printf "%s\n" "$ln" >&6; } - ocamlsrcdir="$(LC_ALL=C.UTF-8 cygpath -w -- "$ocamlsrcdir")" ;; #( + ocamlsrcdir="$(LC_ALL=C.UTF-8 cygpath -w -- "$srcdir_abs")" ;; #( *) : - ln='ln -sf' ;; + ln='ln -sf' + ocamlsrcdir="$srcdir_abs" ;; esac # Whether ar supports @FILE arguments @@ -15121,10 +15305,12 @@ esac case $target in #( *-w64-mingw32*|*-pc-windows) : unix_or_win32="win32" + default_separator='\' ocamltest_libunix="Some false" ocamlyacc_wstr_module="yacc/wstr" ;; #( *) : unix_or_win32="unix" + default_separator='/' ocamltest_libunix="Some true" ocamlyacc_wstr_module="" ;; esac @@ -15217,12 +15403,6 @@ esac fi -# stdlib/runtime.info and stdlib/target_runtime.info are generated by commands -# in config.status, rather than by the .in mechanism, since the latter cannot -# reliably process binary files. -ac_config_commands="$ac_config_commands shebang" - - # Checks for programs ## Check for the C compiler: done by libtool @@ -16036,16 +16216,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext if test x"$build" != x"$host" then : - case $build in #( - *-pc-msys|*-*-cygwin) : - flexlink_where="$(cmd /c "$flexlink" -where 2>/dev/null)" - if test -z "$flexlink_where" + if $cygwin_build_env +then : + flexlink_where="$(cmd /c "$flexlink" -where 2>/dev/null)" + if test -z "$flexlink_where" then : as_fn_error $? "$flexlink is not executable from a native Win32 process" "$LINENO" 5 -fi ;; #( - *) : - ;; -esac + +fi + +fi fi @@ -17725,6 +17905,13 @@ fi # Checks for header files +ac_fn_c_check_header_compile "$LINENO" "libgen.h" "ac_cv_header_libgen_h" "$ac_includes_default" +if test "x$ac_cv_header_libgen_h" = xyes +then : + printf "%s\n" "#define HAS_LIBGEN_H 1" >>confdefs.h + +fi + ac_fn_c_check_header_compile "$LINENO" "pthread_np.h" "ac_cv_header_pthread_np_h" "$ac_includes_default" if test "x$ac_cv_header_pthread_np_h" = xyes then : @@ -19418,30 +19605,45 @@ fi # to avoiding forking a C compiler process for each compilation by ocamlopt. # Both AS and ASPP can be overridden by the user. -default_as="$CC -c" -default_aspp="$CC -c" +as_is_cc=true +default_as='' case $as_target,$ocaml_cc_vendor in #( *-*-linux*,gcc-*) : case $as_cpu in #( x86_64|arm*|aarch64*|i[3-6]86|riscv*) : - default_as="${toolpref}as" ;; #( + default_as="${toolpref}as" + as_is_cc=false ;; #( *) : ;; esac ;; #( + *-*-cygwin,gcc-*) : + default_as="${toolpref}as" + as_is_cc=false ;; #( i686-pc-windows,*) : default_as="ml -nologo -coff -Cp -c -Fo" - default_aspp="$default_as" ;; #( + default_aspp="$default_as" + as_is_cc=false ;; #( x86_64-pc-windows,*) : default_as="ml64 -nologo -Cp -c -Fo" - default_aspp="$default_as" ;; #( + default_aspp="$default_as" + as_is_cc=false ;; #( *-*-darwin*,clang-*) : - default_as="$default_as -Wno-trigraphs" + default_as="$CC -c -Wno-trigraphs" default_aspp="$default_as" ;; #( *) : ;; esac +if test -z "$default_as" +then : + default_as="$CC -c" +fi +if test -z "$default_aspp" +then : + default_aspp="$CC -c" +fi + if test "$with_pic" then : fpic=true @@ -19787,6 +19989,15 @@ then : fi +## strlcpy +ac_fn_c_check_func "$LINENO" "strlcpy" "ac_cv_func_strlcpy" +if test "x$ac_cv_func_strlcpy" = xyes +then : + printf "%s\n" "#define HAS_STRLCPY 1" >>confdefs.h + +fi + + ## secure_getenv and __secure_getenv saved_CPPFLAGS="$CPPFLAGS" @@ -21359,8 +21570,6 @@ fi ## -fdebug-prefix-map support by the C compiler case $ocaml_cc_vendor,$target in #( - *,*-w64-mingw32*) : - cc_has_debug_prefix_map=false ;; #( *,*-pc-windows) : cc_has_debug_prefix_map=false ;; #( xlc*,powerpc-ibm-aix*) : @@ -21410,6 +21619,100 @@ fi ;; esac +## -ffile-prefix-map support by the C compiler - used in the build, not by +## the compiler +if test x"$bindir_to_libdir" != 'x' +then : + srcdir_abs_real="$(realpath "$srcdir_abs" 2>/dev/null)" + if test x"$srcdir_abs_real" = "x$srcdir_abs" +then : + srcdir_abs_real='' +fi + as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_$warn_error_flag_-Wa,--debug-prefix-map=old=new" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts -Wa,--debug-prefix-map=old=new" >&5 +printf %s "checking whether the C compiler accepts -Wa,--debug-prefix-map=old=new... " >&6; } +if eval test \${$as_CACHEVAR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS $warn_error_flag -Wa,--debug-prefix-map=old=new" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$as_CACHEVAR=yes" +else $as_nop + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" +then : + build_map_flags='-Wa,--debug-prefix-map=' +else $as_nop + : +fi + + as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags_$warn_error_flag_-ffile-prefix-map=old=new" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler accepts -ffile-prefix-map=old=new" >&5 +printf %s "checking whether the C compiler accepts -ffile-prefix-map=old=new... " >&6; } +if eval test \${$as_CACHEVAR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS $warn_error_flag -ffile-prefix-map=old=new" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$as_CACHEVAR=yes" +else $as_nop + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" +then : + build_map_flags="$build_map_flags -ffile-prefix-map=" +else $as_nop + if $cc_has_debug_prefix_map +then : + build_map_flags="$build_map_flags -fdebug-prefix-map=" +fi +fi + +fi + ## Does stat support nanosecond precision stat_has_ns_precision=false @@ -24118,11 +24421,6 @@ then : libdir="$libdir"/ocaml fi -if test x"$TARGET_LIBDIR" = x -then : - TARGET_LIBDIR="$libdir" -fi - if test x"$mandir" = x'${datarootdir}/man' then : mandir='${prefix}/man' @@ -24143,13 +24441,95 @@ then : *) : ;; esac -else $as_nop - case $build,$host in #( - *-*-cygwin,*-w64-mingw32*|*-*-cygwin,*-pc-windows) : - prefix="$(LC_ALL=C.UTF-8 cygpath -m "$prefix")" ;; #( +fi + +# Normalise $prefix, if necessary, on Windows. There are 9 variables to be +# considered: +# - $prefix and $exec_prefix. These are autoconf variables containing the values +# specified for --prefix and --exec-prefix respectively, or NONE if these +# flags were not given on the command line. These two variables are written to +# Makefile.config and are in _build_ format. On native Windows, the values +# must be suitable to pass to _both_ native Windows processes and Cygwin/MSYS2 +# commands. The values are ultimately converted to Windows paths using slashes +# (i.e. C:/foo) +# - $bindir and $libdir. These similarly contain the values specified for +# --bindir and --libdir, or defaults relative to $exec_prefix otherwise. +# Unlike --prefix, values specified to configure for --bindir and --libdir are +# assumed to be suitable for both native Windows processes and Cygwin/MSYS2. +# These two variables ultimately end up in Makefile.config as $(BINDIR) and +# $(LIBDIR) and are used for installation commands only. +# - $ocaml_prefix, $ocaml_bindir and $ocaml_libdir are the _expanded_ values of +# $prefix, $bindir and $libdir so that the values can be inserted into OCaml +# strings (typically in utils/config.generated.ml). On Windows, these preserve +# the backslashes present in the value passed to --prefix. +# - $TARGET_BINDIR and $TARGET_LIBDIR. These are both precious environment +# variables (meaning their value is recorded by configure) but they default to +# $ocaml_bindir and $ocaml_libdir respectively. +ocaml_bindir="$bindir" +ocaml_libdir="$libdir" +ocaml_prefix="$prefix" +case $cygwin_build_env,$host in #( + true,*-w64-mingw32*|true,*-pc-windows) : + prefix="$(LC_ALL=C.UTF-8 cygpath -m "$prefix")" + if test "x${ocaml_prefix%%/*}" != "x$ocaml_prefix" +then : + # $prefix contained a slash - normalise it with cygpath. The rationale for + # this allows both `./configure --prefix $PWD/install` (which will be a + # Cygwin path) and also systems lazily using slashes instead of + # backslashes (i.e. C:/Backslashes/Scare/Us) to work. + ocaml_prefix="$prefix" +else $as_nop + # $prefix was using backslashes - preserve these in the build, but + # continue to use slashes for the Makefile variables. + if test "x$bindir" = 'x${exec_prefix}/bin' +then : + ocaml_bindir='${exec_prefix}\bin' +else $as_nop + ocaml_bindir="$bindir" +fi + if test "x$libdir" = 'x${exec_prefix}/lib/ocaml' +then : + ocaml_libdir='${exec_prefix}\lib\ocaml' +else $as_nop + ocaml_libdir="$libdir" +fi +fi ;; #( *) : ;; esac + +if test x"$libdir" = x'${exec_prefix}/lib/ocaml' +then : + if test x"$bindir_to_libdir" != 'x' +then : + ocaml_libdir="$bindir_to_libdir" + target_libdir_is_relative=true + case $cygwin_build_env,$host in #( + true,*-w64-mingw32*|true,*-pc-windows) : + build_bindir_to_libdir="$(LC_ALL=C.UTF-8 cygpath \ + "$bindir_to_libdir")" ;; #( + *) : + build_bindir_to_libdir="$bindir_to_libdir" ;; +esac + case $build_bindir_to_libdir in #( + ./*) : + libdir="$bindir${build_bindir_to_libdir#.}" ;; #( + ../*) : + libdir="$bindir/$build_bindir_to_libdir" ;; #( + *) : + as_fn_error $? "--with-relative-libdir requires a relative path" "$LINENO" 5 ;; +esac +fi +else $as_nop + if test x"$bindir_to_libdir" != 'x' +then : + as_fn_error $? "--with-relative-libdir and --libdir cannot both be specified" "$LINENO" 5 +fi +fi + +if test x"$bindir_to_libdir" != 'x' && test x"$TARGET_LIBDIR" != 'x' +then : + as_fn_error $? "--with-relative-libdir and TARGET_LIBDIR cannot both be specified" "$LINENO" 5 fi # Define a few macros that were defined in config/m-nt.h @@ -24179,6 +24559,90 @@ case $target in #( ;; esac +# Determine the three Runtime IDs (see runtime/Mangling.md) + +alphabet='0123456789abcdefghijklmnopqrstuv' + + + +# Bits 0-4 (dev + low 4 bits of release) + + +# Bits 5-6 (high 2 bits of release) + + +# Bits 7-9 (low 3 bits of reserved) +quintet1="1 + $(expr \( $reserved_header_bits \* 4 \) % 32)" +quintet1="$(echo "$alphabet" | cut -c $(expr $quintet1 + 1))" + +# Bits 10-11 (high 2 bits of reserved) +quintet2_byte=\ +"$(echo "$alphabet" | cut -c $(expr \( $reserved_header_bits / 8 \) + 1))" + +# Bit 12 (no-flat-float-array) +if $flat_float_array +then : + quintet2_zinc='0' +else $as_nop + quintet2_byte="4 + $quintet2_byte" + quintet2_zinc='4' +fi +# Bit 13 (fp) +if $frame_pointers +then : + quintet2_native="8 + $quintet2_byte" +else $as_nop + quintet2_native="$quintet2_byte" +fi +# Bit 14 (tsan) +if $tsan +then : + quintet2_native="16 + $quintet2_native" +fi + +quintet2_zinc="$(echo "$alphabet" | cut -c $(expr $quintet2_zinc + 1))" +quintet2_byte="$(echo "$alphabet" | cut -c $(expr $quintet2_byte + 1))" +quintet2_native="$(echo "$alphabet" | cut -c $(expr $quintet2_native + 1))" + +# Bit 15 (int31) +if $arch64 +then : + quintet3_zinc='0' +else $as_nop + quintet3_zinc='1' +fi +# Bit 16 (static) +if ! $supports_shared_libraries +then : + quintet3_zinc="2 + $quintet3_zinc" +fi +# Bit 17 (no-compression) +if test x"$zstd_status" != 'xok' +then : + quintet3_zinc="4 + $quintet3_zinc" +fi +# Bit 18 (ansi) +case $target,$windows_unicode in #( + *-*-mingw32,0|*-pc-windows,0) : + quintet3="8 + $quintet3_zinc" ;; #( + *) : + quintet3="$quintet3_zinc" ;; +esac +# Bit 19 (mutable-string) cannot be set since OCaml 5.0 + +quintet3_zinc="$(echo "$alphabet" | cut -c $(expr $quintet3_zinc + 1))" +quintet3="$(echo "$alphabet" | cut -c $(expr $quintet3 + 1))" + +zinc_runtime_id_lo="b1" +zinc_runtime_id_hi="${quintet2_zinc}${quintet3_zinc}" +bytecode_runtime_id="b${quintet1}${quintet2_byte}${quintet3}" +native_runtime_id="b${quintet1}${quintet2_native}${quintet3}" + +# Update the values for is_release and release_number in +# utils/config.common.ml.in (this is done when tools/autogen is run, not each +# time configure is run!) + + # Do not permanently cache the result of flexdll.h unset ac_cv_header_flexdll_h @@ -24186,6 +24650,10 @@ unset ac_cv_header_flexdll_h # (this is needed for the OCaml configuration module) +# Create ld.conf +ac_config_commands="$ac_config_commands runtime/ld.conf" + + # Just before config.status is generated, determine the final values for MKEXE, # MKDLL, MKMAINDLL and MKEXE_VIA_CC. The final variables controlling these are: # $mkexe - the linking command and munged CFLAGS + any extra flexlink flags @@ -24330,6 +24798,7 @@ cclibs="$cclibs $mathlib $DLLIBS $PTHREAD_LIBS" saved_exec_prefix="$exec_prefix" saved_prefix="$prefix" + prefix="$ocaml_prefix" if test "x$prefix" = "xNONE" then : prefix="$ac_default_prefix" @@ -24339,11 +24808,41 @@ then : exec_prefix="$prefix" fi eval "exec_prefix=\"$exec_prefix\"" - eval "ocaml_bindir=\"$bindir\"" - eval "ocaml_libdir=\"$libdir\"" + # Set variables necessary to create utils/config.generated.ml. + # $HOST_BINDIR is always the absolute path to the binary directory, in host + # format (i.e. potentially with backslashes on Windows). + # $TARGET_BINDIR can be specified by the caller when building cross-compilers, + # and the value then is used unaltered. Otherwise, $TARGET_BINDIR is set to + # '.' when the compiler is configured with --with-relative-libdir or the + # value of $HOST_BINDIR otherwise. + # $ocaml_bindir is used in utils/config.generated.ml and is _empty_ if the + # compiler is configured with --with-relative-libdir (otherwise the path + # would be embedded in config.cmo) + eval "HOST_BINDIR=\"$ocaml_bindir\"" + if test "x$bindir_to_libdir" = 'x' +then : + ocaml_bindir="$HOST_BINDIR" +else $as_nop + ocaml_bindir='' +fi if test x"$TARGET_BINDIR" = 'x' then : - TARGET_BINDIR="$ocaml_bindir" + if test "x$bindir_to_libdir" = 'x' +then : + TARGET_BINDIR="$HOST_BINDIR" +else $as_nop + TARGET_BINDIR='.' +fi +fi + eval "ocaml_libdir=\"$ocaml_libdir\"" + if test x"$TARGET_LIBDIR" = 'x' +then : + if test "x$bindir_to_libdir" = 'x' +then : + TARGET_LIBDIR="$ocaml_libdir" +else $as_nop + TARGET_LIBDIR="$bindir_to_libdir" +fi fi if test x"$target_launch_method" = 'x' then : @@ -25304,14 +25803,14 @@ fi - launch_method='$(echo "$launch_method" | sed -e "s/'/'\"'\"'/g")' - target_launch_method=\ -'$(echo "$target_launch_method" | sed -e "s/'/'\"'\"'/g")' - ocaml_bindir='$(echo "$ocaml_bindir" | sed -e "s/'/'\"'\"'/g")' - TARGET_BINDIR='$(echo "$TARGET_BINDIR" | sed -e "s/'/'\"'\"'/g")' ocamltest_unix_mod='$ocamltest_unix_mod' unix_or_win32='$unix_or_win32' +ocaml_additional_stublibs_dir=\ +'$(echo "$ocaml_additional_stublibs_dir" | sed -e "s/'/'\"'\"'/g")' + ocaml_libdir='$(echo "$ocaml_libdir" | sed -e "s/'/'\"'\"'/g")' + default_separator='$default_separator' + supports_shared_libraries='$supports_shared_libraries' _ACEOF @@ -25324,8 +25823,6 @@ do "Makefile.build_config") CONFIG_FILES="$CONFIG_FILES Makefile.build_config" ;; "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;; "stdlib/sys.ml") CONFIG_FILES="$CONFIG_FILES stdlib/sys.ml" ;; - "manual/src/version.tex") CONFIG_FILES="$CONFIG_FILES manual/src/version.tex" ;; - "manual/src/html_processing/src/common.ml") CONFIG_FILES="$CONFIG_FILES manual/src/html_processing/src/common.ml" ;; "otherlibs/dynlink/dynlink_config.ml") CONFIG_FILES="$CONFIG_FILES otherlibs/dynlink/dynlink_config.ml" ;; "utils/config.common.ml") CONFIG_FILES="$CONFIG_FILES utils/config.common.ml" ;; "utils/config.generated.ml") CONFIG_FILES="$CONFIG_FILES utils/config.generated.ml" ;; @@ -25337,6 +25834,8 @@ do "otherlibs/dynlink/META") CONFIG_FILES="$CONFIG_FILES otherlibs/dynlink/META" ;; "otherlibs/runtime_events/META") CONFIG_FILES="$CONFIG_FILES otherlibs/runtime_events/META" ;; "stdlib/META") CONFIG_FILES="$CONFIG_FILES stdlib/META" ;; + "manual/src/version.tex") CONFIG_FILES="$CONFIG_FILES manual/src/version.tex" ;; + "manual/src/html_processing/src/common.ml") CONFIG_FILES="$CONFIG_FILES manual/src/html_processing/src/common.ml" ;; "native-symlinks") CONFIG_COMMANDS="$CONFIG_COMMANDS native-symlinks" ;; "ocamldoc/META") CONFIG_FILES="$CONFIG_FILES ocamldoc/META" ;; "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; @@ -25346,11 +25845,11 @@ do "otherlibs/unix/META") CONFIG_FILES="$CONFIG_FILES otherlibs/unix/META" ;; "otherlibs/unix/unix.ml") CONFIG_LINKS="$CONFIG_LINKS otherlibs/unix/unix.ml:otherlibs/unix/unix_${unix_or_win32}.ml" ;; "otherlibs/str/META") CONFIG_FILES="$CONFIG_FILES otherlibs/str/META" ;; - "shebang") CONFIG_COMMANDS="$CONFIG_COMMANDS shebang" ;; "otherlibs/systhreads/META") CONFIG_FILES="$CONFIG_FILES otherlibs/systhreads/META" ;; "links") CONFIG_COMMANDS="$CONFIG_COMMANDS links" ;; "ocamltest/ocamltest_config.ml") CONFIG_FILES="$CONFIG_FILES ocamltest/ocamltest_config.ml" ;; "ocamltest/ocamltest_unix.ml") CONFIG_LINKS="$CONFIG_LINKS ocamltest/ocamltest_unix.ml:${ocamltest_unix_mod}" ;; + "runtime/ld.conf") CONFIG_COMMANDS="$CONFIG_COMMANDS runtime/ld.conf" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac @@ -26487,10 +26986,13 @@ ltmain=$ac_aux_dir/ltmain.sh chmod +x "$ofile" ;; - "shebang":C) printf '%s\n%s\000\n' "$launch_method" "$ocaml_bindir" \ - > stdlib/runtime.info - printf '%s\n%s\000\n' "$target_launch_method" "$TARGET_BINDIR" \ - > stdlib/target_runtime.info ;; + "runtime/ld.conf":C) rm -f runtime/ld.conf + test x"$ocaml_additional_stublibs_dir" = 'x' || \ + echo "$ocaml_additional_stublibs_dir" > runtime/ld.conf + if $supports_shared_libraries; then + echo ".${default_separator}stublibs" >> runtime/ld.conf + fi + echo "." >> runtime/ld.conf ;; esac done # for ac_tag diff --git a/configure.ac b/configure.ac index ebb93959318c..7b51a75cc45f 100644 --- a/configure.ac +++ b/configure.ac @@ -84,6 +84,12 @@ ocamltest_unix_impl="dummy" unix_library="" unix_directory="" diff_supports_color=false +target_libdir_is_relative=false +srcdir_abs='' +srcdir_abs_real='' +build_map_flags='' +runtime_search='' +runtime_search_target='' # Information about the package @@ -106,6 +112,7 @@ AC_SUBST([OCAML_VERSION_MINOR], [OCAML__VERSION_MINOR]) AC_SUBST([OCAML_VERSION_PATCHLEVEL], [OCAML__VERSION_PATCHLEVEL]) AC_SUBST([OCAML_VERSION_EXTRA], [OCAML__VERSION_EXTRA]) AC_SUBST([OCAML_VERSION_SHORT], [OCAML__VERSION_SHORT]) +AC_SUBST([OCAML_RELEASE_NUMBER], [OCAML__RELEASE_NUMBER]) AC_DEFINE([MAGIC_NUMBER_PREFIX], ["][MAGIC_NUMBER__PREFIX]["]) AC_DEFINE([MAGIC_NUMBER_VERSION], ["][MAGIC_NUMBER__VERSION]["]) AC_DEFINE([EXEC_MAGIC_LENGTH], [MAGIC_NUMBER__LENGTH]) @@ -189,6 +196,8 @@ AC_SUBST([flexdll_dir]) AC_SUBST([winpthreads_source_dir]) AC_SUBST([winpthreads_source_include_dir]) AC_SUBST([shebangscripts]) +AC_SUBST([launch_method]) +AC_SUBST([target_launch_method]) AC_SUBST([AR]) AC_SUBST([mklib]) AC_SUBST([supports_shared_libraries]) @@ -216,6 +225,7 @@ AC_SUBST([unix_library]) AC_SUBST([unix_directory]) AC_SUBST([cc_has_debug_prefix_map]) AC_SUBST([as_has_debug_prefix_map]) +AC_SUBST([as_is_cc]) AC_SUBST([with_debugger]) # TODO: rename this variable AC_SUBST([build_ocamldebug]) AC_SUBST([build_ocamltex]) @@ -262,19 +272,29 @@ AC_SUBST([flexdll_chain]) AC_SUBST([PACKLD]) AC_SUBST([build_libraries_manpages]) AC_SUBST([compute_deps]) +AC_SUBST([ocaml_prefix]) AC_SUBST([ocaml_bindir]) AC_SUBST([ocaml_libdir]) AC_SUBST([TARGET_LIBDIR]) AC_SUBST([QS]) AC_SUBST([ar_supports_response_files]) +AC_SUBST([target_libdir_is_relative]) +AC_SUBST([srcdir_abs]) +AC_SUBST([srcdir_abs_real]) +AC_SUBST([build_map_flags]) +AC_SUBST([zinc_runtime_id_lo]) +AC_SUBST([zinc_runtime_id_hi]) +AC_SUBST([bytecode_runtime_id]) +AC_SUBST([native_runtime_id]) +AC_SUBST([suffixing]) +AC_SUBST([runtime_search]) +AC_SUBST([runtime_search_target]) ## Generated files AC_CONFIG_FILES([Makefile.build_config]) AC_CONFIG_FILES([Makefile.config]) AC_CONFIG_FILES([stdlib/sys.ml]) -AC_CONFIG_FILES([manual/src/version.tex]) -AC_CONFIG_FILES([manual/src/html_processing/src/common.ml]) AC_CONFIG_FILES([otherlibs/dynlink/dynlink_config.ml]) AC_CONFIG_FILES([utils/config.common.ml]) AC_CONFIG_FILES([utils/config.generated.ml]) @@ -286,6 +306,9 @@ AC_CONFIG_FILES([compilerlibs/META]) AC_CONFIG_FILES([otherlibs/dynlink/META]) AC_CONFIG_FILES([otherlibs/runtime_events/META]) AC_CONFIG_FILES([stdlib/META]) +AS_IF([test -d manual], + [AC_CONFIG_FILES([manual/src/version.tex]) + AC_CONFIG_FILES([manual/src/html_processing/src/common.ml])]) # Definitions related to the version of OCaml AC_DEFINE([OCAML_VERSION_MAJOR], [OCAML__VERSION_MAJOR]) @@ -296,6 +319,7 @@ m4_if([OCAML__VERSION_EXTRA],[], [], AC_DEFINE([OCAML_VERSION_EXTRA], ["][OCAML__VERSION_EXTRA]["])]) AC_DEFINE([OCAML_VERSION], [OCAML__VERSION_NUMBER]) AC_DEFINE([OCAML_VERSION_STRING], ["][OCAML__VERSION]["]) +AC_DEFINE([OCAML_RELEASE_NUMBER], [OCAML__RELEASE_NUMBER]) # Works out how many "o"s are needed in quoted strings AC_CONFIG_COMMANDS_PRE(OCAML_QUOTED_STRING_ID) @@ -330,12 +354,44 @@ AS_IF([test x"$target_alias" != x], AS_IF([test -n "$target_alias"], [ac_tool_prefix=$target_alias-]) +# $cygwin_build_env=true if the build is taking place in any kind of Cygwin-like +# environment (which may include cross-compiling _from_ Cygwin) +# All patterns end with * (cf. build-aux/config.sub) +# +# In Cygwin itself, the mingw-w64 compilers are cross-compilers +# (host=x86_64-pc-cygwin; target=*-w64-mingw32) and $build when running from +# within Cygwin is always *-pc-cygwin. +# +# In MSYS2, the mingw-w64 compiler are normal host compilers, which MSYS2 makes +# available through different Environments (in a similar to the Microsoft Visual +# Studio Tools Command Prompts; see https://www.msys2.org/docs/environments/). +# It is possible to use MSYS2's "Cygwin" gcc (the equivalent of compiling native +# Cygwin), in which case $build is *-*-msys*. +# The mingw-w64 Environments manually set $build to *-w64-mingw32, but the +# _native_ value inferred by config.guess (which uses uname -s) will be +# *-pc-mingw32 (for the 32-bit _target_ environments, even though MSYS2 is a +# 64-bit build environment) and *-pc-mingw64 (for the 64-bit _target_ +# environments). +# +# This leads to the four patterns below, all of which imply that the build is +# taking place on a system where Cygwin's utilities (cygpath, etc.) can be +# expected to be found and semantics (CYGWIN=winsymlinks:native, etc.) expected +# to apply. +# +# Note that although build=x86_64-pc-mingw64 will be accepted here, it is highly +# likely that that's a misconfigured environment, and the script will +# subsequently fail if host has not been altered to x86_64-w64-mingw32. +AS_CASE([$build], + [*-*-cygwin*|*-*-msys*|*-*-mingw32*|*-*-mingw64*], + [cygwin_build_env=true], + [cygwin_build_env=false]) + # Ensure that AC_CONFIG_LINKS will either create symlinks which are compatible # with native Windows (i.e. NTFS symlinks, not WSL or Cygwin-emulated ones) or # use its fallback mechanisms. Native Windows versions of ocamlc/ocamlopt cannot # interpret either WSL or Cygwin-emulated symlinks. -AS_CASE([$host], - [*-pc-windows|*-w64-mingw32*], +AS_CASE([$cygwin_build_env,$host], + [true,*-pc-windows|true,*-w64-mingw32*], [AC_CONFIG_COMMANDS([native-symlinks], [], [export CYGWIN="\$CYGWIN\${CYGWIN:+ }winsymlinks:nativestrict" export MSYS="\$MSYS\${MSYS:+ }winsymlinks:nativestrict"])]) @@ -581,6 +637,18 @@ AC_ARG_WITH([target-sh], [target_launch_method="$withval"])], [target_launch_method='']) +AC_ARG_WITH([stublibs], + [AS_HELP_STRING([--with-stublibs], + [additional directory for searching for bytecode stub libraries])], + [AS_IF([test x"$withval" = 'xno'], + [ocaml_additional_stublibs_dir=''], + [dnl A user wishing to add "yes" to ld.conf may do so manually - catch + dnl --with-stublibs as an error. + AS_IF([test x"$withval" = 'xyes'], + [AC_MSG_ERROR([--with-stublibs needs an argument])]) + ocaml_additional_stublibs_dir="$withval"])], + [ocaml_additional_stublibs_dir='']) + AC_ARG_ENABLE([reserved-header-bits], [AS_HELP_STRING([--enable-reserved-header-bits=BITS], [reserve BITS (between 0 and 31) bits in block headers])], @@ -625,6 +693,45 @@ AC_ARG_ENABLE([mmap-map-stack], [AS_HELP_STRING([--enable-mmap-map-stack], [use mmap to allocate stacks instead of malloc])]) +AC_ARG_WITH([relative-libdir], + [AS_HELP_STRING([--with-relative-libdir], + [location of the Standard Library, specified relative to --bindir])], + [AS_IF([test x"$withval" = 'xno'], + [bindir_to_libdir=''], + [bindir_to_libdir="$withval"])], + [bindir_to_libdir='']) + +AC_ARG_ENABLE([suffixing], + [AS_HELP_STRING([--disable-suffixing], + [disable suffixing of runtime executables and shared libraries])], + [AS_IF([test "x$enableval" = 'xno'], [suffixing=false], [suffixing=true])], + [suffixing=true]) + +AC_ARG_ENABLE([runtime-search], + [AS_HELP_STRING([--enable-runtime-search], + [allow the distribution's bytecode executables to search for ocamlrun])], + [AS_CASE([$enableval], + [no],[], + [yes],[runtime_search='enable'], + [always],[runtime_search='always'], + [AC_MSG_ERROR(m4_normalize([valid values are yes, no or always for + --enable-runtime-search]))])]) + +AC_ARG_ENABLE([runtime-search-target], + [AS_HELP_STRING([--enable-runtime-search-target], + [allow bytecode executables produced by ocamlc to search for ocamlrun])], + [AS_CASE([$enableval], + [no],[], + [yes],[runtime_search_target='enable'], + [always],[runtime_search_target='always'], + [AC_MSG_ERROR(m4_normalize([valid values are yes, no or always for + --enable-runtime-search-target]))])]) + +AS_CASE([$suffixing,$runtime_search,$runtime_search_target], + [true,*,*|false,,],[], + [false,*,*],[AC_MSG_ERROR(m4_normalize([--disable-suffixed cannot be used with + --enable-runtime-search or --enable-runtime-search-target]))]) + AC_ARG_WITH([afl], [AS_HELP_STRING([--with-afl], [use the AFL fuzzer])]) @@ -811,14 +918,15 @@ AS_CASE([$ocaml_cc_vendor], # See https://lists.gnu.org/archive/html/autoconf/2019-07/msg00002.html # for the detailed explanation. -ocamlsrcdir=$(unset CDPATH; cd -- "$srcdir" && printf %sX "$PWD") || fail -ocamlsrcdir=${ocamlsrcdir%X} +srcdir_abs=$(unset CDPATH; cd -- "$srcdir" && printf %sX "$PWD") || fail +srcdir_abs=${srcdir_abs%X} -AS_CASE([$host], - [*-w64-mingw32*|*-pc-windows], +AS_CASE([$cygwin_build_env,$host], + [true,*-w64-mingw32*|true,*-pc-windows], [OCAML_CHECK_LN_ON_WINDOWS - ocamlsrcdir="$(LC_ALL=C.UTF-8 cygpath -w -- "$ocamlsrcdir")"], - [ln='ln -sf']) + ocamlsrcdir="$(LC_ALL=C.UTF-8 cygpath -w -- "$srcdir_abs")"], + [ln='ln -sf' + ocamlsrcdir="$srcdir_abs"]) # Whether ar supports @FILE arguments @@ -831,9 +939,11 @@ AS_CASE([$lt_cv_ar_at_file], AS_CASE([$target], [*-w64-mingw32*|*-pc-windows], [unix_or_win32="win32" + default_separator='\' ocamltest_libunix="Some false" ocamlyacc_wstr_module="yacc/wstr"], [unix_or_win32="unix" + default_separator='/' ocamltest_libunix="Some true" ocamlyacc_wstr_module=""]) @@ -897,24 +1007,6 @@ AS_IF([test "x$interpval" = "xyes"], )] ) -# stdlib/runtime.info and stdlib/target_runtime.info are generated by commands -# in config.status, rather than by the .in mechanism, since the latter cannot -# reliably process binary files. -AC_CONFIG_COMMANDS([shebang], - [printf '%s\n%s\000\n' "$launch_method" "$ocaml_bindir" \ - > stdlib/runtime.info - printf '%s\n%s\000\n' "$target_launch_method" "$TARGET_BINDIR" \ - > stdlib/target_runtime.info], -dnl These declarations are put in a here-document in configure, so the command -dnl in '$(...)' _is_ evaluated as the content is written to config.status (by -dnl standard interpretation of a here-document). The sed commands quote any -dnl nefarious single quotes which may appear in any of the strings. - [launch_method='$(echo "$launch_method" | sed -e "s/'/'\"'\"'/g")' - target_launch_method=\ -'$(echo "$target_launch_method" | sed -e "s/'/'\"'\"'/g")' - ocaml_bindir='$(echo "$ocaml_bindir" | sed -e "s/'/'\"'\"'/g")' - TARGET_BINDIR='$(echo "$TARGET_BINDIR" | sed -e "s/'/'\"'\"'/g")']) - # Checks for programs ## Check for the C compiler: done by libtool @@ -1136,12 +1228,13 @@ AS_IF([test x"$supports_shared_libraries" != 'xfalse'], [ # ensure it can be executed from a native Windows process. The check # is only necessary when cross-compiling. AS_IF([test x"$build" != x"$host"],[ - AS_CASE([$build], - [*-pc-msys|*-*-cygwin], - [flexlink_where="$(cmd /c "$flexlink" -where 2>/dev/null)" - AS_IF([test -z "$flexlink_where"], - [AC_MSG_ERROR(m4_normalize([$flexlink is not executable from a - native Win32 process]))])]) + AS_IF([$cygwin_build_env], + [flexlink_where="$(cmd /c "$flexlink" -where 2>/dev/null)" + AS_IF([test -z "$flexlink_where"], + [AC_MSG_ERROR(m4_normalize([$flexlink is not executable from a + native Win32 process])) + ]) + ]) ]) ]) @@ -1294,6 +1387,7 @@ AC_SEARCH_LIBS([cos], [m], # Checks for header files +AC_CHECK_HEADER([libgen.h],[AC_DEFINE([HAS_LIBGEN_H], [1])]) AC_CHECK_HEADER([pthread_np.h],[AC_DEFINE([HAS_PTHREAD_NP_H], [1])]) AC_CHECK_HEADER([dirent.h], [AC_DEFINE([HAS_DIRENT], [1])], [], [#include ]) @@ -1780,24 +1874,33 @@ AS_IF([test -n "$target_alias"], # to avoiding forking a C compiler process for each compilation by ocamlopt. # Both AS and ASPP can be overridden by the user. -default_as="$CC -c" -default_aspp="$CC -c" +as_is_cc=true +default_as='' AS_CASE([$as_target,$ocaml_cc_vendor], [*-*-linux*,gcc-*], [AS_CASE([$as_cpu], [x86_64|arm*|aarch64*|i[[3-6]]86|riscv*], - [default_as="${toolpref}as"])], + [default_as="${toolpref}as" + as_is_cc=false])], + [*-*-cygwin,gcc-*], + [default_as="${toolpref}as" + as_is_cc=false], [i686-pc-windows,*], [default_as="ml -nologo -coff -Cp -c -Fo" - default_aspp="$default_as"], + default_aspp="$default_as" + as_is_cc=false], [x86_64-pc-windows,*], [default_as="ml64 -nologo -Cp -c -Fo" - default_aspp="$default_as"], + default_aspp="$default_as" + as_is_cc=false], [*-*-darwin*,clang-*], - [default_as="$default_as -Wno-trigraphs" + [default_as="$CC -c -Wno-trigraphs" default_aspp="$default_as"]) +AS_IF([test -z "$default_as"],[default_as="$CC -c"]) +AS_IF([test -z "$default_aspp"],[default_aspp="$CC -c"]) + AS_IF([test "$with_pic"], [fpic=true AC_DEFINE([CAML_WITH_FPIC], [1]) @@ -1857,6 +1960,9 @@ AC_CHECK_FUNC([getrusage], [AC_DEFINE([HAS_GETRUSAGE], [1])]) ## times AC_CHECK_FUNC([times], [AC_DEFINE([HAS_TIMES], [1])]) +## strlcpy +AC_CHECK_FUNC([strlcpy], [AC_DEFINE([HAS_STRLCPY], [1])]) + ## secure_getenv and __secure_getenv saved_CPPFLAGS="$CPPFLAGS" @@ -2395,7 +2501,6 @@ AC_CHECK_FUNC([pwrite], [AC_DEFINE([HAS_PWRITE], [1])]) ## -fdebug-prefix-map support by the C compiler AS_CASE([$ocaml_cc_vendor,$target], - [*,*-w64-mingw32*], [cc_has_debug_prefix_map=false], [*,*-pc-windows], [cc_has_debug_prefix_map=false], [xlc*,powerpc-ibm-aix*], [cc_has_debug_prefix_map=false], [sunc*,sparc-sun-*], [cc_has_debug_prefix_map=false], @@ -2403,6 +2508,20 @@ AS_CASE([$ocaml_cc_vendor,$target], [cc_has_debug_prefix_map=true], [cc_has_debug_prefix_map=false], [$warn_error_flag])]) +## -ffile-prefix-map support by the C compiler - used in the build, not by +## the compiler +AS_IF([test x"$bindir_to_libdir" != 'x'], + [srcdir_abs_real="$(realpath "$srcdir_abs" 2>/dev/null)" + AS_IF([test x"$srcdir_abs_real" = "x$srcdir_abs"], + [srcdir_abs_real='']) + AX_CHECK_COMPILE_FLAG([-Wa,--debug-prefix-map=old=new], + [build_map_flags='-Wa,--debug-prefix-map='], [], [$warn_error_flag]) + AX_CHECK_COMPILE_FLAG([-ffile-prefix-map=old=new], + [build_map_flags="$build_map_flags -ffile-prefix-map="], + [AS_IF([$cc_has_debug_prefix_map], + [build_map_flags="$build_map_flags -fdebug-prefix-map="])], + [$warn_error_flag])]) + ## Does stat support nanosecond precision stat_has_ns_precision=false @@ -2889,9 +3008,6 @@ AC_CONFIG_COMMANDS_PRE([cclibs="$cclibs $mathlib $DLLIBS $PTHREAD_LIBS"]) AS_IF([test x"$libdir" = x'${exec_prefix}/lib'], [libdir="$libdir"/ocaml]) -AS_IF([test x"$TARGET_LIBDIR" = x], - [TARGET_LIBDIR="$libdir"]) - AS_IF([test x"$mandir" = x'${datarootdir}/man'], [mandir='${prefix}/man']) @@ -2901,10 +3017,71 @@ AS_IF([test x"$prefix" = "xNONE"], [i686-w64-mingw32*], [prefix='C:/ocamlmgw'], [x86_64-w64-mingw32*], [prefix='C:/ocamlmgw64'], [i686-pc-windows], [prefix='C:/ocamlms'], - [x86_64-pc-windows], [prefix='C:/ocamlms64'])], - [AS_CASE([$build,$host], - [*-*-cygwin,*-w64-mingw32*|*-*-cygwin,*-pc-windows], - [prefix="$(LC_ALL=C.UTF-8 cygpath -m "$prefix")"])]) + [x86_64-pc-windows], [prefix='C:/ocamlms64'])]) + +# Normalise $prefix, if necessary, on Windows. There are 9 variables to be +# considered: +# - $prefix and $exec_prefix. These are autoconf variables containing the values +# specified for --prefix and --exec-prefix respectively, or NONE if these +# flags were not given on the command line. These two variables are written to +# Makefile.config and are in _build_ format. On native Windows, the values +# must be suitable to pass to _both_ native Windows processes and Cygwin/MSYS2 +# commands. The values are ultimately converted to Windows paths using slashes +# (i.e. C:/foo) +# - $bindir and $libdir. These similarly contain the values specified for +# --bindir and --libdir, or defaults relative to $exec_prefix otherwise. +# Unlike --prefix, values specified to configure for --bindir and --libdir are +# assumed to be suitable for both native Windows processes and Cygwin/MSYS2. +# These two variables ultimately end up in Makefile.config as $(BINDIR) and +# $(LIBDIR) and are used for installation commands only. +# - $ocaml_prefix, $ocaml_bindir and $ocaml_libdir are the _expanded_ values of +# $prefix, $bindir and $libdir so that the values can be inserted into OCaml +# strings (typically in utils/config.generated.ml). On Windows, these preserve +# the backslashes present in the value passed to --prefix. +# - $TARGET_BINDIR and $TARGET_LIBDIR. These are both precious environment +# variables (meaning their value is recorded by configure) but they default to +# $ocaml_bindir and $ocaml_libdir respectively. +ocaml_bindir="$bindir" +ocaml_libdir="$libdir" +ocaml_prefix="$prefix" +AS_CASE([$cygwin_build_env,$host], + [true,*-w64-mingw32*|true,*-pc-windows], + [prefix="$(LC_ALL=C.UTF-8 cygpath -m "$prefix")" + AS_IF([test "x${ocaml_prefix%%/*}" != "x$ocaml_prefix"], + # $prefix contained a slash - normalise it with cygpath. The rationale for + # this allows both `./configure --prefix $PWD/install` (which will be a + # Cygwin path) and also systems lazily using slashes instead of + # backslashes (i.e. C:/Backslashes/Scare/Us) to work. + [ocaml_prefix="$prefix"], + # $prefix was using backslashes - preserve these in the build, but + # continue to use slashes for the Makefile variables. + [AS_IF([test "x$bindir" = 'x${exec_prefix}/bin'], + [ocaml_bindir='${exec_prefix}\bin'], + [ocaml_bindir="$bindir"]) + AS_IF([test "x$libdir" = 'x${exec_prefix}/lib/ocaml'], + [ocaml_libdir='${exec_prefix}\lib\ocaml'], + [ocaml_libdir="$libdir"])])]) + +AS_IF([test x"$libdir" = x'${exec_prefix}/lib/ocaml'], + [AS_IF([test x"$bindir_to_libdir" != 'x'], + [ocaml_libdir="$bindir_to_libdir" + target_libdir_is_relative=true + AS_CASE([$cygwin_build_env,$host], + [true,*-w64-mingw32*|true,*-pc-windows], + [build_bindir_to_libdir="$(LC_ALL=C.UTF-8 cygpath \ + "$bindir_to_libdir")"], + [build_bindir_to_libdir="$bindir_to_libdir"]) + AS_CASE([$build_bindir_to_libdir], + [./*],[libdir="$bindir${build_bindir_to_libdir[#].}"], + [../*],[libdir="$bindir/$build_bindir_to_libdir"], + [AC_MSG_ERROR([--with-relative-libdir requires a relative path])])])], + [AS_IF([test x"$bindir_to_libdir" != 'x'], + [AC_MSG_ERROR(m4_normalize([--with-relative-libdir and --libdir cannot both + be specified]))])]) + +AS_IF([test x"$bindir_to_libdir" != 'x' && test x"$TARGET_LIBDIR" != 'x'], + [AC_MSG_ERROR(m4_normalize([--with-relative-libdir and TARGET_LIBDIR cannot + both be specified]))]) # Define a few macros that were defined in config/m-nt.h # but whose value is not guessed properly by configure @@ -2923,6 +3100,81 @@ AS_CASE([$target], # as "Infinity" and "Inf" instead of the expected "inf" [AC_DEFINE([HAS_BROKEN_PRINTF], [1])]) +# Determine the three Runtime IDs (see runtime/Mangling.md) +m4_define([ALPHABET], [0123456789abcdefghijklmnopqrstuv]) +alphabet='ALPHABET' + +m4_cond(OCAML__DEVELOPMENT_VERSION, [true], + [m4_define([ID_VERSION], m4_eval((OCAML__RELEASE_NUMBER << 1) + 1))], + [m4_define([ID_VERSION], m4_eval((OCAML__RELEASE_NUMBER << 1)))]) + +# Bits 0-4 (dev + low 4 bits of release) +m4_define([QUINTET0], + [m4_substr(ALPHABET, m4_eval(ID_VERSION & 31), [1])]) + +# Bits 5-6 (high 2 bits of release) +m4_define([QUINTET1_ZINC], + [m4_substr(ALPHABET, m4_eval(ID_VERSION >> 5), [1])]) + +# Bits 7-9 (low 3 bits of reserved) +quintet1="QUINTET1_ZINC + $(expr \( $reserved_header_bits \* 4 \) % 32)" +quintet1="$(echo "$alphabet" | cut -c $(expr $quintet1 + 1))" + +# Bits 10-11 (high 2 bits of reserved) +quintet2_byte=\ +"$(echo "$alphabet" | cut -c $(expr \( $reserved_header_bits / 8 \) + 1))" + +# Bit 12 (no-flat-float-array) +AS_IF([$flat_float_array], + [quintet2_zinc='0'], + [quintet2_byte="4 + $quintet2_byte" + quintet2_zinc='4']) +# Bit 13 (fp) +AS_IF([$frame_pointers], + [quintet2_native="8 + $quintet2_byte"], + [quintet2_native="$quintet2_byte"]) +# Bit 14 (tsan) +AS_IF([$tsan], + [quintet2_native="16 + $quintet2_native"]) + +quintet2_zinc="$(echo "$alphabet" | cut -c $(expr $quintet2_zinc + 1))" +quintet2_byte="$(echo "$alphabet" | cut -c $(expr $quintet2_byte + 1))" +quintet2_native="$(echo "$alphabet" | cut -c $(expr $quintet2_native + 1))" + +# Bit 15 (int31) +AS_IF([$arch64], + [quintet3_zinc='0'], + [quintet3_zinc='1']) +# Bit 16 (static) +AS_IF([! $supports_shared_libraries], + [quintet3_zinc="2 + $quintet3_zinc"]) +# Bit 17 (no-compression) +AS_IF([test x"$zstd_status" != 'xok'], + [quintet3_zinc="4 + $quintet3_zinc"]) +# Bit 18 (ansi) +AS_CASE([$target,$windows_unicode], + [*-*-mingw32,0|*-pc-windows,0], + [quintet3="8 + $quintet3_zinc"], + [quintet3="$quintet3_zinc"]) +# Bit 19 (mutable-string) cannot be set since OCaml 5.0 + +quintet3_zinc="$(echo "$alphabet" | cut -c $(expr $quintet3_zinc + 1))" +quintet3="$(echo "$alphabet" | cut -c $(expr $quintet3 + 1))" + +zinc_runtime_id_lo="QUINTET0[]QUINTET1_ZINC" +zinc_runtime_id_hi="${quintet2_zinc}${quintet3_zinc}" +bytecode_runtime_id="QUINTET0${quintet1}${quintet2_byte}${quintet3}" +native_runtime_id="QUINTET0${quintet1}${quintet2_native}${quintet3}" + +# Update the values for is_release and release_number in +# utils/config.common.ml.in (this is done when tools/autogen is run, not each +# time configure is run!) +m4_syscmd([sed -e '/^let is_release =/s/=.*/= ]'\ +'m4_if(OCAML__DEVELOPMENT_VERSION,true,false,true)[/' \ + -e '/^let release_number =/s/=.*/= ]OCAML__RELEASE_NUMBER[/' \ + utils/config.common.ml.in > utils/config.common.ml.in.new +mv -f utils/config.common.ml.in.new utils/config.common.ml.in]) + # Do not permanently cache the result of flexdll.h unset ac_cv_header_flexdll_h @@ -2931,17 +3183,53 @@ unset ac_cv_header_flexdll_h AC_CONFIG_COMMANDS_PRE([ saved_exec_prefix="$exec_prefix" saved_prefix="$prefix" + prefix="$ocaml_prefix" AS_IF([test "x$prefix" = "xNONE"],[prefix="$ac_default_prefix"]) AS_IF([test "x$exec_prefix" = "xNONE"],[exec_prefix="$prefix"]) eval "exec_prefix=\"$exec_prefix\"" - eval "ocaml_bindir=\"$bindir\"" - eval "ocaml_libdir=\"$libdir\"" - AS_IF([test x"$TARGET_BINDIR" = 'x'],[TARGET_BINDIR="$ocaml_bindir"]) + # Set variables necessary to create utils/config.generated.ml. + # $HOST_BINDIR is always the absolute path to the binary directory, in host + # format (i.e. potentially with backslashes on Windows). + # $TARGET_BINDIR can be specified by the caller when building cross-compilers, + # and the value then is used unaltered. Otherwise, $TARGET_BINDIR is set to + # '.' when the compiler is configured with --with-relative-libdir or the + # value of $HOST_BINDIR otherwise. + # $ocaml_bindir is used in utils/config.generated.ml and is _empty_ if the + # compiler is configured with --with-relative-libdir (otherwise the path + # would be embedded in config.cmo) + eval "HOST_BINDIR=\"$ocaml_bindir\"" + AS_IF([test "x$bindir_to_libdir" = 'x'], + [ocaml_bindir="$HOST_BINDIR"], + [ocaml_bindir='']) + AS_IF([test x"$TARGET_BINDIR" = 'x'], + [AS_IF([test "x$bindir_to_libdir" = 'x'], + [TARGET_BINDIR="$HOST_BINDIR"], + [TARGET_BINDIR='.'])]) + eval "ocaml_libdir=\"$ocaml_libdir\"" + AS_IF([test x"$TARGET_LIBDIR" = 'x'], + [AS_IF([test "x$bindir_to_libdir" = 'x'], + [TARGET_LIBDIR="$ocaml_libdir"], + [TARGET_LIBDIR="$bindir_to_libdir"])]) AS_IF([test x"$target_launch_method" = 'x'], [target_launch_method="$launch_method"]) prefix="$saved_prefix" exec_prefix="$saved_exec_prefix"]) +# Create ld.conf +AC_CONFIG_COMMANDS([runtime/ld.conf], + [rm -f runtime/ld.conf + test x"$ocaml_additional_stublibs_dir" = 'x' || \ + echo "$ocaml_additional_stublibs_dir" > runtime/ld.conf + if $supports_shared_libraries; then + echo ".${default_separator}stublibs" >> runtime/ld.conf + fi + echo "." >> runtime/ld.conf], + [ocaml_additional_stublibs_dir=\ +'$(echo "$ocaml_additional_stublibs_dir" | sed -e "s/'/'\"'\"'/g")' + ocaml_libdir='$(echo "$ocaml_libdir" | sed -e "s/'/'\"'\"'/g")' + default_separator='$default_separator' + supports_shared_libraries='$supports_shared_libraries']) + # Just before config.status is generated, determine the final values for MKEXE, # MKDLL, MKMAINDLL and MKEXE_VIA_CC. The final variables controlling these are: # $mkexe - the linking command and munged CFLAGS + any extra flexlink flags diff --git a/debugger/unix_tools.ml b/debugger/unix_tools.ml index e054615addca..3ca0403ffeb1 100644 --- a/debugger/unix_tools.ml +++ b/debugger/unix_tools.ml @@ -136,6 +136,8 @@ let rec expand_path ch = in try Filename.concat (Sys.getenv "HOME") tail with Not_found -> + (* If Sys.getenv "LOGNAME" = "" then getpwnam will raise + Not_found instead *) concat_root (Sys.getenv "LOGNAME") tail) | n -> concat_root (String.sub ch 1 (n - 1)) diff --git a/driver/compenv.ml b/driver/compenv.ml index 65842714c42a..af5f67fd3474 100644 --- a/driver/compenv.ml +++ b/driver/compenv.ml @@ -43,6 +43,8 @@ let fatal err = prerr_endline err; raise (Exit_with_status 2) +let fatalf fmt = Printf.ksprintf fatal fmt + let extract_output = function | Some s -> s | None -> @@ -624,7 +626,7 @@ type deferred_action = | ProcessCFile of string | ProcessOtherFile of string | ProcessObjects of string list - | ProcessDLLs of string list + | ProcessDLLs of bool * string list let c_object_of_filename name = Filename.chop_suffix (Filename.basename name) ".c" ^ Config.ext_obj @@ -673,8 +675,8 @@ let process_action ctx action = ccobjs := obj_name :: !ccobjs | ProcessObjects names -> ccobjs := names @ !ccobjs - | ProcessDLLs names -> - dllibs := names @ !dllibs + | ProcessDLLs (suffixed, names) -> + dllibs := (List.map (fun n -> (suffixed, n)) names) @ !dllibs | ProcessOtherFile name -> if Filename.check_suffix name ocaml_mod_ext || Filename.check_suffix name ocaml_lib_ext then @@ -687,7 +689,7 @@ let process_action ctx action = ccobjs := name :: !ccobjs end else if not !native_code && Filename.check_suffix name Config.ext_dll then - dllibs := name :: !dllibs + dllibs := (false, name) :: !dllibs else match Compiler_pass.of_input_filename name with | Some start_from -> @@ -778,3 +780,106 @@ let parse_arguments ?(current=ref 0) argv f program = Printf.sprintf "Usage: %s \nOptions are:" program in Printf.printf "%s\n%s" help_msg err_msg; raise (Exit_with_status 0) + +(* Parse and apply multipliers to runtime parameter values + cf. runtime/startup_aux.c *) +let scanmult name opt = + let val_mult v = function + | 'k' -> v * 1024 + | 'M' -> v * 1024 * 1024 + | 'G' -> v * 1024 * 1024 * 1024 + | c -> + fatalf "-set-runtime-default: unknown multiplier %c in %s=%s." + c name opt + in + let scanners = [ + (fun () -> Scanf.sscanf_opt opt "0x%x%c%!" val_mult); + (fun () -> Scanf.sscanf_opt opt "0x%x%!" Fun.id); + (fun () -> Scanf.sscanf_opt opt "%u%c%!" val_mult); + (fun () -> Scanf.sscanf_opt opt "%u%!" Fun.id)] in + match List.find_map (fun f -> f ()) scanners with + | Some v -> v + | None -> + fatalf "-set-runtime-default: could not parse integer value %s for %s." + opt name + +(* The list of runtime parameters for which "=1" can be omitted. Note that while + b is technically a boolean, it has two values for true as b>1 causes + backtrace information to be loaded on bytecode startup *) +let boolish_runtime_parameters = + ["b"; "c"; "p"; "R"] +let integer_runtime_parameters = + ["d"; "e"; "l"; "M"; "m"; "n"; "o"; "s"; "t"; "v"; "V"; "W"] + +(* To keep in sync with startup_aux.c *) +let parse_runtime_parameter opt = + if List.mem opt boolish_runtime_parameters then + Hashtbl.replace Clflags.runtime_parameters opt "1" + else if opt <> "" then + let k, setting = + try Misc.cut_at opt '=' + with Not_found -> + if List.mem opt integer_runtime_parameters then + fatalf "-set-runtime-default: runtime parameter %s requires a \ + parameter." opt + else + fatalf "-set-runtime-default: invalid runtime parameter %s. \ + Expected [=]." opt in + let set_parameter k setting = + let () = + let v = scanmult k setting in + match k with + | "b" -> + if v > 2 then + fatal "-set-runtime-default: runtime parameter b can only be set \ + to 0, 1, or 2." + | "d" -> + (* cf. Max_domains_max in runtime/caml/domain.h *) + let max_domains_max = 4096 in + if v < 1 then + fatal "-set-runtime-default: max_domains(d) must be at least 1"; + if v > max_domains_max then + fatalf "-set-runtime-default: max_domains(d) is too large. \ + The maximum value is %d." max_domains_max + | k when List.mem k boolish_runtime_parameters -> + if v > 1 then + fatalf "-set-runtime-default: runtime parameter %s can only be \ + set to 0 or 1." k + | k -> + if not (List.mem k integer_runtime_parameters) then + fatalf "-set-runtime-default: unrecognized runtime parameter \ + %s." k + in + Hashtbl.replace Clflags.runtime_parameters k setting + in + if k = "standard_library_default" then + Clflags.standard_library_default := Some setting + else + set_parameter k setting + +let overridden_runtime_parameters () = + if Hashtbl.length Clflags.runtime_parameters = 0 then + None + else + let sort (l, _) (r, _) = + (* Parameters in alphabetical order; if an option has both upper/lower + then upper first cf. caml_runtime_parameters *) + let l', r' = String.lowercase_ascii l, String.lowercase_ascii r in + if l' = r' then + String.compare l r + else + String.compare l' r' + in + let convert (parameter, value) = + if List.mem parameter boolish_runtime_parameters && value = "1" then + parameter + else + parameter ^ "=" ^ value + in + let ocamlrunparam = + Hashtbl.fold (fun k v acc -> (k, v) :: acc) Clflags.runtime_parameters [] + |> List.sort sort + |> List.map convert + |> String.concat "," + in + Some ocamlrunparam diff --git a/driver/compenv.mli b/driver/compenv.mli index 7ff196877501..3fdb5c29b693 100644 --- a/driver/compenv.mli +++ b/driver/compenv.mli @@ -23,6 +23,7 @@ val print_version_and_library : string -> 'a val print_version_string : unit -> 'a val print_standard_library : unit -> 'a val fatal : string -> 'a +val fatalf : ('a, unit, string, 'b) format4 -> 'a val first_ccopts : string list ref val first_ppx : string list ref @@ -52,7 +53,7 @@ type deferred_action = | ProcessCFile of string | ProcessOtherFile of string | ProcessObjects of string list - | ProcessDLLs of string list + | ProcessDLLs of bool * string list val c_object_of_filename : string -> string @@ -79,3 +80,10 @@ val process_deferred_actions : action_context -> unit *) val parse_arguments : ?current:(int ref) -> string array ref -> Arg.anon_fun -> string -> unit + +(** Validate a single -set-runtime-default parameter specification. *) +val parse_runtime_parameter : string -> unit + +(** Return {!Clflags.runtime_parameters} in the same format as the contents of + the [OCAMLRUNPARAM] environment variable. *) +val overridden_runtime_parameters : unit -> string option diff --git a/driver/compmisc.ml b/driver/compmisc.ml index f2179c6ba2dd..ddd096cc5523 100644 --- a/driver/compmisc.ml +++ b/driver/compmisc.ml @@ -81,16 +81,16 @@ let initial_env () = ~open_implicit_modules:(List.rev !Clflags.open_modules) let set_from_env flag Clflags.{ parse; usage; env_var } = - try - match parse (Sys.getenv env_var) with - | None -> - Location.prerr_warning Location.none - (Warnings.Bad_env_variable (env_var, usage)) - | Some x -> match !flag with - | None -> flag := Some x - | Some _ -> () - with - Not_found -> () + match Sys.getenv_opt env_var with + | None | Some "" -> () + | Some value -> + match parse value with + | None -> + Location.prerr_warning Location.none + (Warnings.Bad_env_variable (env_var, usage)) + | Some x -> match !flag with + | None -> flag := Some x + | Some _ -> () let read_clflags_from_env () = set_from_env Clflags.color Clflags.color_reader; diff --git a/driver/compmisc.mli b/driver/compmisc.mli index 3359b481988f..263da1358d7d 100644 --- a/driver/compmisc.mli +++ b/driver/compmisc.mli @@ -18,7 +18,9 @@ val init_path : -> ?dir:string -> unit -> unit val initial_env : unit -> Env.t -(* Support for flags that can also be set from an environment variable *) +(* Support for flags that can also be set from an environment variable. + Environment variables which are set, but to the empty string are ignored + (i.e. they are treated as if they were not set at all) *) val set_from_env : 'a option ref -> 'a Clflags.env_reader -> unit val read_clflags_from_env : unit -> unit diff --git a/driver/main_args.ml b/driver/main_args.ml index 393f9fe3837a..977543e9f898 100644 --- a/driver/main_args.ml +++ b/driver/main_args.ml @@ -89,6 +89,11 @@ let mk_custom f = let mk_dllib f = "-dllib", Arg.String f, " Use the dynamically-loaded library " +let mk_dllib_suffixed f = + "-dllib-suffixed", Arg.String f, + " Use the dynamically-loaded library , with the runtime suffix \ + appended to the name" + let mk_dllpath f = "-dllpath", Arg.String f, " Add to the run-time search path for shared libraries" @@ -163,6 +168,10 @@ let mk_H f = " Add to the list of \"hidden\" include directories\n\ \ (Like -I, but the program can not directly reference these dependencies)" +let mk_set_runtime_default f = + "-set-runtime-default", Arg.String f, "= Set the default for \ + runtime parameter to " + let mk_impl f = "-impl", Arg.String f, " Compile as a .ml file" @@ -520,6 +529,24 @@ let mk_unsafe_string = in "-unsafe-string", Arg.Unit err, " (option not available)" +let mk_launch_method f = + "-launch-method", Arg.String f, + " Specify the mechanism for the bytecode launcher:\n\ + \ exe - use the executable launcher in runtime-launch-info\n\ + \ sh - use a #!, using sh if the interpreter path cannot be used\n\ + \ /path/interpreter - use #!, or the given sh-compatible \ + \ interpreter if the interpreter path cannot be used" + +let mk_search_method f = + "-runtime-search", Arg.Symbol (["disable"; "enable"; "always"], f), + Printf.sprintf + " Control the way the bytecode header searches for the interpreter\n\ + \ The following settings are supported:\n\ + \ disable use a fixed absolute path to the runtime\n\ + \ enable search for runtime only if not found at the absolute path\n\ + \ always always search for the runtime\n\ + \ The default setting is 'disable'." + let mk_use_runtime f = "-use-runtime", Arg.String f, " Generate bytecode for the given runtime system" @@ -893,6 +920,7 @@ module type Compiler_options = sig val _runtime_variant : string -> unit val _with_runtime : unit -> unit val _without_runtime : unit -> unit + val _set_runtime_default : string -> unit val _short_paths : unit -> unit val _thread : unit -> unit val _v : unit -> unit @@ -933,10 +961,13 @@ module type Bytecomp_options = sig val _custom : unit -> unit val _no_check_prims : unit -> unit val _dllib : string -> unit + val _dllib_suffixed : string -> unit val _dllpath : string -> unit val _make_runtime : unit -> unit val _vmthread : unit -> unit val _use_runtime : string -> unit + val _launch_method : string -> unit + val _search_method : string -> unit val _output_complete_exe : unit -> unit val _dinstr : unit -> unit @@ -1067,6 +1098,7 @@ struct mk_config_var F._config_var; mk_custom F._custom; mk_dllib F._dllib; + mk_dllib_suffixed F._dllib_suffixed; mk_dllpath F._dllpath; mk_dtypes F._annot; mk_for_pack_byt F._for_pack; @@ -1121,6 +1153,7 @@ struct mk_without_runtime F._without_runtime; mk_safe_string; mk_safer_matching F._safer_matching; + mk_set_runtime_default F._set_runtime_default; mk_short_paths F._short_paths; mk_strict_sequence F._strict_sequence; mk_no_strict_sequence F._no_strict_sequence; @@ -1133,6 +1166,8 @@ struct mk_unsafe_string; mk_use_runtime F._use_runtime; mk_use_runtime_2 F._use_runtime; + mk_launch_method F._launch_method; + mk_search_method F._search_method; mk_v F._v; mk_verbose F._verbose; mk_version F._version; @@ -1346,6 +1381,7 @@ struct mk_S F._S; mk_safe_string; mk_safer_matching F._safer_matching; + mk_set_runtime_default F._set_runtime_default; mk_shared F._shared; mk_short_paths F._short_paths; mk_strict_sequence F._strict_sequence; @@ -1839,6 +1875,7 @@ module Default = struct let _plugin _p = plugin := true let _pp s = preprocessor := (Some s) let _runtime_variant s = runtime_variant := s + let _set_runtime_default s = Compenv.parse_runtime_parameter s let _stop_after pass = let module P = Compiler_pass in match P.of_string pass with @@ -1968,7 +2005,9 @@ third-party libraries such as Lwt, but with a different API." let _custom = set custom_runtime let _dcamlprimc = set keep_camlprimc_file let _dinstr = set dump_instr - let _dllib s = Compenv.defer (ProcessDLLs (Misc.rev_split_words s)) + let _dllib s = Compenv.defer (ProcessDLLs (false, Misc.rev_split_words s)) + let _dllib_suffixed s = + Compenv.defer (ProcessDLLs (true, Misc.rev_split_words s)) let _dllpath s = dllpaths := ((!dllpaths) @ [s]) let _make_runtime () = custom_runtime := true; make_runtime := true; link_everything := true @@ -1982,6 +2021,31 @@ third-party libraries such as Lwt, but with a different API." let _output_obj () = output_c_object := true; custom_runtime := true let _use_prims s = use_prims := s let _use_runtime s = use_runtime := s + let _launch_method s = + let s, bindir = + try Misc.cut_at s ' ' + with Not_found -> + s, Config.target_bindir + in + match s with + | "exe" -> + launch_method := (Config.Executable, bindir) + | "sh" -> + launch_method := (Config.Shebang None, bindir) + | s when s <> "" && s.[0] = '/' -> + launch_method := (Config.Shebang (Some s), bindir) + | _ -> + Compenv.fatal + "-launch-method: expect sh, exe or an absolute path for " + let _search_method = function + | "disable" -> + search_method := Config.Absolute + | "enable" -> + search_method := Config.Absolute_then_search + | "always" -> + search_method := Config.Search + | _ -> + assert false let _v () = Compenv.print_version_and_library "compiler" let _vmthread () = Compenv.fatal vmthread_removed_message end diff --git a/driver/main_args.mli b/driver/main_args.mli index 96cce1ca5386..2f0a1e184115 100644 --- a/driver/main_args.mli +++ b/driver/main_args.mli @@ -119,6 +119,7 @@ module type Compiler_options = sig val _runtime_variant : string -> unit val _with_runtime : unit -> unit val _without_runtime : unit -> unit + val _set_runtime_default : string -> unit val _short_paths : unit -> unit val _thread : unit -> unit val _v : unit -> unit @@ -160,10 +161,13 @@ module type Bytecomp_options = sig val _custom : unit -> unit val _no_check_prims : unit -> unit val _dllib : string -> unit + val _dllib_suffixed : string -> unit val _dllpath : string -> unit val _make_runtime : unit -> unit val _vmthread : unit -> unit val _use_runtime : string -> unit + val _launch_method : string -> unit + val _search_method : string -> unit val _output_complete_exe : unit -> unit val _dinstr : unit -> unit diff --git a/driver/maindriver.ml b/driver/maindriver.ml index 8a83c3c16cf9..afadf5e95b97 100644 --- a/driver/maindriver.ml +++ b/driver/maindriver.ml @@ -62,7 +62,7 @@ let main argv ppf = "Please specify at most one of -pack, -a, -c, -output-obj"; | Some ((P.Parsing | P.Typing | P.Lambda) as p) -> assert (P.is_compilation_pass p); - Printf.ksprintf Compenv.fatal + Compenv.fatalf "Options -i and -stop-after (%s) \ are incompatible with -pack, -a, -output-obj" (String.concat "|" diff --git a/driver/optmaindriver.ml b/driver/optmaindriver.ml index d4251c6b841b..b3a3446769c8 100644 --- a/driver/optmaindriver.ml +++ b/driver/optmaindriver.ml @@ -78,7 +78,7 @@ let main argv ppf = -output-obj"; | Some ((P.Parsing | P.Typing | P.Lambda | P.Scheduling | P.Emit) as p) -> assert (P.is_compilation_pass p); - Printf.ksprintf Compenv.fatal + Compenv.fatalf "Options -i and -stop-after (%s) \ are incompatible with -pack, -a, -shared, -output-obj" (String.concat "|" diff --git a/file_formats/cmo_format.mli b/file_formats/cmo_format.mli index 81769a2c0193..549deb6f73a2 100644 --- a/file_formats/cmo_format.mli +++ b/file_formats/cmo_format.mli @@ -67,7 +67,7 @@ type library = how they end up being used on the command line. *) lib_ccobjs: string list; (* C object files needed for -custom *) lib_ccopts: string list; (* Extra opts to C compiler *) - lib_dllibs: string list } (* DLLs needed *) + lib_dllibs: (bool * string) list } (* DLLs needed *) (* Format of a .cma file: magic number (Config.cma_magic_number) diff --git a/file_formats/cmt_format.ml b/file_formats/cmt_format.ml index 4baae3b45599..d9bf051bf09e 100644 --- a/file_formats/cmt_format.ml +++ b/file_formats/cmt_format.ml @@ -111,8 +111,9 @@ let iter_on_declarations ~(f: Shape.Uid.t -> item_declaration -> unit) = { } let need_to_clear_env = - try ignore (Sys.getenv "OCAML_BINANNOT_WITHENV"); false - with Not_found -> true + match Sys.getenv_opt "OCAML_BINANNOT_WITHENV" with + | None | Some "" -> true + | Some _ -> false let keep_only_summary = Env.keep_only_summary diff --git a/file_formats/cmx_format.mli b/file_formats/cmx_format.mli index 3339db49e73f..2a8ccd0b5195 100644 --- a/file_formats/cmx_format.mli +++ b/file_formats/cmx_format.mli @@ -46,7 +46,8 @@ type unit_infos = mutable ui_send_fun: int list; (* Send functions needed *) mutable ui_export_info: export_info; mutable ui_force_link: bool; (* Always linked *) - mutable ui_for_pack: string option } (* Part of a pack *) + mutable ui_for_pack: string option; (* Part of a pack *) + mutable ui_need_stdlib: bool} (* caml_standard_library_nat needed *) (* Each .a library has a matching .cmxa file that provides the following infos on the library: *) diff --git a/flexdll b/flexdll index 3400287999af..1fbab5748323 160000 --- a/flexdll +++ b/flexdll @@ -1 +1 @@ -Subproject commit 3400287999afcdc737f35c1d0e1447c7d2ae5a83 +Subproject commit 1fbab57483231da60cf26f12ed99bf9cbb2ce584 diff --git a/lambda/lambda.ml b/lambda/lambda.ml index 5ef68810650d..41ef61532337 100644 --- a/lambda/lambda.ml +++ b/lambda/lambda.ml @@ -25,6 +25,8 @@ type compile_time_constant = | Ostype_win32 | Ostype_cygwin | Backend_type + | Standard_library_default + | Shared_libraries type immediate_or_pointer = | Immediate diff --git a/lambda/lambda.mli b/lambda/lambda.mli index 92ddc2cd965d..32c183793303 100644 --- a/lambda/lambda.mli +++ b/lambda/lambda.mli @@ -26,6 +26,8 @@ type compile_time_constant = | Ostype_win32 | Ostype_cygwin | Backend_type + | Standard_library_default + | Shared_libraries type immediate_or_pointer = | Immediate diff --git a/lambda/printlambda.ml b/lambda/printlambda.ml index 8b2d6fdc565b..c4ee4ff4e414 100644 --- a/lambda/printlambda.ml +++ b/lambda/printlambda.ml @@ -269,7 +269,9 @@ let primitive ppf = function | Ostype_unix -> "ostype_unix" | Ostype_win32 -> "ostype_win32" | Ostype_cygwin -> "ostype_cygwin" - | Backend_type -> "backend_type" in + | Backend_type -> "backend_type" + | Standard_library_default -> "standard_library_default" + | Shared_libraries -> "shared_libraries" in fprintf ppf "sys.constant_%s" const_name | Pisint -> fprintf ppf "isint" | Pisout -> fprintf ppf "isout" diff --git a/lambda/translprim.ml b/lambda/translprim.ml index da6cd6c061f4..dca5b1e67f69 100644 --- a/lambda/translprim.ml +++ b/lambda/translprim.ml @@ -161,12 +161,15 @@ let primitives_table = "%boolnot", Primitive (Pnot, 1); "%big_endian", Primitive ((Pctconst Big_endian), 1); "%backend_type", Primitive ((Pctconst Backend_type), 1); + "%shared_libraries", Primitive ((Pctconst Shared_libraries), 1); "%word_size", Primitive ((Pctconst Word_size), 1); "%int_size", Primitive ((Pctconst Int_size), 1); "%max_wosize", Primitive ((Pctconst Max_wosize), 1); "%ostype_unix", Primitive ((Pctconst Ostype_unix), 1); "%ostype_win32", Primitive ((Pctconst Ostype_win32), 1); "%ostype_cygwin", Primitive ((Pctconst Ostype_cygwin), 1); + "%standard_library_default", + Primitive ((Pctconst Standard_library_default), 1); "%frame_pointers", Frame_pointers; "%negint", Primitive (Pnegint, 1); "%succint", Primitive ((Poffsetint 1), 1); diff --git a/man/Makefile b/man/Makefile index 10cc8bbe417b..e43a3451bc66 100644 --- a/man/Makefile +++ b/man/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR = .. +SUBDIR_NAME = man include $(ROOTDIR)/Makefile.common MANPAGES = $(addsuffix .1,\ @@ -22,5 +23,4 @@ MANPAGES = $(addsuffix .1,\ .PHONY: install install: - $(MKDIR) '$(INSTALL_PROGRAMS_MAN_DIR)' - $(INSTALL_DATA) $(MANPAGES) '$(INSTALL_PROGRAMS_MAN_DIR)' + $(call INSTALL_ITEMS, $(MANPAGES), man, $(INSTALL_MANDIR_PROGRAMS)) diff --git a/man/ocamlrun.1 b/man/ocamlrun.1 index 4e6e3db99d7a..e08c01407332 100644 --- a/man/ocamlrun.1 +++ b/man/ocamlrun.1 @@ -150,8 +150,10 @@ See the Gc module documentation for details. .TP .B b Trigger the printing of a stack backtrace -when an uncaught exception aborts the program. -This option takes no argument. +when an uncaught exception aborts the program. Possible values +are 0 for disabled, 1 for enabled, and 2 which, in bytecode, causes the +required debugging information to be loaded when the program starts instead +of when the program aborts. .TP .B c (cleanup_on_exit) Shut the runtime down gracefully on exit. The option @@ -219,13 +221,12 @@ Turn on debugging support for .BR ocamlyacc -generated parsers. When this option is on, the pushdown automaton that executes the parsers prints a -trace of its actions. This option takes no argument. +trace of its actions. .TP .BR R Turn on randomization of all hash tables by default (see the .B Hashtbl -module of the standard library). This option takes no -argument. +module of the standard library). .TP .BR s " (minor_heap_size)" The size of the minor heap (in words). diff --git a/manual/src/cmds/runtime.etex b/manual/src/cmds/runtime.etex index 5f41a2d097b9..5ebc7456be96 100644 --- a/manual/src/cmds/runtime.etex +++ b/manual/src/cmds/runtime.etex @@ -167,7 +167,7 @@ The following environment variables are also consulted: \item[p] (parser trace) Turn on debugging support for "ocamlyacc"-generated parsers. When this option is on, the pushdown automaton that executes the parsers prints a - trace of its actions. This option takes no argument. + trace of its actions. \item[R] (randomize) Turn on randomization of all hash tables by default (see \ifouthtml @@ -175,7 +175,6 @@ The following environment variables are also consulted: \else section~\ref{Hashtbl}). \fi - This option takes no argument. \item[s] ("minor_heap_size") Size of the minor heap. (in words) \item[t] Set the trace level for the debug runtime (ignored by the standard runtime). \item[v] ("verbose") What GC messages to print to stderr. This @@ -250,7 +249,9 @@ library directory. Users can add there the names of other directories containing frequently-used shared libraries; however, for consistency of installation, we recommend that shared libraries are installed directly in the system "stublibs" directory, rather than adding lines -to the "ld.conf" file. +to the "ld.conf" file. "ocamlrun" will add lines from "ld.conf" files +found in the directories pointed to by "OCAMLLIB", "CAMLLIB" and the +standard library directory, in that order. \item Default directories searched by the system dynamic loader. Under Unix, these generally include "/lib" and "/usr/lib", plus the directories listed in the file "/etc/ld.so.conf" and the environment diff --git a/middle_end/closure/closure.ml b/middle_end/closure/closure.ml index 428896fbeca8..6d92c266e59d 100644 --- a/middle_end/closure/closure.ml +++ b/middle_end/closure/closure.ml @@ -1056,22 +1056,30 @@ let rec close ({ backend; fenv; cenv ; mutable_vars } as env) lam = None ubody), approx) (* Compile-time constants *) - | Lprim(Pctconst c, [arg], _loc) -> - let cst, approx = - match c with - | Big_endian -> make_const_bool B.big_endian - | Word_size -> make_const_int (8*B.size_int) - | Int_size -> make_const_int (8*B.size_int - 1) - | Max_wosize -> make_const_int ((1 lsl ((8*B.size_int) - 10)) - 1 ) - | Ostype_unix -> make_const_bool (Config.target_os_type = "Unix") - | Ostype_win32 -> make_const_bool (Config.target_os_type = "Win32") - | Ostype_cygwin -> make_const_bool (Config.target_os_type = "Cygwin") - | Backend_type -> - make_const_int 0 (* tag 0 is the same as Native here *) + | Lprim(Pctconst c, [arg], loc) -> + let cst f v = + let cst, approx = f v in + let arg, _approx = close env arg in + let id = Ident.create_local "dummy" in + Ulet(Immutable, Pgenval, VP.create id, arg, cst), approx in - let arg, _approx = close env arg in - let id = Ident.create_local "dummy" in - Ulet(Immutable, Pgenval, VP.create id, arg, cst), approx + begin match c with + | Big_endian -> cst make_const_bool B.big_endian + | Word_size -> cst make_const_int (8*B.size_int) + | Int_size -> cst make_const_int (8*B.size_int - 1) + | Max_wosize -> cst make_const_int ((1 lsl ((8*B.size_int) - 10)) - 1) + | Ostype_unix -> cst make_const_bool (Config.target_os_type = "Unix") + | Ostype_win32 -> cst make_const_bool (Config.target_os_type = "Win32") + | Ostype_cygwin -> cst make_const_bool (Config.target_os_type = "Cygwin") + | Backend_type -> + cst make_const_int 0 (* tag 0 is the same as Native here *) + | Standard_library_default -> + Compilenv.need_stdlib_location (); + let dbg = Debuginfo.from_location loc in + let id = Ident.name Compilenv.stdlib_symbol_name in + Uprim(P.Pread_symbol id, [], dbg), Value_const (Uconst_ref (id, None)) + | Shared_libraries -> make_const_bool Config.supports_shared_libraries + end | Lprim(Pignore, [arg], _loc) -> let expr, approx = make_const_int 0 in Usequence(fst (close env arg), expr), approx @@ -1463,7 +1471,9 @@ let collect_exported_structured_constants a = | Uconst_ref (s, (Some c)) -> Compilenv.add_exported_constant s; structured_constant c - | Uconst_ref (_s, None) -> assert false (* Cannot be generated *) + | Uconst_ref (s, None) -> + (* Only generated in one context *) + assert (s = Ident.name Compilenv.stdlib_symbol_name) | Uconst_int _ -> () and structured_constant = function | Uconst_block (_, ul) -> List.iter const ul diff --git a/middle_end/compilenv.ml b/middle_end/compilenv.ml index 6792bfa33e98..cfea90cd0462 100644 --- a/middle_end/compilenv.ml +++ b/middle_end/compilenv.ml @@ -88,7 +88,8 @@ let current_unit = ui_send_fun = []; ui_force_link = false; ui_export_info = default_ui_export_info; - ui_for_pack = None } + ui_for_pack = None; + ui_need_stdlib = false } let linuxlike_mangling = match Config.system with | "macosx" @@ -131,6 +132,7 @@ let reset ?packname name = current_unit.ui_send_fun <- []; current_unit.ui_force_link <- !Clflags.link_everything; current_unit.ui_for_pack <- packname; + current_unit.ui_need_stdlib <- false; Hashtbl.clear exported_constants; structured_constants := structured_constants_empty; current_unit.ui_export_info <- default_ui_export_info; @@ -261,11 +263,16 @@ let global_approx id = | None -> Clambda.Value_unknown | Some ui -> get_clambda_approx ui +(* The name of the symbol defined globally for %standard_library_default *) +let stdlib_symbol_name = Ident.create_persistent "caml_standard_library_nat" + (* Return the symbol used to refer to a global identifier *) let symbol_for_global id = if Ident.is_predef id then "caml_exn_" ^ Ident.name id + else if Ident.same stdlib_symbol_name id then + Ident.name id else begin let unitname = Ident.name id in match @@ -293,7 +300,7 @@ let is_predefined_exception sym = let symbol_for_global' id = let sym_label = Linkage_name.create (symbol_for_global id) in - if Ident.is_predef id then + if Ident.is_predef id || Ident.same stdlib_symbol_name id then Symbol.of_global_linkage predefined_exception_compilation_unit sym_label else Symbol.of_global_linkage (unit_for_global id) sym_label @@ -351,6 +358,11 @@ let need_send_fun n = if not (List.mem n current_unit.ui_send_fun) then current_unit.ui_send_fun <- n :: current_unit.ui_send_fun +(* Record that caml_standard_library_nat is needed *) + +let need_stdlib_location () = + current_unit.ui_need_stdlib <- true + (* Write the description of the current unit *) let write_unit_info info filename = diff --git a/middle_end/compilenv.mli b/middle_end/compilenv.mli index d5a3ffd1b63c..9e2c159a0598 100644 --- a/middle_end/compilenv.mli +++ b/middle_end/compilenv.mli @@ -108,6 +108,14 @@ val need_send_fun: int -> unit (* Record the need of a currying (resp. application, message sending) function with the given arity *) +val need_stdlib_location: unit -> unit + (* Record that caml_standard_library_nat needs to be initialised if this + unit is linked. *) + +val stdlib_symbol_name: Ident.t + (* The name of the symbol defined globally for + %standard_library_default *) + val new_const_symbol : unit -> string val closure_symbol : Closure_id.t -> Symbol.t (* Symbol of a function if the function is diff --git a/middle_end/flambda/closure_conversion.ml b/middle_end/flambda/closure_conversion.ml index 1aad9643ada6..bc8ec40933f9 100644 --- a/middle_end/flambda/closure_conversion.ml +++ b/middle_end/flambda/closure_conversion.ml @@ -384,26 +384,32 @@ let rec close t env (lam : Lambda.lambda) : Flambda.t = ~name:Names.raise) | Lprim (Pctconst c, [arg], _loc) -> let module Backend = (val t.backend) in - let const = - begin match c with - | Big_endian -> lambda_const_bool Backend.big_endian - | Word_size -> lambda_const_int (8*Backend.size_int) - | Int_size -> lambda_const_int (8*Backend.size_int - 1) - | Max_wosize -> - lambda_const_int ((1 lsl ((8*Backend.size_int) - 10)) - 1) - | Ostype_unix -> - lambda_const_bool (String.equal Config.target_os_type "Unix") - | Ostype_win32 -> - lambda_const_bool (String.equal Config.target_os_type "Win32") - | Ostype_cygwin -> - lambda_const_bool (String.equal Config.target_os_type "Cygwin") - | Backend_type -> - Lambda.const_int 0 (* tag 0 is the same as Native *) - end - in - close t env - (Lambda.Llet(Strict, Pgenval, Ident.create_local "dummy", + let cst f v = + let const = f v in + close t env (Lambda.Llet(Strict, Pgenval, Ident.create_local "dummy", arg, Lconst const)) + in + begin match c with + | Big_endian -> cst lambda_const_bool Backend.big_endian + | Word_size -> cst lambda_const_int (8*Backend.size_int) + | Int_size -> cst lambda_const_int (8*Backend.size_int - 1) + | Max_wosize -> + cst lambda_const_int ((1 lsl ((8*Backend.size_int) - 10)) - 1) + | Ostype_unix -> + cst lambda_const_bool (String.equal Config.target_os_type "Unix") + | Ostype_win32 -> + cst lambda_const_bool (String.equal Config.target_os_type "Win32") + | Ostype_cygwin -> + cst lambda_const_bool (String.equal Config.target_os_type "Cygwin") + | Backend_type -> cst Lambda.const_int 0 (* tag 0 is the same as Native *) + | Standard_library_default -> + Compilenv.need_stdlib_location (); + let symbol = t.symbol_for_global' Compilenv.stdlib_symbol_name in + t.imported_symbols <- Symbol.Set.add symbol t.imported_symbols; + name_expr (Symbol symbol) ~name:Names.pgetglobal + | Shared_libraries -> + cst lambda_const_bool Config.supports_shared_libraries + end | Lprim (Pfield _, [Lprim (Pgetglobal id, [],_)], _) when Ident.same id t.current_unit_id -> Misc.fatal_errorf "[Pfield (Pgetglobal ...)] for the current compilation \ diff --git a/ocaml-variants.opam b/ocaml-variants.opam index 73a23a27470d..fb16299c509d 100644 --- a/ocaml-variants.opam +++ b/ocaml-variants.opam @@ -38,18 +38,18 @@ depends: [ # facility is not yet available for other platforms. "host-arch-x86_32" {os != "win32" & arch = "x86_32" & post} ("host-arch-x86_64" {os != "win32" & arch = "x86_64" & post} | - ("host-arch-x86_32" {os != "win32" & arch = "x86_64" & post} & "ocaml-option-32bit" {os != "win32" & arch = "x86_64"})) + ("host-arch-x86_32" {os != "win32" & arch = "x86_64" & post} & "ocaml-option-32bit" {build & os != "win32" & arch = "x86_64"})) "host-arch-unknown" {os != "win32" & arch != "arm32" & arch != "arm64" & arch != "ppc64" & arch != "riscv64" & arch != "s390x" & arch != "x86_32" & arch != "x86_64" & post} # Port selection (Windows) # amd64 mingw-w64 / MSVC - (("arch-x86_64" {os = "win32" & arch = "x86_64"} & - (("system-mingw" & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | - ("system-msvc" & "winpthreads" & "ocaml-option-no-compression" {os = "win32"}))) | + (("arch-x86_64" {build & os = "win32" & arch = "x86_64"} & + (("system-mingw" {build} & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | + ("system-msvc" {build} & "winpthreads" {os = "win32"} & "ocaml-option-no-compression" {build & os = "win32"}))) | # i686 mingw-w64 / MSVC - ("arch-x86_32" {os = "win32"} & "ocaml-option-bytecode-only" {os = "win32"} & - (("system-mingw" & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | - ("system-msvc" & "winpthreads" & "ocaml-option-no-compression" {os = "win32"}))) | + ("arch-x86_32" {build & os = "win32"} & "ocaml-option-bytecode-only" {build & os = "win32"} & + (("system-mingw" {build} & "mingw-w64-shims" {os-distribution = "cygwin" & build}) | + ("system-msvc" {build} & "winpthreads" {os = "win32"} & "ocaml-option-no-compression" {build & os = "win32"}))) | # Non-Windows systems "host-system-other" {os != "win32" & post}) @@ -75,8 +75,10 @@ build: [ "--host=i686-w64-mingw32" {os-distribution = "cygwin" & system-mingw:installed & arch-x86_32:installed} "--prefix=%{prefix}%" "--docdir=%{doc}%/ocaml" + "--with-stublibs=../stublibs" {os != "win32"} + "--with-stublibs=..\\stublibs" {os = "win32"} "--with-flexdll=%{flexdll:share}%" {os = "win32" & flexdll:installed} - "--with-winpthreads-msvc=%{winpthreads:share}%" {system-msvc:installed} + "--with-winpthreads-msvc=%{winpthreads:share}%" {winpthreads:installed & system-msvc:installed} "-C" "--with-afl" {ocaml-option-afl:installed} "--disable-native-compiler" {ocaml-option-bytecode-only:installed} @@ -102,8 +104,9 @@ build: [ "--disable-warn-error" ] [make "-j%{jobs}%"] + [make "INSTALL_MODE=opam" "install"] ] -install: [make "install"] +install: ["sh" "./%{name}%-fixup.sh" prefix] depopts: [ "ocaml-option-32bit" "ocaml-option-afl" diff --git a/ocamldoc/odoc_man.ml b/ocamldoc/odoc_man.ml index c8a4d9bf9dfb..5d7f03f0ea91 100644 --- a/ocamldoc/odoc_man.ml +++ b/ocamldoc/odoc_man.ml @@ -901,7 +901,7 @@ class man = let b = new_buf () in bs b (".TH \""^cl.cl_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -959,7 +959,7 @@ class man = let b = new_buf () in bs b (".TH \""^ct.clt_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1051,7 +1051,7 @@ class man = let b = new_buf () in bs b (".TH \""^mt.mt_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1133,7 +1133,7 @@ class man = let b = new_buf () in bs b (".TH \""^m.m_name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); @@ -1239,7 +1239,7 @@ class man = let b = new_buf () in bs b (".TH \""^name^"\" "); bs b !man_section ; - bs b (" "^Odoc_misc.current_date^" "); + bs b (" "^Odoc_misc.current_date()^" "); bs b "OCamldoc "; bs b ("\""^(match !Global.title with Some t -> t | None -> "")^"\"\n"); bs b ".SH NAME\n"; diff --git a/ocamldoc/odoc_misc.ml b/ocamldoc/odoc_misc.ml index 1e587af681b4..66878915a01e 100644 --- a/ocamldoc/odoc_misc.ml +++ b/ocamldoc/odoc_misc.ml @@ -247,14 +247,28 @@ let string_of_date ?(absolute=false) ?(hour=true) d = "" ) +let date_warning = ref (Fun.const ()) + let current_date = let time = - try - float_of_string (Sys.getenv "SOURCE_DATE_EPOCH") - with - Not_found -> Unix.time () + match Sys.getenv_opt "SOURCE_DATE_EPOCH" with + | None -> Unix.time () + | Some value -> + match Float.of_string_opt value with + | Some stamp -> stamp + | None -> + date_warning := (fun () -> + Odoc_global.pwarning + "The SOURCE_DATE_EPOCH environment variable could not be \ + parsed and has been ignored."; + date_warning := Fun.const ()); + Unix.time () in string_of_date ~absolute: true ~hour: false time +let current_date () = + (* Displays a warning the first time this is called if SOURCE_DATE_EPOCH was + set but could not be parsed. *) + !date_warning (); current_date let rec text_list_concat sep l = match l with diff --git a/ocamldoc/odoc_misc.mli b/ocamldoc/odoc_misc.mli index 86db543940d9..6cb927ec1f8a 100644 --- a/ocamldoc/odoc_misc.mli +++ b/ocamldoc/odoc_misc.mli @@ -66,7 +66,7 @@ val string_of_date : ?absolute:bool -> ?hour:bool -> float -> string (* Value returned by string_of_date for current time. * Uses environment variable SOURCE_DATE_EPOCH if set; falls back to * current timestamp otherwise. *) -val current_date : string +val current_date : unit -> string (** Return the first sentence (until the first dot) of a text. Don't stop in the middle of [Code], [Verbatim], [List], [Lnum], diff --git a/ocamltest/main.ml b/ocamltest/main.ml index e22acc24bc5b..9c0f9e8d61c2 100644 --- a/ocamltest/main.ml +++ b/ocamltest/main.ml @@ -251,7 +251,7 @@ let test_file test_filename = let reference_filename = Filename.concat test_source_directory (test_prefix ^ ".reference") in - let make = try Sys.getenv "MAKE" with Not_found -> "make" in + let make = Sys.getenv_with_default_value "MAKE" "make" in let initial_environment = Environments.from_bindings [ Builtin_variables.dev_null, "/dev/null"; diff --git a/ocamltest/ocaml_actions.ml b/ocamltest/ocaml_actions.ml index a9c62b4071dc..c1a05ab67f08 100644 --- a/ocamltest/ocaml_actions.ml +++ b/ocamltest/ocaml_actions.ml @@ -604,7 +604,9 @@ let mklib log env = Ocaml_commands.ocamlrun_ocamlmklib; "-ocamlc '" ^ ocamlc_command ^ "'"; "-o " ^ program - ] @ modules env in + ] @ (if Ocamltest_config.suffixing then ["-suffixed"] else []) + @ modules env + in let expected_exit_status = 0 in let exit_status = Actions_helpers.run_cmd diff --git a/ocamltest/ocaml_tests.ml b/ocamltest/ocaml_tests.ml index fbef17b375f7..4c19aa09afb8 100644 --- a/ocamltest/ocaml_tests.ml +++ b/ocamltest/ocaml_tests.ml @@ -46,7 +46,16 @@ let bytecode = check_program_output; ] @ (if not Sys.win32 && Ocamltest_config.native_compiler then - opt_build @ [compare_bytecode_programs] + (* If the compiler is configured using --with-relative-libdir then at + present we can't compare the bytecode programs because ocamlc.opt and + ocamlrun are at different levels in the build tree, but they're both + configured with the same relative directory path. + This problem will disappear when ocamltest runs the testsuite against a + compiler in an install-tree like way. *) + if Ocamltest_config.has_relative_libdir then + opt_build + else + opt_build @ [compare_bytecode_programs] else [] ) diff --git a/ocamltest/ocamltest_config.ml.in b/ocamltest/ocamltest_config.ml.in index c4c772b51496..1d0c2ca9764a 100644 --- a/ocamltest/ocamltest_config.ml.in +++ b/ocamltest/ocamltest_config.ml.in @@ -102,3 +102,7 @@ let instrumented_runtime = @instrumented_runtime@ let frame_pointers = @frame_pointers@ let tsan = @tsan@ + +let has_relative_libdir = @target_libdir_is_relative@ + +let suffixing = @suffixing@ diff --git a/ocamltest/ocamltest_config.mli b/ocamltest/ocamltest_config.mli index e5cd28e77478..789b2336370c 100644 --- a/ocamltest/ocamltest_config.mli +++ b/ocamltest/ocamltest_config.mli @@ -145,3 +145,10 @@ val frame_pointers : bool val tsan : bool (** Whether ThreadSanitizer support has been enabled at configure time *) + +val has_relative_libdir : bool +(** Whether the compiler has been configured using --with-relative-libdir *) + +val suffixing : bool +(** Whether C stub library filenames are being mangled with the Bytecode + Runtime ID and {!Config.target}. *) diff --git a/ocamltest/ocamltest_stdlib.ml b/ocamltest/ocamltest_stdlib.ml index 75f4ee146865..c38098e437fa 100644 --- a/ocamltest/ocamltest_stdlib.ml +++ b/ocamltest/ocamltest_stdlib.ml @@ -223,7 +223,9 @@ module Sys = struct Fun.protect ~finally:(fun () -> Sys.chdir oldcwd) f let getenv_with_default_value variable default_value = - try Sys.getenv variable with Not_found -> default_value + match Sys.getenv_opt variable with + | None | Some "" -> default_value + | Some value -> value let safe_getenv variable = getenv_with_default_value variable "" end diff --git a/otherlibs/Makefile b/otherlibs/Makefile index d76643bd297b..83dce3235d50 100644 --- a/otherlibs/Makefile +++ b/otherlibs/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR=.. +SUBDIR_NAME=otherlibs include $(ROOTDIR)/Makefile.common # Although the OTHERLIBS variable is defined in ../Makefile.config, diff --git a/otherlibs/Makefile.otherlibs.common b/otherlibs/Makefile.otherlibs.common index f08dd13ef8cc..59fc68640331 100644 --- a/otherlibs/Makefile.otherlibs.common +++ b/otherlibs/Makefile.otherlibs.common @@ -16,6 +16,7 @@ # Common Makefile for otherlibs ROOTDIR=../.. +SUBDIR_NAME=otherlibs/$(LIBNAME) include $(ROOTDIR)/Makefile.common include $(ROOTDIR)/Makefile.best_binaries @@ -35,6 +36,9 @@ ifeq "$(FLAMBDA)" "true" OPTCOMPFLAGS += -O3 endif MKLIB=$(OCAMLRUN) $(ROOTDIR)/tools/ocamlmklib$(EXE) +ifeq "$(SUFFIXING)" "true" +MKLIB += -suffixed +endif # Variables that must be defined by individual libraries: # LIBNAME @@ -52,8 +56,13 @@ CAMLOBJS_NAT ?= $(CAMLOBJS:.cmo=.cmx) CLIBNAME ?= $(LIBNAME) ifeq "$(C_SOURCES)" "" -STUBSLIB= + +STUBSLIB_BYTECODE= +STUBSLIB_NATIVE= +STUBSDLL= + else + COBJS_BYTECODE = $(C_SOURCES:.c=.b.$(O)) COBJS_NATIVE = $(C_SOURCES:.c=.n.$(O)) COBJS = $(COBJS_BYTECODE) $(COBJS_NATIVE) @@ -62,6 +71,12 @@ CLIBNAME_BYTECODE=$(CLIBNAME)byt CLIBNAME_NATIVE=$(CLIBNAME)nat STUBSLIB_BYTECODE=lib$(CLIBNAME_BYTECODE).$(A) STUBSLIB_NATIVE=lib$(CLIBNAME_NATIVE).$(A) + +ifeq "$(SUFFIXING)" "true" +STUBSDLL=dll$(CLIBNAME_BYTECODE)-$(TARGET)-$(BYTECODE_RUNTIME_ID)$(EXT_DLL) +else +STUBSDLL=dll$(CLIBNAME_BYTECODE)$(EXT_DLL) +endif endif .PHONY: all allopt opt.opt # allopt and opt.opt are synonyms @@ -87,7 +102,7 @@ else endif $(LIBNAME).cmxs: $(LIBNAME).cmxa $(STUBSLIB_NATIVE) - $(V_OCAMLOPT)$(CAMLOPT) -shared -o $(LIBNAME).cmxs -I . $(LIBNAME).cmxa + $(V_OCAMLOPT)$(CAMLOPT) -shared -o $@ -I . $< $(addprefix -cclib , $(LDOPTS)) lib$(CLIBNAME_BYTECODE).$(A): $(COBJS) $(V_OCAMLMKLIB)$(MKLIB) -oc $(CLIBNAME_BYTECODE) $(COBJS_BYTECODE) $(LDOPTS) @@ -95,49 +110,37 @@ lib$(CLIBNAME_BYTECODE).$(A): $(COBJS) lib$(CLIBNAME_NATIVE).$(A): $(COBJS) $(V_OCAMLMKLIB)$(MKLIB) -oc $(CLIBNAME_NATIVE) $(COBJS_NATIVE) $(LDOPTS) -INSTALL_LIBDIR_LIBNAME = $(INSTALL_LIBDIR)/$(LIBNAME) - install:: - if test -f dll$(CLIBNAME_BYTECODE)$(EXT_DLL); then \ - $(INSTALL_PROG) \ - dll$(CLIBNAME_BYTECODE)$(EXT_DLL) "$(INSTALL_STUBLIBDIR)"; \ - fi ifneq "$(STUBSLIB_BYTECODE)" "" - $(INSTALL_DATA) $(STUBSLIB_BYTECODE) "$(INSTALL_LIBDIR)/" +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" + $(call INSTALL_ITEMS, $(STUBSDLL), stublibs) +endif + $(call INSTALL_ITEMS, $(STUBSLIB_BYTECODE), lib) endif # If installing over a previous OCaml version, ensure the library is removed # from the previous installation. - rm -f $(addprefix "$(INSTALL_LIBDIR)"/, \ - $(LIBNAME).cma $(CMIFILES) \ - $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti) \ - $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).cmxs $(LIBNAME).$(A)) - $(MKDIR) "$(INSTALL_LIBDIR_LIBNAME)" - $(INSTALL_DATA) \ - $(LIBNAME).cma $(CMIFILES) META \ - "$(INSTALL_LIBDIR_LIBNAME)/" + $(call INSTALL_RM, \ + $(addprefix "$(INSTALL_LIBDIR)"/, \ + $(LIBNAME).cma $(CMIFILES) \ + $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti) \ + $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).cmxs $(LIBNAME).$(A))) + $(call INSTALL_ITEMS, $(LIBNAME).cma $(CMIFILES) META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(CMIFILES:.cmi=.mli) \ - $(CMIFILES:.cmi=.cmti) \ - "$(INSTALL_LIBDIR_LIBNAME)/" + $(call INSTALL_ITEMS, $(CMIFILES:.cmi=.mli) $(CMIFILES:.cmi=.cmti), \ + lib, $(LIBNAME)) +endif +ifneq "$(HEADERS)" "" + $(call INSTALL_ITEMS, $(HEADERS), lib, $(INSTALL_LIBDIR_CAML)) endif - if test -n "$(HEADERS)"; then \ - $(INSTALL_DATA) $(HEADERS) "$(INSTALL_INCDIR)/"; \ - fi installopt: - $(INSTALL_DATA) \ - $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).$(A) \ - "$(INSTALL_LIBDIR_LIBNAME)/" - if test -f $(LIBNAME).cmxs; then \ - $(INSTALL_PROG) $(LIBNAME).cmxs "$(INSTALL_LIBDIR_LIBNAME)"; \ - fi - if test -f dll$(CLIBNAME_NATIVE)$(EXT_DLL); then \ - $(INSTALL_PROG) \ - dll$(CLIBNAME_NATIVE)$(EXT_DLL) "$(INSTALL_STUBLIBDIR)"; \ - fi + $(call INSTALL_ITEMS, \ + $(CAMLOBJS_NAT) $(LIBNAME).cmxa $(LIBNAME).$(A), lib, $(LIBNAME)) +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" + $(call INSTALL_ITEMS, $(LIBNAME).cmxs, libexec, $(LIBNAME)) +endif ifneq "$(STUBSLIB_NATIVE)" "" - $(INSTALL_DATA) $(STUBSLIB_NATIVE) "$(INSTALL_LIBDIR)/" + $(call INSTALL_ITEMS, $(STUBSLIB_NATIVE), lib) endif partialclean: diff --git a/otherlibs/dynlink/byte/dynlink.ml b/otherlibs/dynlink/byte/dynlink.ml index 714198bdeacd..4d5ce67dff1e 100644 --- a/otherlibs/dynlink/byte/dynlink.ml +++ b/otherlibs/dynlink/byte/dynlink.ml @@ -173,6 +173,8 @@ module Bytecode = struct (DT.Error (Library's_module_initializers_failed exn)) (Printexc.get_raw_backtrace ()) + external supports_shared_libraries : unit -> bool = "%shared_libraries" + let load ~filename:file_name ~priv = let ic = try open_in_bin file_name @@ -202,7 +204,11 @@ module Bytecode = struct let toc_pos = input_binary_int ic in (* Go to table of contents *) seek_in ic toc_pos; let lib = (input_value ic : library) in - Symtable.open_dlls lib.lib_dllibs; + if supports_shared_libraries () then + Symtable.open_dlls lib.lib_dllibs + else + raise (DT.Error (Cannot_open_dynamic_library (Failure + "loading shared libraries not supported by this runtime"))); handle, lib.lib_units end else begin raise (DT.Error (Not_a_bytecode_file file_name)) diff --git a/otherlibs/dynlink/byte/dynlink_symtable.ml b/otherlibs/dynlink/byte/dynlink_symtable.ml index 4b510b4faa73..60a8f8d27893 100644 --- a/otherlibs/dynlink/byte/dynlink_symtable.ml +++ b/otherlibs/dynlink/byte/dynlink_symtable.ml @@ -89,14 +89,24 @@ let primitives : (string, int) Hashtbl.t = Hashtbl.create 100 #52 "bytecomp/dll.ml" (* Extract the name of a DLLs from its external name (xxx.so or -lxxx) *) -let extract_dll_name file = - if Filename.check_suffix file Config.ext_dll then +let extract_dll_name (suffixed, file) = + if not suffixed && Filename.check_suffix file Config.ext_dll then Filename.chop_suffix file Config.ext_dll - else if String.length file >= 2 && String.sub file 0 2 = "-l" then - "dll" ^ String.sub file 2 (String.length file - 2) else - file (* will cause error later *) -#100 "otherlibs/dynlink/byte/dynlink_symtable.ml" + let file = + if String.starts_with ~prefix:"-l" file then + "dll" ^ String.sub file 2 (String.length file - 2) + else + file + in + if suffixed then +#104 "otherlibs/dynlink/byte/dynlink_symtable.ml" + (* This name must be in sync with Misc.RuntimeID.stubslib *) + Printf.sprintf "%s-%s-%s" file Config.target Config.bytecode_runtime_id +#66 "bytecomp/dll.ml" + else + file +#110 "otherlibs/dynlink/byte/dynlink_symtable.ml" (* Specialized version of [Dll.{open_dll,open_dlls,find_primitive}] for the execution mode. *) let open_dll name = @@ -232,19 +242,15 @@ let patch_object buff patchlist = #328 "bytecomp/symtable.ml" (* Functions for toplevel use *) -(* Update the in-core table of globals *) -#237 "otherlibs/dynlink/byte/dynlink_symtable.ml" -module Meta = struct -#16 "bytecomp/meta.ml" external global_data : unit -> Obj.t array = "caml_get_global_data" external realloc_global_data : int -> unit = "caml_realloc_global" -#242 "otherlibs/dynlink/byte/dynlink_symtable.ml" -end -#332 "bytecomp/symtable.ml" + +(* Update the in-core table of globals *) + let update_global_table () = let ng = !global_table.cnt in - if ng > Array.length(Meta.global_data()) then Meta.realloc_global_data ng; - let glob = Meta.global_data() in + if ng > Array.length(global_data()) then realloc_global_data ng; + let glob = global_data() in List.iter (fun (slot, cst) -> glob.(slot) <- cst) !literal_table; @@ -264,16 +270,16 @@ external get_bytecode_sections : unit -> bytecode_sections = let init_toplevel () = let sect = get_bytecode_sections () in global_table := sect.symb; -#268 "otherlibs/dynlink/byte/dynlink_symtable.ml" +#274 "otherlibs/dynlink/byte/dynlink_symtable.ml" Dll.init ~dllpaths:sect.dlpt ~prims:sect.prim; -#358 "bytecomp/symtable.ml" +#361 "bytecomp/symtable.ml" sect.crcs (* Find the value of a global identifier *) -#364 "bytecomp/symtable.ml" +#367 "bytecomp/symtable.ml" let get_global_value global = - (Meta.global_data()).(slot_for_getglobal global) -#369 "bytecomp/symtable.ml" + (global_data()).(slot_for_getglobal global) +#372 "bytecomp/symtable.ml" (* Check that all compilation units referenced in the given patch list have already been initialized *) @@ -314,17 +320,17 @@ let check_global_initialized patchlist = type global_map = GlobalMap.t let current_state () = !global_table -#412 "bytecomp/symtable.ml" +#415 "bytecomp/symtable.ml" let hide_additions (st : global_map) = if st.cnt > !global_table.cnt then -#321 "otherlibs/dynlink/byte/dynlink_symtable.ml" +#327 "otherlibs/dynlink/byte/dynlink_symtable.ml" failwith "Symtable.hide_additions"; -#415 "bytecomp/symtable.ml" +#418 "bytecomp/symtable.ml" global_table := {GlobalMap. cnt = !global_table.cnt; tbl = st.tbl } -#434 "bytecomp/symtable.ml" +#437 "bytecomp/symtable.ml" let is_defined_in_global_map (gmap : global_map) global = Global.Map.mem global gmap.tbl diff --git a/otherlibs/dynlink/byte/dynlink_symtable.mli b/otherlibs/dynlink/byte/dynlink_symtable.mli index 803451c2212f..94049b5cfac3 100644 --- a/otherlibs/dynlink/byte/dynlink_symtable.mli +++ b/otherlibs/dynlink/byte/dynlink_symtable.mli @@ -31,7 +31,7 @@ module Global : sig val description: Format.formatter -> t -> unit end -val open_dlls : string list -> unit +val open_dlls : (bool * string) list -> unit val patch_object: (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> diff --git a/otherlibs/dynlink/dynlink_config.ml.in b/otherlibs/dynlink/dynlink_config.ml.in index 231bda97d209..67b588024695 100644 --- a/otherlibs/dynlink/dynlink_config.ml.in +++ b/otherlibs/dynlink/dynlink_config.ml.in @@ -24,3 +24,7 @@ let ext_dll = "." ^ {@QS@|@SO@|@QS@} and cmo_magic_number = {magic|@CMO_MAGIC_NUMBER@|magic} and cma_magic_number = {magic|@CMA_MAGIC_NUMBER@|magic} and cmxs_magic_number = {magic|@CMXS_MAGIC_NUMBER@|magic} + +let bytecode_runtime_id = {@QS@|@bytecode_runtime_id@|@QS@} + +let target = {@QS@|@target@|@QS@} diff --git a/otherlibs/dynlink/dynlink_config.mli b/otherlibs/dynlink/dynlink_config.mli index ad44848ebc20..30dbdf4df6bf 100644 --- a/otherlibs/dynlink/dynlink_config.mli +++ b/otherlibs/dynlink/dynlink_config.mli @@ -21,3 +21,7 @@ val ext_dll: string val cmo_magic_number: string val cma_magic_number: string val cmxs_magic_number: string + +val bytecode_runtime_id: string + +val target : string diff --git a/otherlibs/runtime_events/runtime_events.mli b/otherlibs/runtime_events/runtime_events.mli index e7f87e0f27a7..a20cead3876e 100644 --- a/otherlibs/runtime_events/runtime_events.mli +++ b/otherlibs/runtime_events/runtime_events.mli @@ -34,16 +34,16 @@ The runtime events system's behaviour can be controlled by the following environment variables: - - OCAML_RUNTIME_EVENTS_START if set will cause the runtime events system - to be started as part of the OCaml runtime initialization. + - OCAML_RUNTIME_EVENTS_START if non-empty will cause the runtime events + system to be started as part of the OCaml runtime initialization. - OCAML_RUNTIME_EVENTS_DIR sets the directory where the runtime events ring buffers will be located. If not present the program's working directory will be used. - - OCAML_RUNTIME_EVENTS_PRESERVE if set will prevent the OCaml runtime from - removing its ring buffers when it terminates. This can help if monitoring - very short running programs. + - OCAML_RUNTIME_EVENTS_PRESERVE if non-empty will prevent the OCaml runtime + from removing its ring buffers when it terminates. This can help if + monitoring very short running programs. *) (** The type for counter events emitted by the runtime. Counter events are used diff --git a/otherlibs/systhreads/Makefile b/otherlibs/systhreads/Makefile index 64d41cc19d66..2b410b0724c1 100644 --- a/otherlibs/systhreads/Makefile +++ b/otherlibs/systhreads/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR=../.. +SUBDIR_NAME=otherlibs/systhreads include $(ROOTDIR)/Makefile.common include $(ROOTDIR)/Makefile.best_binaries @@ -27,6 +28,12 @@ CAMLC=$(BEST_OCAMLC) $(LIBS) CAMLOPT=$(BEST_OCAMLOPT) $(LIBS) MKLIB=$(OCAMLRUN) $(ROOTDIR)/tools/ocamlmklib$(EXE) +ifeq "$(SUFFIXING)" "true" +MKLIB += -suffixed +DLLTHREADS = dllthreads-$(TARGET)-$(BYTECODE_RUNTIME_ID)$(EXT_DLL) +else +DLLTHREADS = dllthreads$(EXT_DLL) +endif COMPFLAGS=-w +33..39 -warn-error +A -g -bin-annot ifeq "$(FLAMBDA)" "true" OPTCOMPFLAGS += -O3 @@ -39,9 +46,6 @@ LIBNAME=threads # That's why this dependency is handled in the Makefile directly # and removed from the output of the C compiler during make depend -BYTECODE_C_OBJS=st_stubs.b.$(O) -NATIVECODE_C_OBJS=st_stubs.n.$(O) - THREADS_SOURCES = thread.ml event.ml THREADS_BCOBJS = $(THREADS_SOURCES:.ml=.cmo) @@ -55,15 +59,20 @@ all: lib$(LIBNAME).$(A) $(LIBNAME).cma $(CMIFILES) allopt: lib$(LIBNAME)nat.$(A) $(LIBNAME).cmxa $(CMIFILES) +ifeq "$(NATDYNLINK)" "true" +allopt: $(LIBNAME).cmxs +endif + lib$(LIBNAME).$(A): OC_CFLAGS = $(OC_BYTECODE_CFLAGS) -lib$(LIBNAME).$(A): $(BYTECODE_C_OBJS) - $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME) $(BYTECODE_C_OBJS) +lib$(LIBNAME).$(A): st_stubs.b.$(O) st_stubs_shared.b.$(O) + @$(MKLIB) -o $(LIBNAME) st_stubs_shared.b.$(O) + $(V_OCAMLMKLIB)$(MKLIB) -custom -o $(LIBNAME) $< lib$(LIBNAME)nat.$(A): OC_CFLAGS = $(OC_NATIVE_CFLAGS) -lib$(LIBNAME)nat.$(A): $(NATIVECODE_C_OBJS) - $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME)nat $^ +lib$(LIBNAME)nat.$(A): st_stubs.n.$(O) + $(V_OCAMLMKLIB)$(MKLIB) -custom -o $(LIBNAME)nat $^ $(LIBNAME).cma: $(THREADS_BCOBJS) $(V_OCAMLMKLIB)$(MKLIB) -o $(LIBNAME) -ocamlc '$(CAMLC)' -linkall $^ @@ -72,14 +81,20 @@ $(LIBNAME).cma: $(THREADS_BCOBJS) $(LIBNAME).cmxa: $(THREADS_NCOBJS) $(V_LINKOPT)$(CAMLOPT) -linkall -a -cclib -lthreadsnat -o $@ $^ -# The following lines produce two object files st_stubs.b.$(O) and -# st_stubs.n.$(O) from the same source file st_stubs.c (it is compiled -# twice, each time with different options). +st_stubs_shared.n.$(O): OC_CFLAGS = $(OC_NATIVE_CFLAGS) + +$(LIBNAME).cmxs: $(THREADS_NCOBJS) st_stubs_shared.n.$(O) + $(V_LINKOPT)$(CAMLOPT) -linkall -shared -o $@ $^ + +# The following lines produce object files based on st_stubs.c. Four objects are +# produced - a static and shared version in both bytecode and native versions. + +st_stubs_shared.%.$(O): OC_CPPFLAGS += -DSYSTHREADS_SHARED ifeq "$(COMPUTE_DEPS)" "true" -st_stubs.%.$(O): st_stubs.c +st_stubs%.$(O): st_stubs.c else -st_stubs.%.$(O): st_stubs.c $(RUNTIME_HEADERS) $(wildcard *.h) +st_stubs%.$(O): st_stubs.c $(RUNTIME_HEADERS) $(wildcard *.h) endif $(V_CC)$(CC) $(OC_CFLAGS) $(CFLAGS) $(OC_CPPFLAGS) $(CPPFLAGS) \ $(OUTPUTOBJ)$@ -c $< @@ -97,30 +112,25 @@ clean: partialclean distclean: clean rm -f META -INSTALL_THREADSLIBDIR=$(INSTALL_LIBDIR)/$(LIBNAME) - install: - if test -f dllthreads$(EXT_DLL); then \ - $(INSTALL_PROG) dllthreads$(EXT_DLL) "$(INSTALL_STUBLIBDIR)"; \ - fi - $(INSTALL_DATA) libthreads.$(A) "$(INSTALL_LIBDIR)" - $(MKDIR) "$(INSTALL_THREADSLIBDIR)" - $(INSTALL_DATA) \ - $(CMIFILES) threads.cma META \ - "$(INSTALL_THREADSLIBDIR)" +ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" + $(call INSTALL_ITEMS, $(DLLTHREADS), stublibs) +endif + $(call INSTALL_ITEMS, libthreads.$(A), lib) + $(call INSTALL_ITEMS, $(CMIFILES) threads.cma META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - $(CMIFILES:.cmi=.cmti) \ - "$(INSTALL_THREADSLIBDIR)" - $(INSTALL_DATA) $(MLIFILES) "$(INSTALL_THREADSLIBDIR)" + $(call INSTALL_ITEMS, $(CMIFILES:.cmi=.cmti), lib, $(LIBNAME)) + $(call INSTALL_ITEMS, $(MLIFILES), lib, $(LIBNAME)) endif - $(INSTALL_DATA) caml/threads.h "$(INSTALL_INCDIR)" + $(call INSTALL_ITEMS, caml/threads.h, lib, $(INSTALL_LIBDIR_CAML)) installopt: - $(INSTALL_DATA) libthreadsnat.$(A) "$(INSTALL_LIBDIR)" - $(INSTALL_DATA) \ - $(THREADS_NCOBJS) threads.cmxa threads.$(A) \ - "$(INSTALL_THREADSLIBDIR)" + $(call INSTALL_ITEMS, libthreadsnat.$(A), lib) + $(call INSTALL_ITEMS, $(THREADS_NCOBJS) threads.cmxa threads.$(A), \ + lib, $(LIBNAME)) +ifeq "$(NATDYNLINK)" "true" + $(call INSTALL_ITEMS, $(LIBNAME).cmxs, libexec, $(LIBNAME)) +endif %.cmi: %.mli $(V_OCAMLC)$(CAMLC) -c $(COMPFLAGS) $< diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c index 971c8992c949..b18c75a93182 100644 --- a/otherlibs/systhreads/st_stubs.c +++ b/otherlibs/systhreads/st_stubs.c @@ -19,16 +19,13 @@ any reason. In mingw-w64 13.0.0, a subtle change meant that time.h causes pthread_compat.h to be read. For this reason, this next block must appear before anything headers are included. */ -#if defined(_WIN32) && !defined(NATIVE_CODE) && !defined(_MSC_VER) +#if defined(_WIN32) && defined(SYSTHREADS_SHARED) /* Ensure that pthread.h marks symbols __declspec(dllimport) so that they can be picked up from the runtime (which will have linked winpthreads statically). mingw-w64 11.0.0 introduced WINPTHREADS_USE_DLLIMPORT to do this explicitly; prior versions co-opted this on the internal DLL_EXPORT, but this is ignored in 11.0 and later unless IN_WINPTHREAD is also defined, so we can safely - define both to support both versions. - When compiling with MSVC, we currently link directly the winpthreads objects - into our runtime, so we do not want to mark its symbols with - __declspec(dllimport). */ + define both to support both versions. */ #define WINPTHREADS_USE_DLLIMPORT #define DLL_EXPORT #endif diff --git a/otherlibs/unix/Makefile b/otherlibs/unix/Makefile index 9cedb6f36370..d64ca05d4bab 100644 --- a/otherlibs/unix/Makefile +++ b/otherlibs/unix/Makefile @@ -35,9 +35,7 @@ unixLabels.cmi: \ EXTRACAMLFLAGS += -pp "$(AWK) -f $(ROOTDIR)/stdlib/expand_module_aliases.awk" ifeq "$(UNIX_OR_WIN32)" "win32" -WIN32_LIBS=$(call SYSLIB,ws2_32) $(call SYSLIB,advapi32) -LINKOPTS=$(addprefix -cclib ,$(WIN32_LIBS)) -LDOPTS=$(addprefix -ldopt ,$(WIN32_LIBS)) +LDOPTS=-lws2_32 -ladvapi32 else # Unix # dllunix.so particularly requires libm for modf symbols LDOPTS=$(NATIVECCLIBS) diff --git a/otherlibs/unix/unix_win32.ml b/otherlibs/unix/unix_win32.ml index ee4b6cd37ca9..4a351882d3c5 100644 --- a/otherlibs/unix/unix_win32.ml +++ b/otherlibs/unix/unix_win32.ml @@ -655,7 +655,7 @@ type group_entry = gr_gid : int; gr_mem : string array } -let getlogin () = try Sys.getenv "USERNAME" with Not_found -> "" +let getlogin () = Option.value (Sys.getenv_opt "USERNAME") ~default:"" let getpwnam _x = raise Not_found let getgrnam = getpwnam let getpwuid = getpwnam @@ -1115,8 +1115,9 @@ let open_process_args_full prog args = let open_process_shell fn cmd = let shell = - try Sys.getenv "COMSPEC" - with Not_found -> raise(Unix_error(ENOEXEC, "open_process_shell", cmd)) in + match Sys.getenv_opt "COMSPEC" with + | None | Some "" -> raise(Unix_error(ENOEXEC, "open_process_shell", cmd)) + | Some shell -> shell in fn shell (shell ^ " /c " ^ cmd) let open_process_in cmd = open_process_shell open_process_cmdline_in cmd diff --git a/runtime/Mangling.md b/runtime/Mangling.md new file mode 100644 index 000000000000..84a0a52bfac7 --- /dev/null +++ b/runtime/Mangling.md @@ -0,0 +1,135 @@ +# Filename Mangling + +## Background + +OCaml compiler installations exist in isolation. When running the compiler, it +is assumed that the caller will have configured the environment of the compiler +such that files and settings related to other compiler installations will not +interfere. + +This is not true of the runtime. Shared libraries are loaded from a global +namespace (dynamically loaded bytecode stub libraries and the shared versions of +both the native and bytecode runtimes) and programs may be searched in a global +PATH. To allow programs compiled against different coinstalled versions of the +runtime to be executed, a name mangling scheme is used for the runtime's +executables and shared libraries. + +## Filename Mangling + +Filenames are mangled using one or both of two pieces of configuration +information. The first is the standard "autoconf" triplet on which the runtime +executes (e.g. `x86_64-pc-linux-gnu`). The other is a summary of the runtime +version and configuration called the Runtime ID. This information is a series of +bits encoded in base32 using the alphabet `[0-9a-v]` and with the quintets laid +out little-endian. + +Mangling is applied to the name of any file which will be searched for at +runtime: + +- `ocamlrun` (and variants) are triplet-prefixed and Bytecode-suffixed. For + example, `x86_64-pc-linux-gnu-ocamlrun-a140` is OCaml 5.5 configured with + `--disable-flat-float-array` on 64-bit Intel/AMD Linux. A symbolic link is + still created for `ocamlrun` pointing to this mangled name. Additionally, a + symbolic link is also created for `ocamlrun-a140`, using the Zinc-suffix. +- C stub libraries loaded by both the bytecode runtime and bytecode `Dynlink` + library are triplet- and Bytecode-suffixed. For example, + `dllunixbyt-x86_64-pc-linux-gnu-a140.so` contains the C stubs for the Unix + library for OCaml 5.5 configured with `--disable-flat-float-array` on 64-bit + Intel/AMD Linux. +- Shared versions of the bytecode and native runtimes (`libcamlrun_shared.so` + and `libasmrun_shared.so`) are triplet- and Bytecode/Native-suffixed + respectively. For example, `libasmrun-x86_64-pc-linux-gnu-a1k0.so` and + `libcamlrun-x86_64-pc-linux-gnu-a140.so` are OCaml 5.5 configured with + `--disable-flat-float-array` and `--enable-tsan` on 64-bit Intel/AMD Linux + (note the **tsan** bit not being set for the name of libcamlrun). + Additionally, symbolic links are also created for `libasmrun_shared.so` and + `libcamlrun_shared.so`. + +## Runtime ID + +A Runtime ID is a bit string describing a given OCaml runtime. At present, +20 bits are used, but the format is intended to be trivially extensible. +Ultimately, the only requirement is that each version and configuration +generates some kind of unique identifier which can then be used in filenames. + +- Bit 0 (**dev**): Development bit. This should be set for development versions + of OCaml or for customised compilers. If it is not set, the compiler should be + an unaltered official release. +- Bits 1-6 (**release**): OCaml release number. This is incremented for each + minor release of the compiler, with OCaml 3.12.0[^1] being release 0. At + present, the ordering of release numbers matches the semantic ordering of the + version numbers, but this is not guaranteed and should not be assumed[^2]. +- Bits 7-11 (**reserved**): Number of reserved header bits. This is the value + passed to `--enable-reserved-header-bits` when the compiler distribution was + configured. +- Bit 12 (**no-flat-float-array**): Set if the compiler distribution was + configured with `--disable-flat-float-array`. +- Bit 13 (**fp**): Set if the compiler distribution was configured with + `--enable-frame-pointers`. Affects the **native** runtime only. +- Bit 14 (**tsan**): For OCaml 5.2 onwards, set if the compiler distribution was + configured with `--enable-tsan`. Prior to OCaml 5.2, set if the compiler + distribution was configured with `--enable-spacetime` (this option was removed + in OCaml 4.12, meaning this bit is always unset for OCaml 4.12-5.1). Affects + the **native** runtime only. +- Bit 15 (**int31**): Set if the runtime uses 31-bit `int` values (i.e. runtimes + running on 32-bit systems). +- Bit 16 (**static**): Set if the runtime does not support shared libraries, + meaning dynamic loading of C code is not supported in bytecode, and native + dynlink is not supported at all. +- Bit 17 (**no-compression**): For OCaml 5.1 onwards, set if the runtime does + not support compressed marshalling. Prior to OCaml 5.1, set if the compiler + distribution was configured with `--enable-naked-pointers` (this bit was + always unset for OCaml 5.0, since it supports neither naked pointers nor + compressed marshalling). +- Bit 18 (**ansi**): Set if the compiler distribution was configured with the + legacy support `WINDOWS_UNICODE=ansi`. +- Bit 19 (**mutable-string**): Set if the compiler distribution was configured + with `--disable-force-safe-string`. This option was removed in OCaml 5.0, and + the bit is available for re-use. When this bit is unset, strings are + guaranteed to be immutable. + +The bit descriptions are designed such that the default configuration of the +latest version of the compiler has unset bits. The ordering of the bits is +designed to mean ID values in the same version of OCaml will usually have the +same opening sequence of characters (since `--enable-reserved-header-bits` is +now rarely used) and laying out the characters little-endian in the mangling +scheme means that the opening two characters of the Runtime ID define its +version (and consequently its length, should that change in future). + +[^1]: OCaml 3.12.0 was the first version where `ocamlrun` supported the `-vnum` +argument; the original author had a fantasy of backporting the scheme to the +entire 4.x series, but following some therapy stopped at 4.08. The release +numbering persists to allow for future madness. +[^2]: In particular, should there be any additional releases in the OCaml 4.x +series, these will have higher release numbers than releases already made in the +OCaml 5.x series. + +## Masks + +A particular configuration of the compiler has one Runtime ID, but this is used +in three different contexts where certain bits are masked out: + +1. _Bytecode Mask_: masks out bits which are only ever set by the native runtime + (at present, **fp** and **tsan**). +2. _Native Mask_: masks out bits which are only ever set by the bytecode runtime + (at present there aren't any). +3. _Zinc Mask_: masks out bits which are not related to bytecode portability. + Where the _Bytecode_ and _Native_ masks relate to _runtimes_, the _Zinc_ mask + relates to _bytecode images_. The Zinc ID therefore includes: + - **release** and **dev** (a given bytecode image targets a specific version + of OCaml) + - **no-flat-float-array** (code compiled assuming that float arrays are boxed + will segfault on runtimes which unbox them) + - **int31**, **static**, and **no-compression** (a bytecode image using + 63-bit integers, dynamically loaded C stubs and compressed marshalling will + be rejected by an interpreter which doesn't support any of these features) + +Note that the inclusion of a bit in a mask is determined by whether that +property affects the ability to load and execute the code, rather than whether +it is semantically affected by it. For example, the **reserved** bits affects +the value representation, and therefore both runtimes. It does not directly +affect bytecode (although a bytecode program may use unsafe features to observe +it). **reserved** is therefore part of both the _Bytecode_ and _Native Masks_, +but not part of the _Zinc Mask_. Similarly, although **no-flat-float-array** +affects code generation for bytecode, **mutable-string** never did, and so would +not be included in the _Zinc Mask_. diff --git a/runtime/backtrace_byt.c b/runtime/backtrace_byt.c index 13d2c7591b88..790e8f40620d 100644 --- a/runtime/backtrace_byt.c +++ b/runtime/backtrace_byt.c @@ -50,8 +50,6 @@ /* The table of debug information fragments */ struct ext_table caml_debug_info; -CAMLexport char_os * caml_cds_file = NULL; - /* Location of fields in the Instruct.debug_event record */ enum { EV_POS = 0, @@ -443,7 +441,7 @@ static void read_main_debug_info(struct debug_info *di) { CAMLparam0(); CAMLlocal3(events, evl, l); - char_os *exec_name; + const char_os *exec_name; int fd, num_events, orig; struct channel *chan; struct exec_trailer trail; @@ -451,21 +449,21 @@ static void read_main_debug_info(struct debug_info *di) CAMLassert(di->already_read == 0); di->already_read = 1; - /* At the moment, bytecode programs built with --output-complete-exe + /* At the moment, bytecode programs built with -output-complete-exe do not contain any debug info. See https://github.com/ocaml/ocaml/issues/9344 for details. */ - if (caml_params->cds_file == NULL && caml_byte_program_mode == COMPLETE_EXE) + if (caml_params->cds_file == NULL && caml_byte_program_mode == EMBEDDED) CAMLreturn0; if (caml_params->cds_file != NULL) { - exec_name = (char_os*) caml_params->cds_file; + exec_name = caml_params->cds_file; } else { - exec_name = (char_os*) caml_params->exe_name; + exec_name = caml_params->exe_name; } - fd = caml_attempt_open(&exec_name, &trail, 1); + fd = caml_attempt_open(exec_name, &trail, 1); if (fd < 0) { /* Record the failure of caml_attempt_open in di->already-read */ di->already_read = fd; diff --git a/runtime/caml/backtrace.h b/runtime/caml/backtrace.h index 40bbb637d6f7..e5501eb815ef 100644 --- a/runtime/caml/backtrace.h +++ b/runtime/caml/backtrace.h @@ -111,9 +111,6 @@ CAMLextern void caml_record_backtraces(int); #ifndef NATIVE_CODE -/* Path to the file containing debug information, if any, or NULL. */ -CAMLextern char_os * caml_cds_file; - /* Primitive called _only_ by runtime to record unwinded frames to * backtrace. A similar primitive exists for native code, but with a * different prototype. */ diff --git a/runtime/caml/dynlink.h b/runtime/caml/dynlink.h index 016a35cc72b6..a7441fcc0e4f 100644 --- a/runtime/caml/dynlink.h +++ b/runtime/caml/dynlink.h @@ -41,11 +41,10 @@ extern void caml_build_primitive_table_builtin(void); /* Unload all the previously loaded shared libraries */ extern void caml_free_shared_libs(void); -/* Return the effective location of the standard library */ -extern const char_os * caml_get_stdlib_location(void); - -/* Parse ld.conf and add the lines read to caml_shared_libs_path */ -extern char_os * caml_parse_ld_conf(void); +/* If found, parse $OCAMLLIB/ld.conf, $CAMLLIB/ld.conf and stdlib/ld.conf in + that order and add the lines read to table. */ +extern char_os * caml_parse_ld_conf(const char_os * stdlib, + struct ext_table * table); #endif /* CAML_INTERNALS */ diff --git a/runtime/caml/gc_ctrl.h b/runtime/caml/gc_ctrl.h index af160f3f4761..97e540d4d6cf 100644 --- a/runtime/caml/gc_ctrl.h +++ b/runtime/caml/gc_ctrl.h @@ -18,6 +18,8 @@ #ifdef CAML_INTERNALS +#include + #include "misc.h" CAMLextern atomic_uintnat caml_max_stack_wsize; @@ -28,6 +30,7 @@ void caml_init_gc (void); value caml_gc_stat(value); value caml_gc_major(value); +extern atomic_bool caml_runtime_randomized; #define caml_stat_top_heap_wsz caml_top_heap_words(Caml_state->shared_heap) #define caml_stat_compactions 0 diff --git a/runtime/caml/misc.h b/runtime/caml/misc.h index 0f8f7f0f0719..d2b451de12fa 100644 --- a/runtime/caml/misc.h +++ b/runtime/caml/misc.h @@ -524,6 +524,7 @@ extern double caml_log1p(double); #define strlen_os wcslen #define sscanf_os swscanf #define strcpy_os wcscpy +#define strdup_os wcsdup #define mktemp_os _wmktemp #define fopen_os _wfopen @@ -569,6 +570,7 @@ extern double caml_log1p(double); #define strlen_os strlen #define sscanf_os sscanf #define strcpy_os strcpy +#define strdup_os strdup #define mktemp_os mktemp #define fopen_os fopen diff --git a/runtime/caml/osdeps.h b/runtime/caml/osdeps.h index 0def271da2bd..cfc38575ffc9 100644 --- a/runtime/caml/osdeps.h +++ b/runtime/caml/osdeps.h @@ -96,6 +96,21 @@ void *caml_plat_mem_commit(void *, uintnat); void caml_plat_mem_decommit(void *, uintnat); void caml_plat_mem_unmap(void *, uintnat); +/* caml_locate_standard_library returns the location of the Standard Library. + The location returned is absolute, if stdlib_default is a relative path then + the result is computed relative to the directory portion of exe_name. + + If dirname is not NULL and stdlib_default is a relative path, a copy of the + directory name part of exe_name is returned. If stdlib_default is an absolute + path, dirname is never changed. + + Both strings are allocated with [caml_stat_alloc], so should be freed using + [caml_stat_free]. +*/ +CAMLextern char_os *caml_locate_standard_library (const char_os *exe_name, + const char_os *stdlib_default, + char_os **dirname); + #ifdef _WIN32 #include @@ -138,6 +153,20 @@ CAMLextern value caml_win32_xdg_defaults(void); CAMLextern value caml_win32_get_temp_path(void); +#define CAML_DIR_SEP T("\\") +#define Is_separator(c) (c == '\\' || c == '/') +#define EXT_DLL L".dll" + +#else + +#define CAML_DIR_SEP T("/") +#define Is_separator(c) (c == '/') +#define EXT_DLL ".so" + +/* As caml_search_exe_in_path, but returns NULL if the file cannot be found in + any of the directories specified in PATH. Used by stdlib/header.c */ +caml_stat_string caml_search_in_system_path(const char *); + #endif /* _WIN32 */ /* Returns the current value of a counter that increments once per nanosecond. @@ -151,6 +180,18 @@ CAMLextern uint64_t caml_time_counter(void); extern void caml_init_os_params(void); +/* True if: + - dir equals "." + - dir equals ".." + - dir begins "./" + - dir begins "../" + The tests for null avoid the need to call strlen_os. */ +#define Is_relative_dir(dir) \ + (dir[0] == '.' \ + && (dir[1] == 0 \ + || Is_separator(dir[1]) \ + || (dir[1] == '.' && (dir[2] == 0 || Is_separator(dir[2]))))) + #endif /* CAML_INTERNALS */ #ifdef _WIN32 diff --git a/runtime/caml/s.h.in b/runtime/caml/s.h.in index 5a6582615abb..aa05e8633479 100644 --- a/runtime/caml/s.h.in +++ b/runtime/caml/s.h.in @@ -72,6 +72,8 @@ #undef HAS_TIMES +#undef HAS_STRLCPY + #undef HAS_SECURE_GETENV #undef HAS___SECURE_GETENV @@ -116,6 +118,10 @@ #undef HAS_DECL_SETTHREADDESCRIPTION +#undef HAS_LIBGEN_H + +/* Define HAS_LIBGEN_H if you have /usr/include/libgen.h. */ + #undef HAS_DIRENT /* Define HAS_DIRENT if you have /usr/include/dirent.h and the result of diff --git a/runtime/caml/startup.h b/runtime/caml/startup.h index 49fc5b9d77b4..8e98ac66a302 100644 --- a/runtime/caml/startup.h +++ b/runtime/caml/startup.h @@ -39,7 +39,7 @@ CAMLextern value caml_startup_code_exn( /* These enum members should all be negative */ enum { FILE_NOT_FOUND = -1, BAD_BYTECODE = -2, WRONG_MAGIC = -3, NO_FDS = -4 }; -extern int caml_attempt_open(char_os **name, struct exec_trailer *trail, +extern int caml_attempt_open(const char_os *name, struct exec_trailer *trail, int do_open_script); extern int caml_read_trailer(int fd, struct exec_trailer *trail); extern void caml_read_section_descriptors(int fd, struct exec_trailer *trail); @@ -48,14 +48,19 @@ extern int32_t caml_seek_optional_section(int fd, struct exec_trailer *trail, extern int32_t caml_seek_section(int fd, struct exec_trailer *trail, const char *name); -enum caml_byte_program_mode - { - STANDARD /* normal bytecode program requiring "ocamlrun" */, - COMPLETE_EXE /* embedding the vm, i.e. compiled with --output-complete-exe */ - }; +enum caml_byte_program_mode { + STANDARD, /* Default mode for ocamlrun */ + APPENDED, /* bytecode must be appended (i.e. -custom) */ + EMBEDDED /* bytecode embedded in C (e.g. -output-complete-exe/-output-obj) */ +}; extern enum caml_byte_program_mode caml_byte_program_mode; +/* The default location of the Standard Library as used by the runtime to find + ld.conf */ +extern const char_os *caml_runtime_standard_library_default; +extern const char_os *caml_runtime_standard_library_effective; + #endif /* CAML_INTERNALS */ #endif /* CAML_STARTUP_H */ diff --git a/runtime/caml/startup_aux.h b/runtime/caml/startup_aux.h index d0a44affbdf0..050a554988b0 100644 --- a/runtime/caml/startup_aux.h +++ b/runtime/caml/startup_aux.h @@ -43,8 +43,6 @@ struct caml_params { uintnat trace_level; uintnat runtime_events_log_wsize; uintnat verify_heap; - uintnat print_magic; - uintnat print_config; uintnat init_percent_free; uintnat init_minor_heap_wsz; @@ -62,6 +60,8 @@ struct caml_params { extern const struct caml_params* const caml_params; +extern const char_os *caml_executable_ocamlrunparam; + extern void caml_parse_ocamlrunparam (void); /* Common entry point to caml_startup. diff --git a/runtime/caml/sys.h b/runtime/caml/sys.h index 8f6afb9af73c..d4bb93161da0 100644 --- a/runtime/caml/sys.h +++ b/runtime/caml/sys.h @@ -35,6 +35,10 @@ CAMLextern void caml_sys_init (const char_os * proc_self_exe, CAMLnoret CAMLextern void caml_do_exit (int); +/* The default location of the Standard Library as used by the + %standard_library_default to find */ +extern char_os *caml_standard_library_default; + #endif /* CAML_INTERNALS */ #endif /* CAML_SYS_H */ diff --git a/runtime/caml/version.h.in b/runtime/caml/version.h.in index bfe5d70957dc..66c7611bc4a5 100644 --- a/runtime/caml/version.h.in +++ b/runtime/caml/version.h.in @@ -22,3 +22,4 @@ #undef OCAML_VERSION_EXTRA #undef OCAML_VERSION #undef OCAML_VERSION_STRING +#undef OCAML_RELEASE_NUMBER diff --git a/runtime/dynlink.c b/runtime/dynlink.c index d3e6b1b5345e..b21b124125e1 100644 --- a/runtime/dynlink.c +++ b/runtime/dynlink.c @@ -43,23 +43,30 @@ #include "build_config.h" -#ifndef NATIVE_CODE - #ifndef O_BINARY #define O_BINARY 0 #endif +#ifndef NATIVE_CODE + /* The table of primitives */ struct ext_table caml_prim_table; /* The names of primitives */ struct ext_table caml_prim_name_table; +#ifndef DEBUG +/* The buffer for the strings in caml_prim_name_table */ +static char *prim_names = NULL; +#endif /* The table of shared libraries currently opened */ static struct ext_table shared_libs; /* The search path for shared libraries */ struct ext_table caml_shared_libs_path; +/* Buffers under-pinning caml_shared_libs_path */ +static char_os *shared_libs_buffer1 = NULL; +static char_os *shared_libs_buffer2 = NULL; /* Look up the given primitive name in the built-in primitive table, then in the opened shared libraries (shared_libs) */ @@ -78,24 +85,49 @@ static c_primitive lookup_primitive(const char * name) return NULL; } +#endif /* NATIVE_CODE */ + /* Parse the ld.conf file and add the directories listed there to the search path */ #define LD_CONF_NAME T("ld.conf") -CAMLexport const char_os * caml_get_stdlib_location(void) +/* Return a copy of [path], interpreting explicit-relative paths relative to + [root]. [root] must not end with a directory separator. The result of this + function can never be ".", ".." or a path beginning "./" or "../". Note that + the function does not necessary canonicalise the path. */ +static char_os *make_relative_path_absolute(char_os *path, char_os *root) { - const char_os * stdlib; - stdlib = caml_secure_getenv(T("OCAMLLIB")); - if (stdlib == NULL) stdlib = caml_secure_getenv(T("CAMLLIB")); - if (stdlib == NULL) stdlib = OCAML_STDLIB_DIR; - return stdlib; + if (path[0] == '.') { + if (path[1] == '\0') { + /* path is exactly "." => return root */ + return caml_stat_strdup_os(root); + } else if (Is_separator(path[1])) { + /* path is exactly "./" or begins "./". In both cases, replace the "." + with root */ + return caml_stat_strconcat_os(2, root, (path + 1)); + } else if (path[1] == '.' && (path[2] == '\0' || Is_separator(path[2]))) { + /* path is either exactly ".." or begins "../" => prefix it with root + (which has no trailing separator) */ + return caml_stat_strconcat_os(3, root, CAML_DIR_SEP, path); + } else { + /* path is not relative, but simply begins with a dot => return a copy */ + return caml_stat_strdup_os(path); + } + } else { + /* path is not relative => return a copy */ + return caml_stat_strdup_os(path); + } } -CAMLexport char_os * caml_parse_ld_conf(void) +CAMLexport char_os * caml_parse_ld_conf(const char_os * stdlib, + struct ext_table *table) { - const char_os * stdlib; - char_os * ldconfname, * wconfig, * p, * q; + const char_os * const locations[3] = { + caml_secure_getenv(T("OCAMLLIB")), + caml_secure_getenv(T("CAMLLIB")), + stdlib}; + char_os * libroot, * ldconfname, * wconfig, * p, * q; char * config; #ifdef _WIN32 struct _stati64 st; @@ -103,48 +135,140 @@ CAMLexport char_os * caml_parse_ld_conf(void) struct stat st; #endif int ldconf, nread; + size_t length = 0; + struct ext_table entries; + + /* Use a temporary ext_table to hold the individually-allocated entries */ + caml_ext_table_init(&entries, 8); + for (int i = 0; i < sizeof(locations) / sizeof(locations[0]); i++) { + if (locations[i] != NULL && *locations[i] != '\0') { + libroot = caml_stat_strdup_os(locations[i]); + size_t libroot_length = strlen_os(libroot); + if (libroot_length > 1 && Is_separator(libroot[libroot_length - 1])) + libroot[libroot_length - 1] = '\0'; + ldconfname = + caml_stat_strconcat_os(3, libroot, CAML_DIR_SEP, LD_CONF_NAME); + if (stat_os(ldconfname, &st) == -1) { + caml_stat_free(ldconfname); + caml_stat_free(libroot); + continue; + } + ldconf = open_os(ldconfname, O_RDONLY | O_BINARY, 0); + if (ldconf == -1) + caml_fatal_error("cannot read loader config file %s", + caml_stat_strdup_of_os(ldconfname)); + config = caml_stat_alloc(st.st_size + 1); + nread = read(ldconf, config, st.st_size); + if (nread == -1) + caml_fatal_error + ("error while reading loader config file %s", + caml_stat_strdup_of_os(ldconfname)); + close(ldconf); + config[nread] = 0; + wconfig = caml_stat_strdup_to_os(config); + caml_stat_free(config); + caml_stat_free(ldconfname); + + p = wconfig; + while (*p != '\0') { + for (q = p; *q != '\0' && *q != '\n'; q++) /*nothing*/; + char_os *r = q; + if (*q == '\n') { + r++; + /* Ignore any trailing CR characters, so that CR*LF is uniformly + treated as a single LF. */ + while (q > p && *(q - 1) == '\r') + q--; + } + if (p < q) { + *q = '\0'; + char_os *entry = make_relative_path_absolute(p, libroot); + length += strlen_os(entry) + 1; + caml_ext_table_add(&entries, entry); + } + p = r; + } - stdlib = caml_get_stdlib_location(); - ldconfname = caml_stat_strconcat_os(3, stdlib, T("/"), LD_CONF_NAME); - if (stat_os(ldconfname, &st) == -1) { - caml_stat_free(ldconfname); - return NULL; - } - ldconf = open_os(ldconfname, O_RDONLY, 0); - if (ldconf == -1) - caml_fatal_error("cannot read loader config file %s", - caml_stat_strdup_of_os(ldconfname)); - config = caml_stat_alloc(st.st_size + 1); - nread = read(ldconf, config, st.st_size); - if (nread == -1) - caml_fatal_error - ("error while reading loader config file %s", - caml_stat_strdup_of_os(ldconfname)); - config[nread] = 0; - wconfig = caml_stat_strdup_to_os(config); - caml_stat_free(config); - q = wconfig; - for (p = wconfig; *p != 0; p++) { - if (*p == '\n') { - *p = 0; - caml_ext_table_add(&caml_shared_libs_path, q); - q = p + 1; + caml_stat_free(wconfig); + caml_stat_free(libroot); } } - if (q < p) caml_ext_table_add(&caml_shared_libs_path, q); - close(ldconf); - caml_stat_free(ldconfname); - return wconfig; + + /* Now concatenate them all and load the search path */ + char_os *result = caml_stat_alloc(length * sizeof(char_os)); + p = result; + for (int i = 0; i < entries.size; i++) { + char_os *entry = entries.contents[i]; + length = strlen_os(entry) + 1; + memcpy(p, entry, length * sizeof(char_os)); + caml_ext_table_add(table, p); + p += length; + } + caml_ext_table_free(&entries, 1); + + return result; +} + +/* Exposes caml_parse_ld_conf as a primitive for the bytecode compiler, saving + the duplication of the logic with the bytecode compiler. */ +CAMLprim value caml_dynlink_parse_ld_conf(value vstdlib) +{ + CAMLparam1(vstdlib); + CAMLlocal2(list, str); + + char_os *stdlib = caml_stat_strdup_to_os(String_val(vstdlib)); + struct ext_table table; + caml_ext_table_init(&table, 8); + char_os *tofree = caml_parse_ld_conf(stdlib, &table); + caml_stat_free(stdlib); + + list = Val_emptylist; + for (int i = table.size - 1; i >= 0; i--) { + str = caml_copy_string_of_os(table.contents[i]); + list = caml_alloc_2(Tag_cons, str, list); + } + + caml_ext_table_free(&table, 0); + caml_stat_free(tofree); + + CAMLreturn(list); +} + +#ifndef NATIVE_CODE + +char_os * caml_search_dll_in_path(struct ext_table * path, const char_os * name) +{ + char_os * dllname; + char_os * res; + + dllname = caml_stat_strconcat_os(2, name, EXT_DLL); + res = caml_search_in_path(path, dllname); + caml_stat_free(dllname); + return res; } /* Open the given shared library and add it to shared_libs. Abort on error. */ static void open_shared_lib(char_os * name) { - char_os * realname; + char_os * realname, * suffixed = NULL; char * u8; void * handle; + if (*name == '\0') + caml_fatal_error("corrupt DLLS section"); + + if (*name == '-') { + char * suffix = + caml_stat_strconcat(4, "-", HOST, "-", BYTECODE_RUNTIME_ID); + char_os * suffix_os = caml_stat_strdup_to_os(suffix); + name = suffixed = caml_stat_strconcat_os(2, name + 1, suffix_os); + caml_stat_free(suffix_os); + caml_stat_free(suffix); + } else { + name++; + } + realname = caml_search_dll_in_path(&caml_shared_libs_path, name); u8 = caml_stat_strdup_of_os(realname); CAML_GC_MESSAGE(STARTUP, "Loading shared library %s\n", u8); @@ -161,6 +285,7 @@ static void open_shared_lib(char_os * name) caml_dlerror() ); caml_ext_table_add(&shared_libs, handle); + caml_stat_free(suffixed); caml_stat_free(realname); } @@ -175,16 +300,21 @@ void caml_build_primitive_table(char_os * lib_path, - directories specified on the command line with the -I option - directories specified in the CAML_LD_LIBRARY_PATH - directories specified in the executable + - directories specified in OCAMLLIB/ld.conf + - directories specified in CAMLLIB/ld.conf - directories specified in the file /ld.conf caml_shared_libs_path and caml_prim_name_table are not freed afterwards: they may later be used by caml_dynlink_get_bytecode_sections. */ - caml_decompose_path(&caml_shared_libs_path, - caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); + shared_libs_buffer1 = + caml_decompose_path(&caml_shared_libs_path, + caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); if (lib_path != NULL) for (char_os *p = lib_path; *p != 0; p += strlen_os(p) + 1) caml_ext_table_add(&caml_shared_libs_path, p); - caml_parse_ld_conf(); + shared_libs_buffer2 = + caml_parse_ld_conf(caml_runtime_standard_library_effective, + &caml_shared_libs_path); /* Open the shared libraries */ caml_ext_table_init(&shared_libs, 8); if (libs != NULL) @@ -193,14 +323,18 @@ void caml_build_primitive_table(char_os * lib_path, /* Build the primitive table */ caml_ext_table_init(&caml_prim_table, 0x180); caml_ext_table_init(&caml_prim_name_table, 0x180); - if (req_prims != NULL) + if (req_prims != NULL) { +#ifndef DEBUG + prim_names = req_prims; +#endif for (char *q = req_prims; *q != 0; q += strlen(q) + 1) { c_primitive prim = lookup_primitive(q); if (prim == NULL) caml_fatal_error("unknown C primitive `%s'", q); caml_ext_table_add(&caml_prim_table, (void *) prim); - caml_ext_table_add(&caml_prim_name_table, caml_stat_strdup(q)); + caml_ext_table_add(&caml_prim_name_table, q); } + } } /* Build the table of primitives as a copy of the builtin primitive table. @@ -284,6 +418,13 @@ CAMLprim value caml_dynlink_get_bytecode_sections(value unit) list = caml_alloc_2(Tag_cons, str, list); } Store_field(ret, 2, list); +#ifndef DEBUG + if (caml_prim_name_table.size > 0) { + /* caml_prim_name_table is no longer required */ + caml_ext_table_free(&caml_prim_name_table, 0); + caml_stat_free(prim_names); + } +#endif list = Val_emptylist; for (int i = caml_shared_libs_path.size - 1; i >= 0; i--) { @@ -292,6 +433,13 @@ CAMLprim value caml_dynlink_get_bytecode_sections(value unit) } Store_field(ret, 3, list); + if (caml_shared_libs_path.size > 0) { + /* caml_shared_libs_path is no longer required */ + caml_ext_table_free(&caml_shared_libs_path, 0); + caml_stat_free(shared_libs_buffer1); + caml_stat_free(shared_libs_buffer2); + } + CAMLreturn (ret); } diff --git a/runtime/extern.c b/runtime/extern.c index dcb860957d75..8d14f9ee0fac 100644 --- a/runtime/extern.c +++ b/runtime/extern.c @@ -19,6 +19,7 @@ /* The interface of this file is "caml/intext.h" */ +#include #include #include @@ -111,6 +112,10 @@ struct caml_extern_state { struct caml_output_block * extern_output_first; struct caml_output_block * extern_output_block; + + /* extern_value sets this to true if the value written was 32-bit compatible. + Set regardless of COMPAT_32 (see caml_output_value_with_compat) */ + bool compat_32; }; static void init_extern_stack(struct caml_extern_state* s) @@ -571,6 +576,7 @@ Caml_inline void extern_int(struct caml_extern_state* s, intnat n) writecode16(s, CODE_INT16, n); #ifdef ARCH_SIXTYFOUR } else if (n < -((intnat)1 << 30) || n >= ((intnat)1 << 30)) { + s->compat_32 = false; if (s->extern_flags & COMPAT_32) extern_failwith(s, "output_value: integer cannot be read back on " "32-bit platform"); @@ -609,9 +615,12 @@ Caml_inline void extern_header(struct caml_extern_state* s, } else { header_t hd = Make_header(sz, tag, NOT_MARKABLE); #ifdef ARCH_SIXTYFOUR - if (sz > 0x3FFFFF && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: array cannot be read back on " - "32-bit platform"); + if (sz > 0x3FFFFF) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + extern_failwith(s, "output_value: array cannot be read back on " + "32-bit platform"); + } if (hd < (uintnat)1 << 32) writecode32(s, CODE_BLOCK32, hd); else @@ -633,9 +642,12 @@ Caml_inline void extern_string(struct caml_extern_state *s, writecode8(s, CODE_STRING8, len); } else { #ifdef ARCH_SIXTYFOUR - if (len > 0xFFFFFB && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: string cannot be read back on " - "32-bit platform"); + if (len > 0xFFFFFB) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + extern_failwith(s, "output_value: string cannot be read back on " + "32-bit platform"); + } if (len < (uintnat)1 << 32) writecode32(s, CODE_STRING32, len); else @@ -664,9 +676,12 @@ Caml_inline void extern_double_array(struct caml_extern_state* s, writecode8(s, CODE_DOUBLE_ARRAY8_NATIVE, nfloats); } else { #ifdef ARCH_SIXTYFOUR - if (nfloats > 0x1FFFFF && (s->extern_flags & COMPAT_32)) - extern_failwith(s, "output_value: float array cannot be read back on " - "32-bit platform"); + if (nfloats > 0x1FFFFF) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) + extern_failwith(s, "output_value: float array cannot be read back on " + "32-bit platform"); + } if (nfloats < (uintnat) 1 << 32) writecode32(s, CODE_DOUBLE_ARRAY32_NATIVE, nfloats); else @@ -927,6 +942,7 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, intnat res_len; /* Parse flag list */ s->extern_flags = caml_convert_flag_list(flags, extern_flag_values); + s->compat_32 = true; /* Turn compression off if Zlib missing or if called from caml_output_value_to_block */ #ifdef HAS_ZSTD @@ -950,14 +966,16 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, res_len = extern_output_length(s); /* Check lengths if compat32 mode is requested */ #ifdef ARCH_SIXTYFOUR - if (s->extern_flags & COMPAT_32 - && (uncompressed_len >= (uintnat)1 << 32 - || res_len >= (uintnat)1 << 32 - || s->size_32 >= (uintnat)1 << 32 - || s->size_64 >= (uintnat)1 << 32)) { - free_extern_output(s); - caml_failwith("output_value: object too big to be read back on " - "32-bit platform"); + if (uncompressed_len >= (uintnat)1 << 32 + || res_len >= (uintnat)1 << 32 + || s->size_32 >= (uintnat)1 << 32 + || s->size_64 >= (uintnat)1 << 32) { + s->compat_32 = false; + if (s->extern_flags & COMPAT_32) { + free_extern_output(s); + caml_failwith("output_value: object too big to be read back on " + "32-bit platform"); + } } #endif /* Write the header in compressed format */ @@ -979,6 +997,7 @@ static intnat extern_value(struct caml_extern_state* s, value v, value flags, s->size_32 >= ((intnat)1 << 32) || s->size_64 >= ((intnat)1 << 32)) { /* The object is too big for the small header format. Fail if we are in compat32 mode, or use big header. */ + s->compat_32 = false; if (s->extern_flags & COMPAT_32) { free_extern_output(s); caml_failwith("output_value: object too big to be read back on " @@ -1039,6 +1058,13 @@ CAMLprim value caml_output_value(value vchan, value v, value flags) CAMLreturn (Val_unit); } +CAMLprim value caml_output_value_with_compat(value vchan, value v, value flags) +{ + struct caml_extern_state* s = init_extern_state (); + caml_output_value(vchan, v, flags); + return Val_bool(s->compat_32); +} + CAMLprim value caml_output_value_to_bytes(value v, value flags) { char header[MAX_INTEXT_HEADER_SIZE]; @@ -1281,6 +1307,7 @@ CAMLprim value caml_obj_reachable_words(value v) s->obj_counter = 0; s->extern_flags = 0; + s->compat_32 = true; extern_init_position_table(s); sp = s->extern_stack; size = 0; diff --git a/runtime/gc_ctrl.c b/runtime/gc_ctrl.c index 85946e63309e..8adf48679292 100644 --- a/runtime/gc_ctrl.c +++ b/runtime/gc_ctrl.c @@ -372,15 +372,30 @@ CAMLprim value caml_runtime_variant (value unit) #endif } +atomic_bool caml_runtime_randomized = false; + +CAMLprim value caml_runtime_randomize(value vunit) +{ + caml_runtime_randomized = true; + return Val_unit; +} + +CAMLprim value caml_runtime_is_randomized(value vunit) +{ + return Val_bool(caml_runtime_randomized); +} + CAMLprim value caml_runtime_parameters (value unit) { #define F_Z CAML_PRIuNAT #define F_S CAML_PRIuSZT CAMLassert (unit == Val_unit); + /* Parameters in alphabetical order; if an option has both upper/lower then + upper first cf. Compenv.overridden_runtime_parameters */ return caml_alloc_sprintf ("b=%d,c=%"F_Z",e=%"F_Z",l=%"F_Z",M=%"F_Z",m=%"F_Z",n=%"F_Z"," - "o=%"F_Z",p=%d,s=%"F_S",t=%"F_Z",v=%"F_Z",V=%"F_Z",W=%"F_Z"", + "o=%"F_Z",p=%d,R=%u,s=%"F_S",t=%"F_Z",V=%"F_Z",v=%"F_Z",W=%"F_Z"", /* b */ (int) Caml_state->backtrace_active, /* c */ caml_params->cleanup_on_exit, /* e */ caml_params->runtime_events_log_wsize, @@ -390,11 +405,11 @@ CAMLprim value caml_runtime_parameters (value unit) /* n */ caml_custom_minor_max_bsz, /* o */ caml_percent_free, /* p */ Caml_state->parser_trace, - /* R */ /* missing */ + /* R */ caml_runtime_randomized, /* s */ Caml_state->minor_heap_wsz, /* t */ caml_params->trace_level, - /* v */ caml_verb_gc, /* V */ caml_params->verify_heap, + /* v */ caml_verb_gc, /* W */ caml_runtime_warnings ); #undef F_Z diff --git a/runtime/gen_primsc.sh b/runtime/gen_primsc.sh index c18ab95a0987..d02728f16c1d 100755 --- a/runtime/gen_primsc.sh +++ b/runtime/gen_primsc.sh @@ -31,6 +31,8 @@ cat <<'EOF' #define CAML_INTERNALS #include "caml/mlvalues.h" #include "caml/prims.h" +#include "caml/startup.h" +#include "build_config.h" EOF @@ -61,3 +63,14 @@ echo echo 'const char * const caml_names_of_builtin_cprim[] = {' sed -e 's/.*/ "&",/' "$primitives" echo ' 0 };' + +# ocamlrun values for symbols which are provided by the bytecode linker +# - ocamlrun is able to use any of the mechanisms to load the bytecode +# - caml_runtime_standard_library_default for bytecode images on this runtime +# - stub default OCAMLRUNPARAM string +cat <<'EOF' + +enum caml_byte_program_mode caml_byte_program_mode = STANDARD; +const char_os *caml_runtime_standard_library_default = OCAML_STDLIB_DIR; +const char_os *caml_executable_ocamlrunparam = NULL; +EOF diff --git a/runtime/meta.c b/runtime/meta.c index 40cb506f7266..5d6e18c4a9a2 100644 --- a/runtime/meta.c +++ b/runtime/meta.c @@ -204,31 +204,13 @@ CAMLprim value caml_invoke_traced_function(value codeptr, value env, value arg) value caml_get_global_data(value unit) { - caml_invalid_argument("Meta.get_global_data"); + caml_invalid_argument("Symtable.get_global_data"); return Val_unit; /* not reached */ } value caml_realloc_global(value size) { - caml_invalid_argument("Meta.realloc_global"); - return Val_unit; /* not reached */ -} - -value caml_invoke_traced_function(value codeptr, value env, value arg) -{ - caml_invalid_argument("Meta.invoke_traced_function"); - return Val_unit; /* not reached */ -} - -value caml_reify_bytecode(value prog, value len) -{ - caml_invalid_argument("Meta.reify_bytecode"); - return Val_unit; /* not reached */ -} - -value caml_static_release_bytecode(value prog, value len) -{ - caml_invalid_argument("Meta.static_release_bytecode"); + caml_invalid_argument("Symtable.realloc_global"); return Val_unit; /* not reached */ } diff --git a/runtime/runtime_events.c b/runtime/runtime_events.c index 5b9578b7589a..67ee95c1e5e9 100644 --- a/runtime/runtime_events.c +++ b/runtime/runtime_events.c @@ -138,17 +138,19 @@ void caml_runtime_events_init(void) { runtime_events_path = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_DIR")); - if (runtime_events_path) { + if (runtime_events_path && *runtime_events_path != '\0') { /* caml_secure_getenv's return shouldn't be cached */ runtime_events_path = caml_stat_strdup_os(runtime_events_path); } ring_size_words = 1 << caml_params->runtime_events_log_wsize; + char_os *value = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_PRESERVE")); preserve_ring = - caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_PRESERVE")) ? 1 : 0; + (value && *value != '\0') ? 1 : 0; - if (caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_START"))) { + value = caml_secure_getenv(T("OCAML_RUNTIME_EVENTS_START")); + if (value && *value != '\0') { runtime_events_create_from_stw_single(); /* stw_single: mutators and domains have not started yet. */ } diff --git a/runtime/startup_aux.c b/runtime/startup_aux.c index 70f13ed94bc7..21535de8ab12 100644 --- a/runtime/startup_aux.c +++ b/runtime/startup_aux.c @@ -27,12 +27,14 @@ #ifndef NATIVE_CODE #include "caml/dynlink.h" #endif +#include "caml/gc_ctrl.h" #include "caml/gc_stats.h" #include "caml/osdeps.h" #include "caml/shared_heap.h" #include "caml/startup_aux.h" #include "caml/prims.h" #include "caml/signals.h" +#include "caml/platform.h" #ifdef _WIN32 extern void caml_win32_unregister_overflow_detection (void); @@ -62,19 +64,20 @@ static void init_startup_params(void) atomic_store_relaxed(&caml_verb_gc, CAML_GC_MSG_VERBOSE | CAML_GC_MSG_MINOR); #endif #ifndef NATIVE_CODE + /* TODO #4703 The .cds file should be determined from exe_name */ cds_file = caml_secure_getenv(T("CAML_DEBUG_FILE")); - if (cds_file != NULL) { - params.cds_file = caml_stat_strdup_os(cds_file); + /* Ignore CAML_DEBUG_FILE if it's "Set But Null" */ + if (cds_file != NULL && *cds_file != '\0') { + /* Largely by historical accident, resolve CAML_DEBUG_FILE in PATH */ + params.cds_file = caml_search_exe_in_path(cds_file); } #endif params.trace_level = 0; params.cleanup_on_exit = 0; - params.print_magic = 0; - params.print_config = 0; params.event_trace = 0; } -static void scanmult (char_os *opt, uintnat *var) +static void scanmult (const char_os *opt, uintnat *var) { char_os mult = ' '; unsigned int val = 1; @@ -88,14 +91,10 @@ static void scanmult (char_os *opt, uintnat *var) } } -void caml_parse_ocamlrunparam(void) +/* To keep in sync with Compenv.parse_runtime_parameter */ +static void parse_ocamlrunparam(const char_os *opt) { - init_startup_params(); uintnat val; - - char_os *opt = caml_secure_getenv (T("OCAMLRUNPARAM")); - if (opt == NULL) opt = caml_secure_getenv (T("CAMLRUNPARAM")); - if (opt != NULL){ while (*opt != '\0'){ switch (*opt++){ @@ -109,7 +108,10 @@ void caml_parse_ocamlrunparam(void) case 'n': scanmult (opt, ¶ms.init_custom_minor_max_bsz); break; case 'o': scanmult (opt, ¶ms.init_percent_free); break; case 'p': scanmult (opt, ¶ms.parser_trace); break; - case 'R': break; /* see stdlib/hashtbl.mli */ + case 'R': + scanmult (opt, &val); + caml_runtime_randomized = !!val; + break; case 's': scanmult (opt, ¶ms.init_minor_heap_wsz); break; case 't': scanmult (opt, ¶ms.trace_level); break; case 'v': @@ -136,6 +138,19 @@ void caml_parse_ocamlrunparam(void) } } +void caml_parse_ocamlrunparam(void) +{ + init_startup_params(); + + /* Update any of this runtime's default parameter values with the defaults + specified in the executable/image */ + parse_ocamlrunparam(caml_executable_ocamlrunparam); + + /* Now parse OCAMLRUNPARAM/CAMLRUNPARAM for values specified by the user */ + const char_os *opt = caml_secure_getenv (T("OCAMLRUNPARAM")); + if (opt == NULL || *opt == '\0') opt = caml_secure_getenv (T("CAMLRUNPARAM")); + parse_ocamlrunparam(opt); +} /* The number of outstanding calls to caml_startup */ static int startup_count = 0; diff --git a/runtime/startup_byt.c b/runtime/startup_byt.c index 0483fb36240b..3c2847434866 100644 --- a/runtime/startup_byt.c +++ b/runtime/startup_byt.c @@ -22,11 +22,13 @@ #include #include #include +#include #include "caml/config.h" #ifndef _WIN32 #include #endif #ifdef _WIN32 +#include #include #include #endif @@ -72,10 +74,15 @@ #define SEEK_END 2 #endif +const char_os * caml_runtime_standard_library_effective = NULL; + +static bool print_magic = false; +static bool print_config = false; + static char magicstr[EXEC_MAGIC_LENGTH+1]; /* Print the specified error message followed by an end-of-line and exit */ -static void error(const char *msg, ...) +CAMLnoret static void error(const char *msg, ...) { va_list ap; va_start(ap, msg); @@ -104,7 +111,7 @@ int caml_read_trailer(int fd, struct exec_trailer *trail) memcpy(magicstr, trail->magic, EXEC_MAGIC_LENGTH); magicstr[EXEC_MAGIC_LENGTH] = 0; - if (caml_params->print_magic) { + if (print_magic) { printf("%s\n", magicstr); exit(0); } @@ -113,46 +120,41 @@ int caml_read_trailer(int fd, struct exec_trailer *trail) ? 0 : WRONG_MAGIC; } -enum caml_byte_program_mode caml_byte_program_mode = STANDARD; - -int caml_attempt_open(char_os **name, struct exec_trailer *trail, +int caml_attempt_open(const char_os *name, struct exec_trailer *trail, int do_open_script) { - char_os * truename; int fd; int err; - char buf [2], * u8; + char *u8; - truename = caml_search_exe_in_path(*name); - u8 = caml_stat_strdup_of_os(truename); + u8 = caml_stat_strdup_of_os(name); CAML_GC_MESSAGE(STARTUP, "Opening bytecode executable %s\n", u8); caml_stat_free(u8); - fd = open_os(truename, O_RDONLY | O_BINARY); + fd = open_os(name, O_RDONLY | O_BINARY); if (fd == -1) { - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Cannot open file\n"); if (errno == EMFILE) return NO_FDS; else return FILE_NOT_FOUND; } +#ifndef _WIN32 + char buf[2]; if (!do_open_script) { err = read (fd, buf, 2); - if (err < 2 || (buf [0] == '#' && buf [1] == '!')) { + if (err < 2 || (buf[0] == '#' && buf[1] == '!')) { close(fd); - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Rejected #! script\n"); return BAD_BYTECODE; } } +#endif err = caml_read_trailer(fd, trail); if (err != 0) { close(fd); - caml_stat_free(truename); CAML_GC_MESSAGE(STARTUP, "Not a bytecode executable\n"); return err; } - *name = truename; return fd; } @@ -300,12 +302,12 @@ static void do_print_help(void) /* Parse options on the command line */ -static int parse_command_line(char_os **argv) +static int parse_command_line(char_os **argv, + uintnat *trace_level, + uintnat *backtrace_enabled, + uintnat *event_trace) { int i, len, parsed; - /* cast to make caml_params mutable; this assumes we are only called - by one thread at startup */ - struct caml_params* params = (struct caml_params*)caml_params; for(i = 1; argv[i] != NULL && argv[i][0] == '-'; i++) { len = strlen_os(argv[i]); @@ -317,7 +319,7 @@ static int parse_command_line(char_os **argv) return i + 1; break; case 't': - params->trace_level += 1; /* ignored unless DEBUG mode */ + *trace_level += 1; /* ignored unless DEBUG mode */ break; case 'v': atomic_store_relaxed(&caml_verb_gc, CAML_GC_MSG_VERBOSE); @@ -328,7 +330,7 @@ static int parse_command_line(char_os **argv) exit(0); break; case 'b': - params->backtrace_enabled = 1; + *backtrace_enabled = 1; break; case 'I': if (argv[i + 1] != NULL) { @@ -339,7 +341,7 @@ static int parse_command_line(char_os **argv) } break; case 'm': - params->print_magic = 1; + print_magic = true; break; case 'M': printf("%s\n", EXEC_MAGIC); @@ -357,13 +359,13 @@ static int parse_command_line(char_os **argv) printf("%s\n", OCAML_VERSION_STRING); exit(0); } else if (!strcmp_os(argv[i], T("-events"))) { - params->event_trace = 1; /* Ignored unless DEBUG mode */ + *event_trace = 1; /* Ignored unless DEBUG mode */ } else if (!strcmp_os(argv[i], T("-help")) || !strcmp_os(argv[i], T("--help"))) { do_print_help(); exit(0); } else if (!strcmp_os(argv[i], T("-config"))) { - params->print_config = 1; + print_config = true; } else { parsed = 0; } @@ -376,22 +378,30 @@ static int parse_command_line(char_os **argv) return i; } +static const char_os * get_stdlib_location(void) +{ + const char_os * stdlib; + stdlib = caml_secure_getenv(T("OCAMLLIB")); + if (stdlib == NULL) stdlib = caml_secure_getenv(T("CAMLLIB")); + if (stdlib == NULL) stdlib = caml_runtime_standard_library_effective; + return stdlib; +} + /* Print the configuration of the runtime to stdout; memory allocated is not freed, since the runtime will terminate after calling this. */ static void do_print_config(void) { - const char_os * dir; - /* Print the runtime configuration */ printf("version: %s\n", OCAML_VERSION_STRING); printf("standard_library_default: %s\n", - caml_stat_strdup_of_os(OCAML_STDLIB_DIR)); + caml_stat_strdup_of_os(caml_runtime_standard_library_default)); printf("standard_library: %s\n", - caml_stat_strdup_of_os(caml_get_stdlib_location())); + caml_stat_strdup_of_os(get_stdlib_location())); printf("int_size: %d\n", 8 * (int)sizeof(value)); printf("word_size: %d\n", 8 * (int)sizeof(value) - 1); printf("os_type: %s\n", OCAML_OS_TYPE); printf("host: %s\n", HOST); + printf("bytecode_runtime_id: %s\n", BYTECODE_RUNTIME_ID); printf("flat_float_array: %s\n", #ifdef FLAT_FLOAT_ARRAY "true"); @@ -424,18 +434,10 @@ static void do_print_config(void) puts("shared_libs_path:"); caml_decompose_path(&caml_shared_libs_path, caml_secure_getenv(T("CAML_LD_LIBRARY_PATH"))); - caml_parse_ld_conf(); - for (int i = 0; i < caml_shared_libs_path.size; i++) { - dir = caml_shared_libs_path.contents[i]; - if (dir[0] == 0) -#ifdef _WIN32 - /* See caml_search_in_path in win32.c */ - continue; -#else - dir = "."; -#endif - printf(" %s\n", caml_stat_strdup_of_os(dir)); - } + caml_parse_ld_conf(caml_runtime_standard_library_effective, + &caml_shared_libs_path); + for (int i = 0; i < caml_shared_libs_path.size; i++) + printf(" %s\n", caml_stat_strdup_of_os(caml_shared_libs_path.contents[i])); } #ifdef _WIN32 @@ -453,83 +455,276 @@ extern void caml_install_invalid_parameter_handler(void); CAMLexport void caml_main(char_os **argv) { - int fd, pos; + int fd = -1, pos; struct exec_trailer trail; struct channel * chan; value res; char * req_prims; char_os * shared_lib_path, * shared_libs; - char_os * exe_name, * proc_self_exe; + char_os * exe_name = NULL, * proc_self_exe, * argv0, * tofree = NULL; + /* Only one thread at startup - caml_params won't be mutated once the VM + starts */ + struct caml_params* params = (struct caml_params*)caml_params; + uintnat trace_level = 0, backtrace_enabled = 0, event_trace = 0; - /* Determine options */ +#ifdef _MSC_VER + caml_install_invalid_parameter_handler(); +#endif + + /* Parse OCAMLRUNPARAM - for -custom, -output-obj, etc. this will take + caml_executable_ocamlrunparam into account, but for tendered bytecode + images (or for explicit invocation as ocamlrun ./foo.byte) the ORUN section + has not yet been read. The only relevant setting between here and ORUN + being read is c=1 (pooling). If ORUN includes c=1 and OCAMLRUNPARAM does + not include c=0, then a brief memory dance is done to re-initialise the + runtime in pooling mode. */ caml_parse_ocamlrunparam(); if (!caml_startup_aux(/* pooling */ caml_params->cleanup_on_exit)) return; - caml_init_codefrag(); - - caml_init_locale(); -#ifdef _MSC_VER - caml_install_invalid_parameter_handler(); -#endif - caml_init_custom_operations(); - caml_init_os_params(); - caml_ext_table_init(&caml_shared_libs_path, 8); - /* Determine position of bytecode file */ pos = 0; - /* First, try argv[0] (when ocamlrun is called by a bytecode program) */ - exe_name = argv[0]; - fd = caml_attempt_open(&exe_name, &trail, 0); + argv0 = proc_self_exe = caml_executable_name(); - proc_self_exe = caml_executable_name(); + /* caml_shared_libs_path is used by parse_command_line */ + caml_ext_table_init(&caml_shared_libs_path, 8); - /* Little grasshopper wonders why we do that at all, since - "The current executable is ocamlrun itself, it's never a bytecode - program". Little grasshopper "ocamlc -custom" in mind should keep. - With -custom, we have an executable that is ocamlrun itself - concatenated with the bytecode. So, if the attempt with argv[0] - failed, it is worth trying again with executable_name. */ - if (fd < 0 && proc_self_exe != NULL) { - exe_name = proc_self_exe; - fd = caml_attempt_open(&exe_name, &trail, 0); - } + char_os *str_fd = NULL; + /* -custom executables do not inspect __OCAML_EXEC_FD */ + if (caml_byte_program_mode != APPENDED) + str_fd = caml_secure_getenv(T("__OCAML_EXEC_FD")); - if (fd < 0) { - pos = parse_command_line(argv); - if (caml_params->print_config) { - do_print_config(); - exit(0); + if (str_fd != NULL) { +#ifdef _WIN32 + /* On Windows, __OCAML_EXEC_FD must be exactly one wchar_t and the scalar + value of that character is the fd number */ + if (wcslen(str_fd) != 1) + error("descriptor passed via environment is invalid"); + else + fd = (int)str_fd[0]; + + DWORD len = + GetFinalPathNameByHandle((HANDLE)_get_osfhandle(fd), + NULL, 0, VOLUME_NAME_DOS); + if (len > 0) { + exe_name = caml_stat_alloc((len + 1) * sizeof(wchar_t)); + if (GetFinalPathNameByHandle((HANDLE)_get_osfhandle(fd), + exe_name, len, VOLUME_NAME_DOS) != 0) { + CAMLassert(len > 4 && exe_name[0] == '\\' && exe_name[1] == '\\' + && exe_name[2] == '?' && exe_name[3] == '\\'); + wchar_t *p, *w; + /* GetFinalPathNameByHandle always returns a string beginning \\?\ and + returns \\?\UNC\ for a UNC path. Setup p and w to copy the string + back either 4 characters (for this first case) or 6 characters (for + the second) so that \\?\C:\Foo becomes C:\Foo and \\?\UNC\Server\Foo + becomes \\Server\Foo */ + if (len >= 8 && exe_name[4] == 'U' && exe_name[5] == 'N' + && exe_name[6] == 'C' && exe_name[7] == '\\') { + p = exe_name + 8; + w = exe_name + 2; + } else { + p = exe_name + 4; + w = exe_name; + } + while ((*w++ = *p++)); + } else { + error("descriptor passed via environment is invalid"); + } + } else { + error("descriptor passed via environment is invalid"); } - if (argv[pos] == 0) { - error("no bytecode file specified"); +#else + int offset; + if (sscanf_os(str_fd, T("%u,%n"), &fd, &offset) <= 0) + error("descriptor passed via environment is invalid"); + exe_name = caml_stat_strdup(str_fd + offset); +#endif + int err = caml_read_trailer(fd, &trail); + if (err != 0) { + close(fd); + CAML_GC_MESSAGE(STARTUP, "Descriptor is not a bytecode image\n"); + /* Termination code shared with normal startup route */ + fd = err; + } else { +#if defined(_WIN32) + _wputenv(L"__OCAML_EXEC_FD="); +#elif defined(HAS_SETENV_UNSETENV) + unsetenv("__OCAML_EXEC_FD"); +#endif } - exe_name = argv[pos]; - fd = caml_attempt_open(&exe_name, &trail, 1); - switch(fd) { - case FILE_NOT_FOUND: - error("cannot find file '%s'", - caml_stat_strdup_of_os(argv[pos])); - break; - case BAD_BYTECODE: - error( - "the file '%s' is not a bytecode executable file", - caml_stat_strdup_of_os(exe_name)); - break; - case WRONG_MAGIC: - error( - "the file '%s' has not the right magic number: "\ - "expected %s, got %s", - caml_stat_strdup_of_os(exe_name), - EXEC_MAGIC, - magicstr); - break; + if (proc_self_exe == NULL) + argv0 = exe_name; + } else { + if (caml_byte_program_mode != APPENDED || proc_self_exe == NULL) { + /* First, try argv[0] (when ocamlrun is called by a bytecode program) */ + exe_name = caml_search_exe_in_path(argv[0]); + fd = caml_attempt_open(exe_name, &trail, 0); + if (proc_self_exe == NULL) { + argv0 = exe_name; + if (fd < 0) + tofree = exe_name; + } else if (fd < 0) { + caml_stat_free(exe_name); + } + } + + /* Little grasshopper wonders why we do that at all, since + "The current executable is ocamlrun itself, it's never a bytecode + program". Little grasshopper "ocamlc -custom" in mind should keep. + With -custom, we have an executable that is ocamlrun itself + concatenated with the bytecode. So, if the attempt with argv[0] + failed, it is worth trying again with executable_name. */ + if (caml_byte_program_mode == APPENDED || fd < 0) { + if (proc_self_exe != NULL) { + exe_name = proc_self_exe; + fd = caml_attempt_open(exe_name, &trail, 0); + } + if (fd < 0 && caml_byte_program_mode == APPENDED) + error("unable to open file '%s'", caml_stat_strdup_of_os(exe_name)); + } + + if (fd < 0) { + pos = + parse_command_line(argv, + &trace_level, &backtrace_enabled, &event_trace); + if (print_config) { + caml_runtime_standard_library_effective = + caml_locate_standard_library(argv0, + caml_runtime_standard_library_default, + NULL); + + do_print_config(); + exit(0); + } + if (argv[pos] == 0) { + error("no bytecode file specified"); + } + exe_name = caml_search_exe_in_path(argv[pos]); + fd = caml_attempt_open(exe_name, &trail, 1); } } + + params->trace_level += trace_level; + if (backtrace_enabled) + params->backtrace_enabled = 1; + if (event_trace) + params->event_trace = 1; + switch(fd) { + case FILE_NOT_FOUND: + error("cannot find file '%s'", + caml_stat_strdup_of_os(exe_name)); + break; + case BAD_BYTECODE: + error( + "the file '%s' is not a bytecode executable file", + caml_stat_strdup_of_os(exe_name)); + break; + case WRONG_MAGIC: + error( + "the file '%s' has not the right magic number: "\ + "expected %s, got %s", + caml_stat_strdup_of_os(exe_name), + EXEC_MAGIC, + magicstr); + break; + } + /* Read the table of contents (section descriptors) */ caml_read_section_descriptors(fd, &trail); + + /* If caml_executable_ocamlrunparam was set, don't also process ORUN */ + if (!caml_executable_ocamlrunparam) { + /* Load the embedded runtime parameters */ + char_os *orun = read_section_to_os(fd, &trail, "ORUN"); + /* Re-parse options, taking these defaults into account (see note when + caml_parse-ocamlrunparam was previously called in this function) */ + if (orun != NULL) { + int pooling = caml_params->cleanup_on_exit; + caml_executable_ocamlrunparam = orun; + caml_parse_ocamlrunparam(); + + /* caml_parse_ocamlrunparam resets the params fields: re-apply the three + which are affected by command-line parsing. */ + params->trace_level += trace_level; + if (backtrace_enabled) + params->backtrace_enabled = 1; + if (event_trace) + params->event_trace = 1; + + /* c=1 was specified in ORUN, but c not included in OCAMLRUNPARAM */ + if (caml_params->cleanup_on_exit && !pooling) { + /* In order to re-start with pooling, everything which has been + allocated with caml_stat_alloc (i.e. malloc) must be passed to + caml_stat_free (i.e. free) and then reallocated */ + char_os *old_proc_self_exe = NULL; + char_os *old_exe_name = strdup_os(exe_name); + if (proc_self_exe) + old_proc_self_exe = strdup_os(proc_self_exe); + int search_path_size = caml_shared_libs_path.size; + char_os **search_path = + (char_os **)malloc(sizeof(char_os *) * search_path_size); + if (search_path) + memcpy(search_path, caml_shared_libs_path.contents, + sizeof(char_os *) * search_path_size); + else + search_path_size = 0; + + /* caml_stat_free everything which is currently allocated */ + caml_stat_free(orun); + caml_stat_free(proc_self_exe); + caml_stat_free(exe_name); + caml_stat_free(trail.section); + caml_ext_table_free(&caml_shared_libs_path, 0); + + /* Enable pooling */ + caml_stat_create_pool(); + + /* Re-initialise state with pooled memory */ + if (old_proc_self_exe) { + proc_self_exe = caml_stat_strdup_os(old_proc_self_exe); + free(old_proc_self_exe); + } + exe_name = caml_stat_strdup_os(old_exe_name); + free(old_exe_name); + + /* Re-read the table of contents (section descriptors) */ + caml_read_section_descriptors(fd, &trail); + caml_executable_ocamlrunparam = read_section_to_os(fd, &trail, "ORUN"); + + /* Re-initialise caml_shared_libs_path */ + caml_ext_table_init(&caml_shared_libs_path, 8); + for (int i = 0; i < search_path_size; i++) { + caml_ext_table_add(&caml_shared_libs_path, search_path[i]); + } + free(search_path); + } + } + } + + caml_runtime_standard_library_effective = + caml_locate_standard_library(argv0, + caml_runtime_standard_library_default, NULL); + caml_stat_free(tofree); + + /* Load the embedded overridden caml_standard_library_default value, if one is + available. This value is set _after_ caml_standard_library_effective has + been set via caml_locate_standard_library, because ocamlrun must use the + value it was configured with. For -custom executables, the value is the + same (albeit irrelevantly) - they specify caml_standard_library_default via + the primitives object, rather than via the OSLD section. */ + char_os *image_standard_library_default = + read_section_to_os(fd, &trail, "OSLD"); + if (image_standard_library_default != NULL) + caml_standard_library_default = image_standard_library_default; + + caml_init_codefrag(); + + caml_init_locale(); + caml_init_custom_operations(); + caml_init_os_params(); + /* Initialize the abstract machine */ caml_init_gc (); @@ -550,12 +745,19 @@ CAMLexport void caml_main(char_os **argv) /* Build the table of primitives */ shared_lib_path = read_section_to_os(fd, &trail, "DLPT"); shared_libs = read_section_to_os(fd, &trail, "DLLS"); +#ifndef SUPPORT_DYNAMIC_LINKING + if (shared_libs != NULL) + error( + "the file '%s' requires shared libraries to be loaded, which this " + "runtime does not support", + caml_stat_strdup_of_os(exe_name)); +#endif req_prims = read_section(fd, &trail, "PRIM"); if (req_prims == NULL) caml_fatal_error("no PRIM section"); caml_build_primitive_table(shared_lib_path, shared_libs, req_prims); + /* caml_build_primitive_table is responsible for freeing req_prims */ caml_stat_free(shared_lib_path); caml_stat_free(shared_libs); - caml_stat_free(req_prims); /* Load the globals */ caml_seek_section(fd, &trail, "DATA"); chan = caml_open_descriptor_in(fd); @@ -570,7 +772,7 @@ CAMLexport void caml_main(char_os **argv) /* ensure all globals are in major heap */ caml_minor_collection(); #ifdef _WIN32 - /* Start a thread to handle signals */ + /* Start a thread to handle signals - used by ocamlbrowser */ if (caml_secure_getenv(T("CAMLSIGPIPE"))) _beginthread(caml_signal_thread, 4096, NULL); #endif @@ -601,6 +803,10 @@ CAMLexport value caml_startup_code_exn( char_os * exe_name, * proc_self_exe; value res; +#ifdef _MSC_VER + caml_install_invalid_parameter_handler(); +#endif + /* Determine options */ caml_parse_ocamlrunparam(); @@ -612,9 +818,6 @@ CAMLexport value caml_startup_code_exn( caml_init_codefrag(); caml_init_locale(); -#ifdef _MSC_VER - caml_install_invalid_parameter_handler(); -#endif caml_init_custom_operations(); caml_init_os_params(); caml_ext_table_init(&caml_shared_libs_path, 8); @@ -631,6 +834,10 @@ CAMLexport value caml_startup_code_exn( else exe_name = proc_self_exe; + caml_runtime_standard_library_effective = + caml_locate_standard_library(exe_name, + caml_runtime_standard_library_default, NULL); + Caml_state->external_raise = NULL; /* Setup signal handling */ caml_init_signals(); diff --git a/runtime/sys.c b/runtime/sys.c index 99f825e9b2b2..b7545567422a 100644 --- a/runtime/sys.c +++ b/runtime/sys.c @@ -736,6 +736,47 @@ CAMLprim value caml_sys_const_backend_type(value unit) { return Val_int(1); /* Bytecode backed */ } + +/* The native code linker doesn't synthesise calls to this primitive, instead + putting the required string statically in caml_standard_library_nat if any of + the compilation units use %standard_library_default. The primitive is omitted + completely in libasmrun as there are no other existing instances in the + native runtime where OCAML_STDLIB_DIR ends up being embedded. */ +#ifndef NATIVE_CODE +/* If this remains unset than caml_runtime_standard_library_default is used */ +char_os *caml_standard_library_default = NULL; + +CAMLprim value caml_sys_const_standard_library_default(value unit) +{ + return caml_copy_string_of_os( + caml_standard_library_default ? caml_standard_library_default + : caml_runtime_standard_library_default); +} +#endif + +CAMLprim value caml_sys_get_stdlib_dirs(value vstdlib_default) +{ + CAMLparam1(vstdlib_default); + CAMLlocal3(result, eff, root_dir); + + char_os *stdlib_default = caml_stat_strdup_to_os(String_val(vstdlib_default)); + char_os *root = NULL, *stdlib; + + stdlib = + caml_locate_standard_library(caml_params->exe_name, stdlib_default, &root); + + eff = caml_copy_string_of_os(stdlib); + if (root == NULL) { + root_dir = Val_none; + } else { + root_dir = caml_copy_string_of_os(root); + root_dir = caml_alloc_some(root_dir); + } + result = caml_alloc_2(0, eff, root_dir); + + CAMLreturn(result); +} + CAMLprim value caml_sys_get_config(value unit) { CAMLparam0 (); /* unit is unused */ diff --git a/runtime/unix.c b/runtime/unix.c index e58c77cf5170..1dbbddeb44c4 100644 --- a/runtime/unix.c +++ b/runtime/unix.c @@ -54,8 +54,8 @@ #else #include #endif -#ifdef __APPLE__ -#include +#ifdef HAS_LIBGEN_H +#include #endif #ifdef HAS_SYS_MMAN_H #include @@ -122,9 +122,15 @@ caml_stat_string caml_decompose_path(struct ext_table * tbl, char * path) if (path == NULL) return NULL; p = caml_stat_strdup(path); q = p; + while (1) { + /* Skip any prefixing colons */ + while (*q == ':') + q++; + /* Find the end of this entry */ for (n = 0; q[n] != 0 && q[n] != ':'; n++) /*nothing*/; - caml_ext_table_add(tbl, q); + if (n > 0) + caml_ext_table_add(tbl, q); q = q + n; if (*q == 0) break; *q = 0; @@ -154,81 +160,29 @@ caml_stat_string caml_search_in_path(struct ext_table * path, const char * name) return caml_stat_strdup(name); } -#ifdef __CYGWIN__ - -/* Cygwin needs special treatment because of the implicit ".exe" at the - end of executable file names */ - -static int cygwin_file_exists(const char * name) -{ - int fd, ret; - struct stat st; - /* Cannot use stat() here because it adds ".exe" implicitly */ - fd = open(name, O_RDONLY); - if (fd == -1) return 0; - ret = fstat(fd, &st); - close(fd); - return ret == 0 && S_ISREG(st.st_mode); -} - -static caml_stat_string cygwin_search_exe_in_path(struct ext_table * path, - const char * name) -{ - char * dir, * fullname; - for (const char *p = name; *p != 0; p++) { - if (*p == '/' || *p == '\\') goto not_found; - } - for (int i = 0; i < path->size; i++) { - dir = path->contents[i]; - if (dir[0] == 0) dir = "."; /* empty path component = current dir */ - fullname = caml_stat_strconcat(3, dir, "/", name); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - fullname = caml_stat_strconcat(4, dir, "/", name, ".exe"); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - } - not_found: - if (cygwin_file_exists(name)) return caml_stat_strdup(name); - fullname = caml_stat_strconcat(2, name, ".exe"); - if (cygwin_file_exists(fullname)) return fullname; - caml_stat_free(fullname); - return caml_stat_strdup(name); -} - -#endif - caml_stat_string caml_search_exe_in_path(const char * name) { - struct ext_table path; - char * tofree; - caml_stat_string res; - - caml_ext_table_init(&path, 8); - tofree = caml_decompose_path(&path, getenv("PATH")); -#ifndef __CYGWIN__ - res = caml_search_in_path(&path, name); -#else - res = cygwin_search_exe_in_path(&path, name); -#endif - caml_stat_free(tofree); - caml_ext_table_free(&path, 0); + /* caml_search_in_system_path treats PATH being "Set But Null" (i.e. equal to + "") as being equivalent to being set to "." */ + caml_stat_string res = caml_search_in_system_path(name); + if (res == NULL) + res = caml_stat_strdup(name); return res; } -caml_stat_string caml_search_dll_in_path(struct ext_table * path, - const char * name) +/* Primitive is defined here rather than sys.c as otherwise sys.c would need + duplicating for libcamlrun_non_shared */ +CAMLprim value caml_sys_const_shared_libraries(value unit) { - caml_stat_string dllname; - caml_stat_string res; - - dllname = caml_stat_strconcat(2, name, ".so"); - res = caml_search_in_path(path, dllname); - caml_stat_free(dllname); - return res; +#ifdef WITH_DYNAMIC_LINKING + return Val_true; +#else + return Val_false; +#endif } #ifdef WITH_DYNAMIC_LINKING + #ifdef __CYGWIN__ /* Use flexdll */ @@ -352,57 +306,6 @@ CAMLexport int caml_read_directory(char * dirname, struct ext_table * contents) return 0; } -/* Recover executable name from /proc/self/exe if possible */ - -char * caml_executable_name(void) -{ -#if defined(__linux__) - int namelen, retcode; - char * name; - struct stat st; - - /* lstat("/proc/self/exe") returns st_size == 0 so we cannot use it - to determine the size of the buffer. Instead, we guess and adjust. */ - namelen = 256; - while (1) { - name = caml_stat_alloc(namelen); - retcode = readlink("/proc/self/exe", name, namelen); - if (retcode == -1) { caml_stat_free(name); return NULL; } - if (retcode < namelen) break; - caml_stat_free(name); - if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */ - namelen *= 2; - } - /* readlink() does not zero-terminate its result. - There is room for a final zero since retcode < namelen. */ - name[retcode] = 0; - /* Make sure that the contents of /proc/self/exe is a regular file. - (Old Linux kernels return an inode number instead.) */ - if (stat(name, &st) == -1 || ! S_ISREG(st.st_mode)) { - caml_stat_free(name); return NULL; - } - return name; - -#elif defined(__APPLE__) - unsigned int namelen; - char * name; - - namelen = 256; - name = caml_stat_alloc(namelen); - if (_NSGetExecutablePath(name, &namelen) == 0) return name; - caml_stat_free(name); - /* Buffer is too small, but namelen now contains the size needed */ - name = caml_stat_alloc(namelen); - if (_NSGetExecutablePath(name, &namelen) == 0) return name; - caml_stat_free(name); - return NULL; - -#else - return NULL; - -#endif -} - char *caml_secure_getenv (char const *var) { #ifdef HAS_SECURE_GETENV @@ -542,3 +445,70 @@ void caml_plat_mem_unmap(void* mem, uintnat size) if (munmap(mem, size) != 0) CAMLassert(0); } + +static char * caml_dirname (const char * path) +{ +#ifdef HAS_LIBGEN_H + char *dir, *res; + dir = caml_stat_strdup(path); + res = caml_stat_strdup(dirname(dir)); + caml_stat_free(dir); + return res; +#else + /* See Filename.generic_dirname */ + size_t n = strlen(path) - 1; + char *res; + if (n < 0) /* path is "" */ + return caml_stat_strdup("."); + while (n >= 0 && path[n] == '/') + n--; + if (n < 0) /* path is entirely slashes */ + return caml_stat_strdup("/"); + while (n >= 0 && path[n] != '/') + n--; + if (n < 0) /* path is relative */ + return caml_stat_strdup("."); + while (n >= 0 && path[n] == '/') + n--; + if (n < 0) /* path is a file at root */ + return caml_stat_strdup("/"); + /* n is the _index_ of the last character of the dirname */ + res = caml_stat_alloc(n + 2); + memcpy(res, path, n + 1); + res[n + 1] = 0; + return res; +#endif +} + +CAMLextern char_os* caml_locate_standard_library (const char *exe_name, + const char *stdlib_default, + char **dirname) +{ + if (Is_relative_dir(stdlib_default)) { + char * root = caml_dirname(exe_name); + char * candidate = + caml_stat_strconcat(3, root, CAML_DIR_SEP, stdlib_default); + /* In practice, a system which can be configured --with-relative-libdir will + also have realpath. The directory is normalised here for consistency with + the behaviour on Windows, which doesn't have a direct equivalent of + dirname and performs the equivalent of realpath as a side-effect of + determining the root path. */ +#ifdef HAS_REALPATH + char * resolved_candidate = realpath(candidate, NULL); + /* If realpath fails, use the non-normalised path for error messages. */ + if (resolved_candidate != NULL) { + caml_stat_free(candidate); + /* caml_realpath uses malloc */ + candidate = caml_stat_strdup(resolved_candidate); + free(resolved_candidate); + } +#endif + if (dirname == NULL) + caml_stat_free(root); + else + *dirname = root; + return candidate; + } else { + return caml_stat_strdup(stdlib_default); + } +} diff --git a/runtime/unix_executable.c b/runtime/unix_executable.c new file mode 100644 index 000000000000..567bf3b66d76 --- /dev/null +++ b/runtime/unix_executable.c @@ -0,0 +1,109 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ +/* */ +/* Copyright 1998 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* caml_search_in_system_path and caml_executable_name live here as they're + shared with ../stdlib/header.c */ + +#define CAML_INTERNALS +#include "caml/memory.h" + +#ifdef __APPLE__ +#include +#endif + +#include +#include +#include + +caml_stat_string caml_search_in_system_path(const char * name) +{ + char * fullname; + char * path; + struct stat st; + size_t len = 0; + + for (char *p = (char *)name, len = 0; *p != 0; p++, len++) { + if (*p == '/') return NULL; + } + if ((path = getenv("PATH")) == NULL) return NULL; + /* len is now strlen(name) + strlen(path) + separator + terminator */ + len += strlen(path) + 2; + if ((fullname = (char *)caml_stat_alloc(len)) == NULL) return NULL; + while(1) { + char * p; + for (p = fullname; *path != 0 && *path != ':'; p++, path++) + if (p < fullname + len) *p = *path; + if (p != fullname && p < fullname + len) + *p++ = '/'; + for (char *q = (char *)name; *q != 0; p++, q++) + if (p < fullname + len) *p = *q; + *p = 0; + if (stat(fullname, &st) == 0 && S_ISREG(st.st_mode)) break; + if (*path == 0) return NULL; + path++; + } + return fullname; +} + +/* Recover executable name from /proc/self/exe if possible */ + +char * caml_executable_name(void) +{ +#if defined(__linux__) + int namelen, retcode; + char * name; + struct stat st; + + /* lstat("/proc/self/exe") returns st_size == 0 so we cannot use it + to determine the size of the buffer. Instead, we guess and adjust. */ + namelen = 256; + while (1) { + name = caml_stat_alloc(namelen); + retcode = readlink("/proc/self/exe", name, namelen); + if (retcode == -1) { caml_stat_free(name); return NULL; } + if (retcode < namelen) break; + caml_stat_free(name); + if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */ + namelen *= 2; + } + /* readlink() does not zero-terminate its result. + There is room for a final zero since retcode < namelen. */ + name[retcode] = 0; + /* Make sure that the contents of /proc/self/exe is a regular file. + (Old Linux kernels return an inode number instead.) */ + if (stat(name, &st) == -1 || ! S_ISREG(st.st_mode)) { + caml_stat_free(name); return NULL; + } + return name; + +#elif defined(__APPLE__) + unsigned int namelen; + char * name; + + namelen = 256; + name = caml_stat_alloc(namelen); + if (_NSGetExecutablePath(name, &namelen) == 0) return name; + caml_stat_free(name); + /* Buffer is too small, but namelen now contains the size needed */ + name = caml_stat_alloc(namelen); + if (_NSGetExecutablePath(name, &namelen) == 0) return name; + caml_stat_free(name); + return NULL; + +#else + return NULL; + +#endif +} diff --git a/runtime/win32.c b/runtime/win32.c index 8a5c8ff4b087..7bba8936749e 100644 --- a/runtime/win32.c +++ b/runtime/win32.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "caml/alloc.h" #include "caml/codefrag.h" #include "caml/fail.h" @@ -140,8 +141,12 @@ wchar_t * caml_decompose_path(struct ext_table * tbl, wchar_t * path) p = caml_stat_wcsdup(path); q = p; while (1) { + /* Don't include blank entries */ + while (*q == ';') + q++; for (n = 0; q[n] != 0 && q[n] != L';'; n++) /*nothing*/; - caml_ext_table_add(tbl, q); + if (n > 0) + caml_ext_table_add(tbl, q); q = q + n; if (*q == 0) break; *q = 0; @@ -209,15 +214,15 @@ CAMLexport wchar_t * caml_search_exe_in_path(const wchar_t * name) } } -wchar_t * caml_search_dll_in_path(struct ext_table * path, const wchar_t * name) +/* Primitive is defined here rather than sys.c as otherwise sys.c would need + duplicating for libcamlrun_non_shared */ +CAMLprim value caml_sys_const_shared_libraries(value unit) { - wchar_t * dllname; - wchar_t * res; - - dllname = caml_stat_wcsconcat(2, name, L".dll"); - res = caml_search_in_path(path, dllname); - caml_stat_free(dllname); - return res; +#ifdef WITH_DYNAMIC_LINKING + return Val_true; +#else + return Val_false; +#endif } #ifdef WITH_DYNAMIC_LINKING @@ -450,7 +455,10 @@ CAMLexport int caml_read_directory(wchar_t * dirname, #ifndef NATIVE_CODE -/* Set up a new thread for control-C emulation and termination */ +/* Set up a new thread for control-C emulation and termination. This mechanism + is used by the ocamlbrowser program which was part of the OCaml distribution + until OCaml 4.02 but now lives at https://github.com/garrigue/labltk. The + mechanism isn't known to be used by any other software. */ void caml_signal_thread(void * lpParam) { @@ -1342,3 +1350,88 @@ value caml_win32_get_temp_path(void) caml_win32_sys_error(GetLastError()); CAMLreturn(caml_copy_string_of_utf16(buf)); } + +CAMLextern char_os* caml_locate_standard_library (const wchar_t *exe_name, + const wchar_t *stdlib_default, + wchar_t **dirname) +{ + if (Is_relative_dir(stdlib_default)) { + LPWSTR root = NULL, basename; + DWORD l = MAX_PATH + 1, buf_len; + + do { + buf_len = l; + caml_stat_free(root); + root = caml_stat_alloc(buf_len * sizeof(WCHAR)); + l = GetFullPathName(exe_name, buf_len, root, &basename); + } while (l >= buf_len); + /* It should be an Impossible Thing for exe_name (which will have been the + result of GetModuleFileName) to be unparsable by GetFullPathName */ + if (l == 0) { + caml_stat_free(root); + return caml_stat_wcsdup(stdlib_default); + } + + CAMLassert(basename != root && Is_separator(*(basename - 1))); + + /* Make root the dirname portion */ + *(basename - 1) = 0; + + LPWSTR candidate = + caml_stat_wcsconcat(3, root, CAML_DIR_SEP, stdlib_default); + HANDLE h = + CreateFile(candidate, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (h == INVALID_HANDLE_VALUE) { + caml_stat_free(candidate); + return caml_stat_wcsdup(stdlib_default); + } + + LPWSTR resolved_candidate = NULL; + l = MAX_PATH + 1; + do { + buf_len = l; + caml_stat_free(resolved_candidate); + resolved_candidate = caml_stat_alloc(buf_len * sizeof(WCHAR)); + l = GetFinalPathNameByHandle(h, resolved_candidate, buf_len, + FILE_NAME_NORMALIZED | VOLUME_NAME_DOS); + } while (l >= buf_len); + + if (l > 0) { + /* GetFinalPathNameByHandle always returns \\?\ which needs stripping. + UNC paths are returned as \\?\UNC\ - in this case, reuse the \\ from + \\?\. */ + CAMLassert(l > 4 && resolved_candidate[0] == '\\' + && resolved_candidate[1] == '\\' + && resolved_candidate[2] == '?' + && resolved_candidate[3] == '\\'); + + caml_stat_free(candidate); + + if (l >= 8 && resolved_candidate[4] == 'U' + && resolved_candidate[5] == 'N' + && resolved_candidate[6] == 'C' + && resolved_candidate[7] == '\\') { + /* NT native UNC path - reuse the 'C' for the backslash */ + resolved_candidate[6] = '\\'; + candidate = caml_stat_wcsdup(resolved_candidate + 6); + } else { + /* Local device path */ + candidate = caml_stat_wcsdup(resolved_candidate + 4); + } + } + + /* It should be another Impossible Thing for l == 0 in the above. If that + did happen, candidate will _not_ have been freed, and we'll return the + path returned by GetFullPathName */ + caml_stat_free(resolved_candidate); + + if (dirname != NULL) + *dirname = caml_stat_wcsdup(root); + caml_stat_free(root); + return candidate; + } else { + return caml_stat_wcsdup(stdlib_default); + } +} diff --git a/stdlib/.depend b/stdlib/.depend index 1420b0094c66..5173aa2396da 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -403,7 +403,6 @@ stdlib__Gc.cmi : gc.mli \ stdlib__Printexc.cmi stdlib__Hashtbl.cmo : hashtbl.ml \ stdlib__Sys.cmi \ - stdlib__String.cmi \ stdlib__Seq.cmi \ stdlib__Random.cmi \ stdlib__Obj.cmi \ @@ -414,7 +413,6 @@ stdlib__Hashtbl.cmo : hashtbl.ml \ stdlib__Hashtbl.cmi stdlib__Hashtbl.cmx : hashtbl.ml \ stdlib__Sys.cmx \ - stdlib__String.cmx \ stdlib__Seq.cmx \ stdlib__Random.cmx \ stdlib__Obj.cmx \ diff --git a/stdlib/Makefile b/stdlib/Makefile index 3f831625292e..20bfe718f615 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -14,6 +14,7 @@ #************************************************************************** ROOTDIR = .. +SUBDIR_NAME = stdlib # NOTE: it is important that the OCAMLDEP variable is defined *before* # Makefile.common gets included, so that its local definition here # take precedence over its general shared definitions in Makefile.common. @@ -47,6 +48,9 @@ endif OPTCOMPILER=$(ROOTDIR)/ocamlopt$(EXE) CAMLOPT=$(OCAMLRUN) $(OPTCOMPILER) +# At present, only META is installed to the package directory +LIBNAME = stdlib + include StdlibModules OBJS=$(addsuffix .cmo,$(STDLIB_MODULES)) @@ -54,38 +58,32 @@ NOSTDLIB= camlinternalFormatBasics.cmo stdlib.cmo OTHERS=$(filter-out $(NOSTDLIB),$(OBJS)) .PHONY: all -all: stdlib.cma std_exit.cmo $(HEADER_NAME) target_$(HEADER_NAME) +all: stdlib.cma std_exit.cmo $(HEADER_NAME) .PHONY: allopt opt.opt # allopt and opt.opt are synonyms allopt: stdlib.cmxa std_exit.cmx opt.opt: allopt -INSTALL_STDLIB_META_DIR=$(DESTDIR)$(LIBDIR)/stdlib - .PHONY: install install:: - $(INSTALL_DATA) \ - stdlib.cma std_exit.cmo *.cmi "$(INSTALL_LIBDIR)" - $(MKDIR) "$(INSTALL_STDLIB_META_DIR)" - $(INSTALL_DATA) META "$(INSTALL_STDLIB_META_DIR)" + $(call INSTALL_ITEMS, stdlib.cma std_exit.cmo *.cmi, lib) + $(call INSTALL_ITEMS, META, lib, $(LIBNAME)) ifeq "$(INSTALL_SOURCE_ARTIFACTS)" "true" - $(INSTALL_DATA) \ - *.cmt *.cmti *.mli *.ml *.ml.in \ - "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, *.cmt *.cmti *.mli *.ml *.ml.in, lib) endif - $(INSTALL_DATA) target_$(HEADER_NAME) "$(INSTALL_LIBDIR)/$(HEADER_NAME)" + $(call INSTALL_ITEMS, $(HEADER_NAME), lib) .PHONY: installopt installopt: installopt-default .PHONY: installopt-default installopt-default: - $(INSTALL_DATA) \ - stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx \ - "$(INSTALL_LIBDIR)" + $(call INSTALL_ITEMS, stdlib.cmxa stdlib.$(A) std_exit.$(O) *.cmx, lib) -%-launch-info: %.info tmpheader.exe - @cat $^ > $@ +MANGLING = $(filter true,$(SUFFIXING)) +runtime-launch-info: tmpheader.exe + @{ printf '$(if $(MANGLING),$(ZINC_RUNTIME_ID_HI),\000)'; \ + cat $^; } > $@ # The mingw-w64 and MSVC versions of tmpheader.exe are linked with special flags # to reduce their size (considerably). In particular, the entry point is @@ -120,7 +118,7 @@ HEADERLIBS = endif .INTERMEDIATE: tmpheader.exe -tmpheader.exe: header.$(O) +tmpheader.exe: header.$(O) libcamlrun.$(A) $(V_MKEXE)$(call MKEXE_VIA_CC,$@,$^ $(HEADERLIBS)) # Do not strip the header produced by cl ifneq "$(TOOLCHAIN)" "msvc" @@ -135,11 +133,11 @@ stdlib.cmxa: $(OBJS:.cmo=.cmx) .PHONY: distclean distclean: clean - rm -f sys.ml META runtime.info target_runtime.info + rm -f sys.ml META .PHONY: clean clean:: - rm -f $(HEADER_NAME) target_$(HEADER_NAME) + rm -f $(HEADER_NAME) export AWK diff --git a/stdlib/filename.ml b/stdlib/filename.ml index c1af643c883d..9739d0ab731c 100644 --- a/stdlib/filename.ml +++ b/stdlib/filename.ml @@ -102,7 +102,9 @@ module Unix : SYSDEPS = struct String.ends_with ~suffix:suff name let temp_dir_name = - try Sys.getenv "TMPDIR" with Not_found -> "/tmp" + match Sys.getenv_opt "TMPDIR" with + | None | Some "" -> "/tmp" + | Some dir -> dir let quote = generic_quote "'\\''" let quote_command cmd ?stdin ?stdout ?stderr args = String.concat " " (List.map quote (cmd :: args)) diff --git a/stdlib/hashtbl.ml b/stdlib/hashtbl.ml index dee5f0fe94ba..363b0d5c0dd7 100644 --- a/stdlib/hashtbl.ml +++ b/stdlib/hashtbl.ml @@ -46,15 +46,24 @@ let flip_ongoing_traversal h = (* To pick random seeds if requested *) -let randomized_default = - let params = - try Sys.getenv "OCAMLRUNPARAM" with Not_found -> - try Sys.getenv "CAMLRUNPARAM" with Not_found -> "" in - String.contains params 'R' +(* The runtime stores the initial value of "R" in caml_runtime_randomized. We + choose to copy this initial value here and then keep then in sync in order to + avoid adding a C call to every call to Hashtbl.create. *) +external randomized : unit -> bool = "caml_runtime_is_randomized" [@@noalloc] +let randomized = Atomic.make (randomized ()) + +external randomize : unit -> unit = "caml_runtime_randomize" [@@noalloc] +let randomize () = + Atomic.set randomized true; + (* Update the runtime's value so that the result from Sys.runtime_parameters + includes "R". There is technically a race here where Hashtbl.create () + creates randomized hash tables, but Sys.runtime_parameters doesn't yet + return R=1. We choose not to care - Hashtbl.is_randomized will always + return the correct value, and making Sys.runtime_parameters always be in + sync would either add a C call to every Hashtbl.create call or would + introduce a complicated dependency cycle between Sys and Hashtbl *) + randomize () -let randomized = Atomic.make randomized_default - -let randomize () = Atomic.set randomized true let is_randomized () = Atomic.get randomized let prng_key = Domain.DLS.new_key Random.State.make_self_init diff --git a/stdlib/header.c b/stdlib/header.c index 8ef80b057037..174bafeca261 100644 --- a/stdlib/header.c +++ b/stdlib/header.c @@ -22,29 +22,72 @@ #define NORETURN _Noreturn #endif +#include + #ifdef _WIN32 #define STRICT #define WIN32_LEAN_AND_MEAN #include +typedef wchar_t char_os; +typedef wchar_t * argv_t; +#define T(x) L ## x +#define Is_separator(c) (c == '\\' || c == '/') +#define Directory_separator_character T('\\') +#define ITOL(i) L ## #i +#define ITOT(i) ITOL(i) +#define PATH_NAME L"%Path%" + +/* The header is written to be able to cope with paths greater than MAX_PATH, + so undefine it to stop it being used in error. */ +#undef MAX_PATH + +#if defined(__MINGW32__) && defined(PATH_MAX) +/* mingw-w64 has a limits.h which defines PATH_MAX as an alias for MAX_PATH */ +#undef PATH_MAX +#endif + #if WINDOWS_UNICODE #define CP CP_UTF8 #else #define CP CP_ACP #endif -/* mingw-w64 has a limits.h which defines PATH_MAX as an alias for MAX_PATH */ -#if !defined(PATH_MAX) -#define PATH_MAX MAX_PATH +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#if __has_attribute(fallthrough) + #define fallthrough __attribute__ ((fallthrough)) +#else + #define fallthrough ((void) 0) #endif +/* The maximum representable path for any API function, after internal expansion + of \\?\ etc. is 32767 characters. PATH_MAX includes the terminator. */ +#define PATH_MAX 0x8000 + +/* Initialised as the first statement of wmainCRTStartup */ +static HANDLE hProcessHeap; + +#define malloc(size) HeapAlloc(hProcessHeap, 0, (size)) +#define free(memblock) HeapFree(hProcessHeap, 0, (memblock)) + #define SEEK_END FILE_END +/* Initialised as the first statement of wmainCRTStartup */ +static HANDLE hProcessHeap; + +#define malloc(size) HeapAlloc(hProcessHeap, 0, (size)) +#define free(memblock) HeapFree(hProcessHeap, 0, (memblock)) + #define lseek(h, offset, origin) SetFilePointer((h), (offset), NULL, (origin)) typedef HANDLE file_descriptor; +#define unsafe_copy(dst, src, dstsize) lstrcpy(dst, src) + static int read(HANDLE h, LPVOID buffer, DWORD buffer_size) { DWORD nread = 0; @@ -60,19 +103,51 @@ static BOOL WINAPI ctrl_handler(DWORD event) return FALSE; } +static int exec_file(wchar_t *file, wchar_t *cmdline, STARTUPINFO *stinfo) +{ + LPWSTR truename = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); + PROCESS_INFORMATION procinfo; + DWORD retcode = ENOMEM; + + if (truename && SearchPath(NULL, file, L".exe", PATH_MAX, truename, NULL)) { + /* Need to ignore ctrl-C and ctrl-break, otherwise we'll die and take the + underlying OCaml program with us! */ + SetConsoleCtrlHandler(ctrl_handler, TRUE); + + if (CreateProcess(truename, cmdline, NULL, NULL, TRUE, 0, NULL, NULL, + stinfo, &procinfo)) { + free(truename); + CloseHandle(procinfo.hThread); + WaitForSingleObject(procinfo.hProcess, INFINITE); + GetExitCodeProcess(procinfo.hProcess, &retcode); + CloseHandle(procinfo.hProcess); + ExitProcess(retcode); + } else { + retcode = ENOEXEC; + } + } else { + retcode = ENOENT; + } + + free(truename); + + return retcode; +} + static void write_error(const wchar_t *wstr, HANDLE hOut) { DWORD consoleMode, numwritten, len; - char str[MAX_PATH]; + char *str; if (GetConsoleMode(hOut, &consoleMode) != 0) { /* The output stream is a Console */ WriteConsole(hOut, wstr, lstrlen(wstr), &numwritten, NULL); } else { /* The output stream is redirected */ - len = - WideCharToMultiByte(CP, 0, wstr, lstrlen(wstr), str, sizeof(str), - NULL, NULL); - WriteFile(hOut, str, len, &numwritten, NULL); + len = WideCharToMultiByte(CP, 0, wstr, -1, NULL, 0, NULL, NULL); + str = (char *)malloc(len); + WideCharToMultiByte(CP, 0, wstr, -1, str, len, NULL, NULL); + /* len includes the terminator */ + WriteFile(hOut, str, len - 1, &numwritten, NULL); } } @@ -90,14 +165,18 @@ NORETURN static void exit_with_error(const wchar_t *wstr1, #else +#include "caml/s.h" + #include #include #include #include #include #include +#ifdef HAS_LIBGEN_H +#include +#endif #include -#include /* O_BINARY is defined in Gnulib, but is not POSIX */ #ifndef O_BINARY @@ -106,87 +185,36 @@ NORETURN static void exit_with_error(const wchar_t *wstr1, typedef int file_descriptor; -#ifndef __CYGWIN__ - -/* Normal Unix search path function */ - -static char * searchpath(char * name) -{ - static char fullname[PATH_MAX + 1]; - char * path; - struct stat st; - - for (char *p = name; *p != 0; p++) { - if (*p == '/') return name; - } - path = getenv("PATH"); - if (path == NULL) return name; - while(1) { - char * p; - for (p = fullname; *path != 0 && *path != ':'; p++, path++) - if (p < fullname + PATH_MAX) *p = *path; - if (p != fullname && p < fullname + PATH_MAX) - *p++ = '/'; - for (char *q = name; *q != 0; p++, q++) - if (p < fullname + PATH_MAX) *p = *q; - *p = 0; - if (stat(fullname, &st) == 0 && S_ISREG(st.st_mode)) break; - if (*path == 0) return name; - path++; - } - return fullname; -} - +typedef char char_os; +typedef char ** argv_t; +#define T(x) x +#define Is_separator(c) (c == '/') +#define Directory_separator_character '/' +#define ITOL(x) #x +#define ITOT(x) ITOL(x) +#define PATH_NAME "$PATH" + +#ifdef HAS_STRLCPY +/* The macro is named unsafe_copy because although it requires a dstsize + argument which _may_ be passed to strlcpy, there are platforms where the + underlying operation is unsafe and will ignore dstsize. */ +#define unsafe_copy strlcpy #else +#define unsafe_copy(dst, src, dstsize) strcpy(dst, src) +#endif -/* Special version for Cygwin32: takes care of the ".exe" implicit suffix */ - -static int file_ok(char * name) +/* caml_search_in_system_path uses caml_stat_alloc and caml_executable_name also + uses caml_stat_free */ +void *caml_stat_alloc(size_t size) { - int fd; - /* Cannot use stat() here because it adds ".exe" implicitly */ - fd = open(name, O_RDONLY); - if (fd == -1) return 0; - close(fd); - return 1; + return malloc(size); } -static char * searchpath(char * name) +void caml_stat_free(void *ptr) { - char * path, * fullname; - - path = getenv("PATH"); - fullname = malloc(strlen(name) + (path == NULL ? 0 : strlen(path)) + 6); - /* 6 = "/" plus ".exe" plus final "\0" */ - if (fullname == NULL) return name; - /* Check for absolute path name */ - for (char *p = name; *p != 0; p++) { - if (*p == '/' || *p == '\\') { - if (file_ok(name)) return name; - strcpy(fullname, name); - strcat(fullname, ".exe"); - if (file_ok(fullname)) return fullname; - return name; - } - } - /* Search in path */ - if (path == NULL) return name; - while(1) { - char * p; - for (p = fullname; *path != 0 && *path != ':'; p++, path++) *p = *path; - if (p != fullname) *p++ = '/'; - strcpy(p, name); - if (file_ok(fullname)) return fullname; - strcat(fullname, ".exe"); - if (file_ok(fullname)) return fullname; - if (*path == 0) break; - path++; - } - return name; + free(ptr); } -#endif - NORETURN static void exit_with_error(const char *str1, const char *str2, const char *str3) @@ -198,8 +226,16 @@ NORETURN static void exit_with_error(const char *str1, exit(2); } +static int exec_file(const char *file, char * const argv[], void *_stinfo) +{ + return (execvp(file, argv) == -1 ? errno : 0); +} + #endif /* defined(_WIN32) */ +#include "caml/version.h" +#define SHORT_VERSION ITOT(OCAML_VERSION_MAJOR) T(".") ITOT(OCAML_VERSION_MINOR) + #define CAML_INTERNALS #include "caml/exec.h" @@ -210,12 +246,11 @@ static uint32_t read_size(const char *ptr) ((uint32_t) p[2] << 8) | p[3]; } -static char * read_runtime_path(file_descriptor fd) +static char * read_runtime_path(file_descriptor fd, uint32_t *rntm_strlen) { char buffer[TRAILER_SIZE]; - static char runtime_path[PATH_MAX]; + char *runtime_path; int num_sections; - uint32_t path_size; long ofs; if (lseek(fd, -TRAILER_SIZE, SEEK_END) == -1) return NULL; @@ -223,91 +258,299 @@ static char * read_runtime_path(file_descriptor fd) num_sections = read_size(buffer); ofs = TRAILER_SIZE + num_sections * 8; if (lseek(fd, -ofs, SEEK_END) == -1) return NULL; - path_size = 0; + *rntm_strlen = 0; for (int i = 0; i < num_sections; i++) { if (read(fd, buffer, 8) < 8) return NULL; if (buffer[0] == 'R' && buffer[1] == 'N' && buffer[2] == 'T' && buffer[3] == 'M') { - path_size = read_size(buffer + 4); - ofs += path_size; - } else if (path_size > 0) + *rntm_strlen = read_size(buffer + 4); + ofs += *rntm_strlen; + } else if (*rntm_strlen > 0) ofs += read_size(buffer + 4); } - if (path_size == 0) return NULL; - if (path_size >= PATH_MAX) return NULL; + if (*rntm_strlen == 0) return NULL; + if ((runtime_path = (char *)malloc(*rntm_strlen + 1)) == NULL) return NULL; if (lseek(fd, -ofs, SEEK_END) == -1) return NULL; - if (read(fd, runtime_path, path_size) != path_size) return NULL; + if (read(fd, runtime_path, *rntm_strlen) != *rntm_strlen) return NULL; + runtime_path[*rntm_strlen] = 0; return runtime_path; } +/* rntm points to a buffer containing rntm_bsz characters consisting of the + decoded content of the RNTM section (which may include NUL characters) and an + additional NUL "terminator". + RNTM is either [\0] or []\0 + Decode rntm and search for a runtime (using argv0_dirname if non-NULL and + required) and exec the first runtime found passing argv. */ +NORETURN void search_and_exec_runtime(char_os *rntm, uint32_t rntm_bsz, + argv_t argv, char_os *argv0_dirname, + void *stinfo) +{ + /* rntm_end points to the NUL "terminator" of rntm (_not_ the last character + of the RNTM section */ + const char_os *rntm_end = rntm + (rntm_bsz - 1); + + char_os *rntm_bindir_end = rntm; + + /* Scan for the first NUL character in rntm (there is always one) */ + while (*rntm_bindir_end != 0) + rntm_bindir_end++; + + /* The first character of rntm is NUL for Search mode */ + if (*rntm != 0) { + /* For Absolute mode, there is no NUL in RNTM, so rntm_bindir_end points to + the terminator pointed to be rntm_end. For Absolute_then_search, there is + a NUL in the middle of the RNTM "string", which rntm_bindir_end points + at. Change that to a directory separator, so that rntm now points to a + NUL-terminated full path we can attempt to exec. */ + if (rntm_bindir_end != rntm_end) + *rntm_bindir_end = Directory_separator_character; + int status = exec_file(rntm, argv, stinfo); + /* exec failed. For Absolute mode, there's nothing else to be tried. For + Absolute_then_search, if the failure was for any other reason than ENOENT + then there is also nothing else to be tried. */ + if (rntm_bindir_end == rntm_end || status != ENOENT) + exit_with_error(T("Cannot exec "), rntm, NULL); + } + + /* Shift rntm to point to */ + rntm = rntm_bindir_end + 1; + if (rntm < rntm_end) { + /* Searching takes place first in the directory containing this executable, + if it's known. */ + if (argv0_dirname != NULL) { + char_os *root = (char_os *)malloc((PATH_MAX + 1) * sizeof(char_os)); + if (root == NULL) + exit_with_error(T("Out of memory"), NULL, NULL); + unsafe_copy(root, argv0_dirname, PATH_MAX); + + /* Ensure root ends with a directory separator. root_basename points to + the character at which to place */ + char_os *root_basename = root; + while (*root_basename != 0) + root_basename++; + if (root_basename > root && !Is_separator(*(root_basename - 1))) + *root_basename++ = Directory_separator_character; + + /* If there isn't enough space to copy rntm to root then simply skip this + check (e.g. an executable called b.exe in a very long directory name). + (root_basename - root) is strlen_os(root) and likewise + (rntm_end - rntm) is strlen_os(rntm). */ + if ((rntm_end - rntm) <= PATH_MAX - (root_basename - root) - 1) { + unsafe_copy(root_basename, rntm, PATH_MAX - (root_basename - root)); + if (exec_file(root, argv, stinfo) != ENOENT) + exit_with_error(T("Cannot exec "), root, NULL); + } + } + + /* Otherwise, search in PATH */ + if (exec_file(rntm, argv, stinfo) != ENOENT) + exit_with_error(T("Cannot exec "), rntm, NULL); + } + + /* If we get here, we've failed... */ + exit_with_error(T("This program requires OCaml ") SHORT_VERSION T("\n") + T("Interpreter ("), (rntm_bindir_end + 1), + T(") not found with the program or in " PATH_NAME)); +} + #ifdef _WIN32 +#undef RtlMoveMemory +void __declspec(dllimport) __stdcall RtlMoveMemory(void *Destination, + const void *Source, + size_t Length); + NORETURN void __cdecl wmainCRTStartup(void) { - wchar_t truename[MAX_PATH]; + LPWSTR truename; + LPWSTR dirname; + uint32_t rntm_strlen = 0, rntm_bsz = 0; char *runtime_path; - wchar_t wruntime_path[MAX_PATH]; + wchar_t *wruntime_path, *basename; HANDLE h; - STARTUPINFO stinfo; - PROCESS_INFORMATION procinfo; - DWORD retcode; - if (GetModuleFileName(NULL, truename, sizeof(truename)/sizeof(wchar_t)) == 0) - exit_with_error(L"Out of memory", NULL, NULL); + hProcessHeap = GetProcessHeap(); + + truename = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); + dirname = (LPWSTR)malloc(PATH_MAX * sizeof(WCHAR)); + if (truename == NULL || dirname == NULL + || GetModuleFileName(NULL, truename, PATH_MAX) == 0 + || GetFullPathName(truename, PATH_MAX, dirname, &basename) >= PATH_MAX) + exit_with_error(L"Out of memory", NULL, NULL); + /* GetFullPathName leaves basename pointing to the first character of the + basename, so setting that to NUL means the string pointed to by dirname + is the dirname of the currently running executable with a trailing + separator (although search_and_exec_runtime will check that anyway) */ + *basename = 0; + + /* Mark the HANDLE as inheritable so ocamlrun can use it */ + SECURITY_ATTRIBUTES sa; + sa.nLength = sizeof(sa); + sa.lpSecurityDescriptor = NULL; + sa.bInheritHandle = TRUE; h = CreateFile(truename, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, 0, NULL); - if (h == INVALID_HANDLE_VALUE || - (runtime_path = read_runtime_path(h)) == NULL || - !MultiByteToWideChar(CP, 0, runtime_path, -1, wruntime_path, - sizeof(wruntime_path)/sizeof(wchar_t))) + &sa, OPEN_EXISTING, 0, NULL); + if (h == INVALID_HANDLE_VALUE + || (runtime_path = read_runtime_path(h, &rntm_strlen)) == NULL + || (wruntime_path = + (wchar_t *)malloc((rntm_strlen + 1) * sizeof(wchar_t))) == NULL + || (rntm_bsz = MultiByteToWideChar(CP, 0, runtime_path, rntm_strlen + 1, + wruntime_path, rntm_strlen + 1)) == 0) exit_with_error(NULL, truename, L" not found or is not a bytecode executable file"); - CloseHandle(h); - if (SearchPath(NULL, wruntime_path, L".exe", sizeof(truename)/sizeof(wchar_t), - truename, NULL)) { - /* Need to ignore ctrl-C and ctrl-break, otherwise we'll die and take - the underlying OCaml program with us! */ - SetConsoleCtrlHandler(ctrl_handler, TRUE); - - stinfo.cb = sizeof(stinfo); - stinfo.lpReserved = NULL; - stinfo.lpDesktop = NULL; - stinfo.lpTitle = NULL; - stinfo.dwFlags = 0; + free(runtime_path); + free(truename); + STARTUPINFO stinfo; + /* Retrieve the existing STARTUPINFO structure - however this header was + invoked is morally how we should invoke ocamlrun, but we also need to + set-up or augment the cbReserved2 / lpReserved2 members in order to pass + the HANDLE h to ocamlrun as a CRT fd. The cloexec.ml test checks that + existing fds are passed through successfully. The use of lpReserved2 by the + CRT can be seen in the Universal CRT sources info exec/spawnv.cpp for the + code which sets the buffer up and in lowio/ioinit.cpp which reads the + buffer provided to the process. The semantics of this buffer are unchanged + since the very beginning of Windows NT. + It is a relatively well-documented "trick" to be able to pass up to 64KiB + of information to a new process using lpReserved2, on condition that the + data respects the CRT's requirements. The CRT processes lpReserved2 if it + is not NULL and if cbReserved2 is non-zero - it performs no further + checking beyond that. Applications can therefore embed additional data by + setting cbReserved2 to the actual size of lpReserved2 and simply ensuring + that the first 4 bytes pointed to by lpReserved2 are zero. + Cygwin uses this mechanism when invoking processes to allow the Cygwin DLL + to pick up the required information about the caller, amongst other things + to implement fork (it's also used as part of argument passing). + The code below must therefore cater for three cases: + 1. cbReserved2 == 0 / lpReserved2 == NULL, in which case the structure must + be created + 2. cbReserved2 > 0 but there are fewer than 3 fds in the structure, in + which case empty handles must be added so that our HANDLE is fd 3 + 3. cbReserved2 > 0 and there are already 3 or more fds in the structure, in + which case our HANDLE is appended to the end of the structure */ + GetStartupInfo(&stinfo); + + /* This header avoids the CRT to keep its size down - the Windows API doesn't + have anything sprintf-like, however, the largest fd-number fits comfortably + within a 16-bit wide character and we know that it will never be zero - the + number of the fd is therefore passed to ocamlrun as a single wide-character + string where the code-point represents the fd. + Nemo nunc te poteste servare. */ + WCHAR fd[2] = {0, 0}; + + /* Match the CRT's check - ignore the existing values if either cbReserved2 is + zero _or_ lpReserved2 is NULL */ + if (stinfo.cbReserved2 > 0 && stinfo.lpReserved2 == NULL) stinfo.cbReserved2 = 0; - stinfo.lpReserved2 = NULL; - if (CreateProcess(truename, GetCommandLine(), NULL, NULL, TRUE, 0, - NULL, NULL, &stinfo, &procinfo)) { - CloseHandle(procinfo.hThread); - WaitForSingleObject(procinfo.hProcess, INFINITE); - GetExitCodeProcess(procinfo.hProcess, &retcode); - CloseHandle(procinfo.hProcess); - ExitProcess(retcode); - } + + int existing_count = 0; + /* Work out the fd number for h */ + if (stinfo.cbReserved2 > 0) { + existing_count = *(int *)stinfo.lpReserved2; + fd[0] = existing_count; + /* If there is a structure present, but it has no fds, discard it. */ + if (existing_count == 0) + stinfo.cbReserved2 = 0; + } + /* Allow for the standard handles */ + if (fd[0] < 3) + fd[0] = 3; + + WORD buffer_size = sizeof(int) + (fd[0] + 1) * (1 + sizeof(HANDLE)); + LPBYTE buffer = (LPBYTE)malloc(buffer_size); + + /* Store the total number of handles */ + *(int *)buffer = fd[0] + 1; + + /* Copy the existing flags and HANDLEs */ + if (stinfo.cbReserved2 > 0) { + RtlMoveMemory(buffer + sizeof(int), stinfo.lpReserved2 + sizeof(int), + existing_count); + RtlMoveMemory(buffer + sizeof(int) + fd[0] + 1, + stinfo.lpReserved2 + sizeof(int) + existing_count, + existing_count * sizeof(HANDLE)); } - exit_with_error(L"Cannot exec ", wruntime_path, NULL); + /* Pointers to the next slot for flags and the next slot for a HANDLE */ + LPBYTE osflags = + buffer + sizeof(int) + existing_count; + LPHANDLE oshandles = + (LPHANDLE)(buffer + sizeof(int) + fd[0] + 1 + + existing_count * sizeof(HANDLE)); + + /* Ensure the standard fds are populated. Unrolled to prevent cl requiring the + memset intrinsic. */ + switch (existing_count) { + case 0: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; + fallthrough; + case 1: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; + fallthrough; + case 2: + *osflags++ = 0; + *oshandles++ = INVALID_HANDLE_VALUE; + } + + /* Add h to the structure */ + *osflags = 1; + *oshandles = h; + + stinfo.cbReserved2 = buffer_size; + stinfo.lpReserved2 = buffer; + + SetEnvironmentVariable(L"__OCAML_EXEC_FD", fd); + search_and_exec_runtime(wruntime_path, rntm_bsz, + GetCommandLine(), dirname, &stinfo); } #else +/* Borrowed from libcamlrun */ +char * caml_search_in_system_path(const char *); +char * caml_executable_name(void); + int main(int argc, char *argv[]) { - char *truename, *runtime_path; + char *truename, *runtime_path, *argv0_dirname; + uint32_t rntm_strlen = 0; int fd; - truename = searchpath(argv[0]); + if (argc < 1) + exit_with_error("Unable to load bytecode image", NULL, NULL); + + truename = caml_executable_name(); + if (truename == NULL) truename = caml_search_in_system_path(argv[0]); + if (truename == NULL) truename = argv[0]; fd = open(truename, O_RDONLY | O_BINARY); - if (fd == -1 || (runtime_path = read_runtime_path(fd)) == NULL) + if (fd == -1 || (runtime_path = read_runtime_path(fd, &rntm_strlen)) == NULL) exit_with_error(NULL, truename, " not found or is not a bytecode executable file"); - close(fd); - argv[0] = truename; - execvp(runtime_path, argv); + size_t truename_len = strlen(truename); + char *value = (char *)malloc(10 + 1 + truename_len + 1); + snprintf(value, 11, "%u,", fd); + strcat(value, truename); +#ifdef HAS_SETENV_UNSETENV + setenv("__OCAML_EXEC_FD", value, 1); +#else +#error "Require a way to set environment variables" +#endif + +#ifdef HAS_LIBGEN_H + argv0_dirname = dirname(strdup(truename)); +#else + argv0_dirname = NULL; +#endif - exit_with_error("Cannot exec ", runtime_path, NULL); + /* read_runtime_path returns the actual size of RNTM, but the buffer returned + is guaranteed to have a null character following the final character of + RNTM. */ + search_and_exec_runtime(runtime_path, rntm_strlen + 1, argv, argv0_dirname, + NULL); } #endif /* defined(_WIN32) */ diff --git a/testsuite/Makefile b/testsuite/Makefile index 4cad7738954a..a8302c6922dc 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -166,7 +166,7 @@ all: @$(MAKE) --no-print-directory report .PHONY: new-without-report -new-without-report: +new-without-report: | tests @rm -f $(failstamp) @($(ocamltest) -find-test-dirs tests | while $(IFS_LINE) read -r dir; do \ echo Running tests from \'$$dir\' ... ; \ @@ -185,7 +185,7 @@ check-failstamp: fi .PHONY: all-% -all-%: +all-%: | tests @for dir in tests/$**; do \ $(MAKE) --no-print-directory exec-one DIR=$$dir; \ done 2>&1 | tee $(TESTLOG) @@ -223,7 +223,7 @@ all-%: J_ARGUMENT = $(filter-out -j,$(filter -j%,$(MAKEFLAGS))) .PHONY: parallel-% -parallel-%: +parallel-%: | tests @echo | parallel >/dev/null 2>/dev/null \ || (echo "Unable to run the GNU parallel tool;";\ echo "You should install it before using the parallel* targets.";\ @@ -327,3 +327,25 @@ distclean: clean report: @if [ ! -f $(TESTLOG) ]; then echo "No $(TESTLOG) file."; exit 1; fi @$(AWK) -f ./summarize.awk < $(TESTLOG) + +# When an archive is created by git-archive, this is expanded to the SHA of the +# commit. The filter-out causes this to be blank if it's run when the Format +# tag has not been expanded +GIT_ARCHIVE_SHA = $(filter-out ormat%, $Format:%H$ ) + +tests: + @echo "There are no tests in the tests directory!" + @echo "This happens when the sources of OCaml are extracted from a \ +tarball" + @echo "generated by git-archive (which includes those generated by \ +GitHub)" + @head -n 1 $(ROOTDIR)/VERSION | grep -Fq + || \ + echo "Note that the release tarballs published at \ +https://caml.inria.fr/pub/distrib/ include all the manual and testsuite sources" + @$(if $(GIT_ARCHIVE_SHA),,false) + @echo "The required files are in commit $(GIT_ARCHIVE_SHA), for \ +example:" + @echo " git clone https://github.com/ocaml/ocaml \ +--revision $(GIT_ARCHIVE_SHA) --depth 1 git-sources" + @echo " mv git-sources/$@ ." + @false diff --git a/testsuite/in_prefix/Makefile.test b/testsuite/in_prefix/Makefile.test index 6cdee3e706d3..d1c5f48f701d 100644 --- a/testsuite/in_prefix/Makefile.test +++ b/testsuite/in_prefix/Makefile.test @@ -23,9 +23,10 @@ DRIVER = ../tools/test_in_prefix$(EXE) endif DRIVER_ARGS = \ - $(VERBOSE_FLAG) --bindir "$(BINDIR)" --libdir "$(LIBDIR)" \ + $(VERBOSE_FLAG) --bindir "$(BINDIR)" --libdir '$(TARGET_LIBDIR)' \ $(call bool_to_with, ocamlnat, $(INSTALL_OCAMLNAT)) \ $(call bool_to_with, ocamlopt, $(NATIVE_COMPILER)) \ + $(RUNTIME_SEARCH_FLAG) \ $(OTHERLIBRARIES) --pwd "$(SRCDIR_ABS)/testsuite/in_prefix" default: $(DRIVER) @@ -38,11 +39,28 @@ else VERBOSE_FLAG = endif -test-in-prefix: $(DRIVER) ../tools/main_in_c.$(O) - @$< $(DRIVER_ARGS) +# Generates --without-$(1) if $(2) is empty or --with-$(1)=$(2) otherwise +RUNTIME_SEARCH_FLAG = \ + $(if $(RUNTIME_SEARCH),$\ + --with-runtime-search=$(RUNTIME_SEARCH),$\ + --without-runtime-search) + +export PATH := $(SRCDIR_ABS)/testsuite/in_prefix/poisoned-runtime:$(PATH) + +ifeq "$(SUFFIXING)" "true" +RUNTIME_NAME = ocamlrun-$(ZINC_RUNTIME_ID)$(EXE) +else +RUNTIME_NAME = ocamlrun$(EXE) +endif -SCRUB_ENV = \ - CAML_LD_LIBRARY_PATH OCAMLLIB CAMLLIB OCAMLPARAM OCAMLRUNPARAM CAMLRUNPARAM +test-in-prefix: $(DRIVER) ../tools/main_in_c.$(O) ../tools/dummy$(EXE) + @rm -f ocamlrun* + @$(LN) $(ROOTDIR)/runtime/ocamlrun$(EXE) test-$(RUNTIME_NAME) + @$(MKDIR) poisoned-runtime + @cd poisoned-runtime && $(LN) ../../tools/dummy$(EXE) $(RUNTIME_NAME) + @$< $(DRIVER_ARGS) + @rm -rf poisoned-runtime + @rm -f test-ocamlrun* # Generates --without-$(1) if $(2) = false or --with-$(1) otherwise bool_to_with = --with$(if $(filter false,$(2)),out)-$(strip $(1)) diff --git a/testsuite/in_prefix/README.md b/testsuite/in_prefix/README.md index 513b5d641d99..a7a063652a10 100644 --- a/testsuite/in_prefix/README.md +++ b/testsuite/in_prefix/README.md @@ -46,7 +46,8 @@ fifth test are re-run and then the entire battery is executed a second time. During this second execution, the test harness does whatever is physically possible to allow these tests to proceed: - Environment variables `CAML_LD_LIBRARY_PATH` and `OCAMLLIB` are manipulated to - allow the compiler to operate + allow the compiler to operate (unless the compiler has been configured with + `--with-relative-libdir`) - Bytecode executables which will no longer be able to find `ocamlrun` are explicitly passed to `ocamlrun`. The harness always verifies that this step is required by first executing the binary and ensuring that it fails and then @@ -70,11 +71,11 @@ Exercises: Shims: - On Unix, the bytecode toplevel contains the absolute location of `ocamlrun`, - so must be explicitly invoked via `ocamlrun` + so must be explicitly invoked via `ocamlrun`, unless the compiler is + configured with `--enable-runtime-search` - Both toplevels contain the absolute location of the Standard Library, - requiring `OCAMLLIB` to be set -- `ld.conf` contains the absolute location of the `stublibs` directory, - requiring `CAML_LD_LIBRARY_PATH` to be adjusted + requiring `OCAMLLIB` to be set, unless the compiler was configured with + `--with-relative-libdir` ### Loading archives/plugins (.cma / .cmxs) with `Dynlink` @@ -84,14 +85,13 @@ Shims: - For a bytecode-only build, `ocamlc` contains the absolute location of `ocamlrun`, so must be explicitly invoked via `ocamlrun` (if the native compiler is available, then both `ocamlc` and `ocamlopt` will be native - executables) + executables), unless the compiler is configured with `--enable-runtime-search` - Both compilers contain the absolute location of the Standard Library, - requiring `OCAMLLIB` to be set + requiring `OCAMLLIB` to be set, unless the comnpiler was configured with + `--with-relative-libdir` - The executable created by `ocamlc` contains the absolute location of - `ocamlrun`, so must be both explicitly invoked via `ocamlrun` and also have - `CAML_LD_LIBRARY_PATH` adjusted, as that `ocamlrun` will either not load - `ld.conf` or (with `OCAMLLIB` set) will be pointed to an `ld.conf` containing - the absolute location of the `stublibs` directory + `ocamlrun`, so must be explicitly invoked via `ocamlrun`, unless the + compiler is configured with `--enable-runtime-search-target` ### Executing installed bytecode binaries with `-vnum` @@ -113,7 +113,8 @@ Exercises: Shims: - On builds with shared library support, all the executables will contain the - absolute location of `ocamlrun` and will fail to execute + absolute location of `ocamlrun` and will fail to execute, unless the compiler + was configured with `--enable-runtime-search` - On builds without shared library support, executables using libraries with C stubs (in particular, `ocamldebug` and `ocamldoc`) are compiled with `-custom` and do succeed @@ -151,6 +152,8 @@ Exercises: Shims: - As with the `Dynlink` test, on bytecode-only builds the compiler must be - explicitly invoked via `ocamlrun` + explicitly invoked via `ocamlrun`, unless the compiler was configured with + `--enable-runtime-search` - The executable produced by `ocamlc` by default contains the absolute location - of `ocamlrun` and so has to be run explicitly via `ocamlrun` + of `ocamlrun` and so has to be run explicitly via `ocamlrun`, unless the + compiler was configured with `--enable-runtime-search-target` diff --git a/testsuite/tests/backtrace/backtrace2.reference b/testsuite/tests/backtrace/backtrace2.reference index dcd377bef5f5..0c3d90412ea5 100644 --- a/testsuite/tests/backtrace/backtrace2.reference +++ b/testsuite/tests/backtrace/backtrace2.reference @@ -35,7 +35,7 @@ Uncaught exception Invalid_argument("index out of bounds") Raised by primitive operation at Backtrace2.run in file "backtrace2.ml", line 62, characters 14-22 test_Not_found Uncaught exception Not_found -Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 584, characters 13-28 +Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 593, characters 13-28 Called from Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 9-42 Re-raised at Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 61-70 Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23 @@ -50,7 +50,7 @@ Called from CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line Re-raised at CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line 56, characters 4-11 Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23 Uncaught exception Not_found -Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 584, characters 13-28 +Raised at Stdlib__Hashtbl.find in file "hashtbl.ml", line 593, characters 13-28 Called from Backtrace2.test_lazy.exception_raised_internally in file "backtrace2.ml", line 50, characters 8-41 Re-raised at CamlinternalLazy.do_force_block.(fun) in file "camlinternalLazy.ml", line 54, characters 43-50 Called from CamlinternalLazy.do_force_block in file "camlinternalLazy.ml", line 49, characters 17-27 diff --git a/testsuite/tests/lib-unix/common/cloexec.ml b/testsuite/tests/lib-unix/common/cloexec.ml index 3cb1d3a6ff3e..7e97f9138f70 100644 --- a/testsuite/tests/lib-unix/common/cloexec.ml +++ b/testsuite/tests/lib-unix/common/cloexec.ml @@ -1,6 +1,6 @@ (* TEST include unix; - readonly_files = "fdstatus_aux.c fdstatus_main.ml"; + readonly_files = "fdstatus_aux.c fdstatus_main.ml cloexec_leap.ml"; hasunix; { @@ -9,6 +9,9 @@ program = "${test_build_directory}/fdstatus.exe"; all_modules = "fdstatus_aux.c fdstatus_main.ml"; ocamlc.byte; + program = "${test_build_directory}/cloexec_leap.exe"; + all_modules = "cloexec_leap.ml"; + ocamlc.byte; program = "${test_build_directory}/cloexec.byte"; all_modules = "fdstatus_aux.c cloexec.ml"; ocamlc.byte; @@ -29,6 +32,9 @@ program = "${test_build_directory}/fdstatus.exe"; all_modules = "fdstatus_aux.c fdstatus_main.ml"; ocamlopt.byte; + program = "${test_build_directory}/cloexec_leap.exe"; + all_modules = "cloexec_leap.ml"; + ocamlopt.byte; program = "${test_build_directory}/cloexec.opt"; all_modules = "fdstatus_aux.c cloexec.ml"; ocamlopt.byte; @@ -49,7 +55,7 @@ external fd_of_file_descr : Unix.file_descr -> int = "caml_fd_of_filedescr" let string_of_fd fd = Int.to_string (fd_of_file_descr fd) -let status_checker = "fdstatus.exe" +let status_checker = "cloexec_leap.exe" let _ = let f0 = @@ -86,18 +92,19 @@ let _ = seen on slower machines, where the test begins running before this call has happened, and which results in tmp.txt still being locked when fdstatus_main.ml tries to delete it, leading to a "Permission denied" - exception. To prevent this, lock.txt is created and locked for writing - by this process. If the checker then sees that lock.txt exists, it - attempts to acquire a write lock on it, which will succeed only after - this process has completely exited and its lock has been automatically - released via process termination. + exception. To prevent this, lock1.txt is created and locked for writing + by this process, and then lock2.txt similarly by cloexec_leap.ml. If + the checker then sees that these exist, it attempts to acquire write + locks on them, which will succeed only after these processes have both + completely exited and their locks have been automatically released via + process termination. This dance is strictly done on native on Windows only, because execv hanging on to open files in this way with a Unix kernel is a very serious misimplementation of execv! *) if Sys.win32 then let lock = - Unix.(openfile "lock.txt" [O_WRONLY; O_CREAT; - O_TRUNC; O_CLOEXEC] 0o600) in + Unix.(openfile "lock1.txt" [O_WRONLY; O_CREAT; + O_TRUNC; O_CLOEXEC] 0o600) in Unix.lockf lock Unix.F_LOCK 0 in Unix.execv (Filename.concat Filename.current_dir_name status_checker) diff --git a/testsuite/tests/lib-unix/common/cloexec_leap.ml b/testsuite/tests/lib-unix/common/cloexec_leap.ml new file mode 100644 index 000000000000..2def123b7575 --- /dev/null +++ b/testsuite/tests/lib-unix/common/cloexec_leap.ml @@ -0,0 +1,21 @@ +let status_checker = "fdstatus.exe" + +let _ = + let args = Array.copy Sys.argv in + let image = Filename.concat Filename.current_dir_name status_checker in + args.(0) <- status_checker; + if Sys.argv.(1) = "execv" then + let () = + (* As in cloexec.ml, on Windows take out a write lock on a file so that + fdstatus_main.ml can be sure that both ancestor processes have actually + terminated before it tries to delete tmp.txt *) + if Sys.win32 then + let lock = + Unix.(openfile "lock2.txt" [O_WRONLY; O_CREAT; + O_TRUNC; O_CLOEXEC] 0o600) in + Unix.lockf lock Unix.F_LOCK 0 in + Unix.execv image args + else + let pid = + Unix.create_process image args Unix.stdin Unix.stdout Unix.stderr in + ignore (Unix.waitpid [] pid) diff --git a/testsuite/tests/lib-unix/common/fdstatus_main.ml b/testsuite/tests/lib-unix/common/fdstatus_main.ml index 6e881fa7f46e..f92ef7db9426 100644 --- a/testsuite/tests/lib-unix/common/fdstatus_main.ml +++ b/testsuite/tests/lib-unix/common/fdstatus_main.ml @@ -3,8 +3,8 @@ external delete_on_close : string -> unit = "caml_win32_delete_on_close" let () = if Sys.win32 then - (* Ensure the ancestor process has definitely terminated (and therefore - closed its handles to tmp.txt) *) + (* Ensure both ancestor processes have definitely terminated (and therefore + closed their handles to tmp.txt) *) let wait_until file = if Sys.file_exists file then let fd = Unix.openfile file [O_RDWR] 0o600 in @@ -12,7 +12,7 @@ let () = Unix.close fd; Sys.remove file in - wait_until "lock.txt" + List.iter wait_until ["lock1.txt"; "lock2.txt"] let () = (* Windows virus scanning can easily get in the way here on slower VMs. When diff --git a/testsuite/tests/native-debugger/linux-lldb-amd64.ml b/testsuite/tests/native-debugger/linux-lldb-amd64.ml index 205ee7d19495..cb8929087516 100644 --- a/testsuite/tests/native-debugger/linux-lldb-amd64.ml +++ b/testsuite/tests/native-debugger/linux-lldb-amd64.ml @@ -1,4 +1,5 @@ (* TEST + unset BUILD_PATH_PREFIX_MAP; native-compiler; no-tsan; (* Skip, TSan inserts extra frames into backtraces *) linux; diff --git a/testsuite/tests/native-debugger/linux-lldb-arm64.ml b/testsuite/tests/native-debugger/linux-lldb-arm64.ml index eaa5c952ca0a..6be608302973 100644 --- a/testsuite/tests/native-debugger/linux-lldb-arm64.ml +++ b/testsuite/tests/native-debugger/linux-lldb-arm64.ml @@ -1,4 +1,5 @@ (* TEST + unset BUILD_PATH_PREFIX_MAP; native-compiler; no-tsan; (* Skip, TSan inserts extra frames into backtraces *) linux; diff --git a/testsuite/tests/tool-debugger/find-artifacts/debuggee.ml b/testsuite/tests/tool-debugger/find-artifacts/debuggee.ml index f9fb5fa7fdca..442d600a303b 100644 --- a/testsuite/tests/tool-debugger/find-artifacts/debuggee.ml +++ b/testsuite/tests/tool-debugger/find-artifacts/debuggee.ml @@ -1,4 +1,5 @@ (* TEST + unset BUILD_PATH_PREFIX_MAP; debugger_script = "${test_source_directory}/input_script"; debugger; shared-libraries; diff --git a/testsuite/tests/tool-toplevel/pr6468.compilers.reference b/testsuite/tests/tool-toplevel/pr6468.compilers.reference index 297306afd4b0..3a523d6f3d75 100644 --- a/testsuite/tests/tool-toplevel/pr6468.compilers.reference +++ b/testsuite/tests/tool-toplevel/pr6468.compilers.reference @@ -10,5 +10,5 @@ Exception: Not_found. Raised at f in file "//toplevel//", line 2, characters 11-26 Called from g in file "//toplevel//", line 1, characters 11-15 Called from in file "//toplevel//", line 1, characters 0-4 -Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 93, characters 4-14 +Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 103, characters 4-14 diff --git a/testsuite/tests/tool-toplevel/pr9701.compilers.reference b/testsuite/tests/tool-toplevel/pr9701.compilers.reference index f1c15be91634..03ad45abb437 100644 --- a/testsuite/tests/tool-toplevel/pr9701.compilers.reference +++ b/testsuite/tests/tool-toplevel/pr9701.compilers.reference @@ -1,4 +1,4 @@ Exception: Failure "test". Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33 Called from in file "pr9701.ml", line 5, characters 9-16 -Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 93, characters 4-14 +Called from Topeval.load_lambda in file "toplevel/byte/topeval.ml", line 103, characters 4-14 diff --git a/testsuite/tools/cmdline.ml b/testsuite/tools/cmdline.ml index 1fcc2944dc7b..1d6cb7ed2029 100644 --- a/testsuite/tools/cmdline.ml +++ b/testsuite/tools/cmdline.ml @@ -104,9 +104,10 @@ let parse argv = in let config = ref {has_ocamlnat = false; has_ocamlopt = false; has_relative_libdir = None; - has_runtime_search = None; launcher_searches_for_ocamlrun = false; + has_runtime_search = Absolute; launcher_searches_for_ocamlrun = false; target_launcher_searches_for_ocamlrun = false; - bytecode_shebangs_by_default = false; libraries = []} + bytecode_shebangs_by_default = false; filename_mangling = false; + libraries = []} in let error fmt = Printf.ksprintf (fun s -> raise (Arg.Bad s)) fmt in let check_tree () = @@ -160,23 +161,22 @@ let parse argv = let has_ocamlnat has_ocamlnat () = config := {!config with has_ocamlnat} in let has_ocamlopt has_ocamlopt () = config := {!config with has_ocamlopt} in let parse_search = function - | "enable" -> true - | "always" -> false + | Some "enable" -> Config.Absolute_then_search + | Some "always" -> Config.Search + | None -> Config.Absolute | _ -> raise (Arg.Bad "--with-runtime-search: argument should be either enable or always") in let has_runtime_search arg = - let has_runtime_search = Option.map parse_search arg in - if has_runtime_search <> None then - error "--with-runtime-search is not implemented!"; + let has_runtime_search = parse_search arg in config := {!config with has_runtime_search} in let args = Arg.align [ "--pwd", Arg.Set_string pwd, "\tCurrent working directory to use"; "--bindir", Arg.String (check_exists ~absolute:true bindir), "\ \tDirectory containing programs (must share a prefix with --libdir)"; - "--libdir", Arg.String (check_exists ~absolute:true libdir), "\ + "--libdir", Arg.String (check_exists ~absolute:false libdir), "\ \tDirectory containing stdlib.cma (must share a prefix with --bindir)"; "--summary", Arg.Set summary, ""; "--verbose", Arg.Set verbose, ""; diff --git a/testsuite/tools/dummy.c b/testsuite/tools/dummy.c new file mode 100644 index 000000000000..ef5f5be95ea0 --- /dev/null +++ b/testsuite/tools/dummy.c @@ -0,0 +1,27 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* David Allsopp, University of Cambridge & Tarides */ +/* */ +/* Copyright 2025 David Allsopp Ltd. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* Micro-program used to sit in PATH to test local path search for bytecode + executables. */ + +#define CAML_INTERNALS +#include +#include + +int main_os(int argc, char_os **argv) +{ + printf("The poisoned runtime has been invoked!\n" + "This suggests something is wrong in stdlib/header.c\n"); + return 1; +} diff --git a/testsuite/tools/environment.ml b/testsuite/tools/environment.ml index f0bd23acc678..e302c9a0542e 100644 --- a/testsuite/tools/environment.ml +++ b/testsuite/tools/environment.ml @@ -46,7 +46,7 @@ let libdir_suffix {libdir_suffix; _} = libdir_suffix (* Derived properties *) -let is_renamed {phase; _} = (phase = Renamed) +let is_renamed {phase; _} = (phase <> Original) let bindir {prefix; bindir_suffix; _} = Filename.concat prefix bindir_suffix @@ -67,19 +67,6 @@ let in_libdir env path = let in_test_root {test_root; _} path = Filename.concat test_root path -(* Reverse the quoting of single quotes done by Filename.quote on Unix (which is - used for the runtime name when embedded in sh-scripts. Any single quote - characters are transformed to "'\\''". If the string is split on the single - quote characters, the sequence ["\\"; ""] is a single quote character in the - unescaped version. *) -let dequote s = - let[@tail_mod_cons] rec loop = function - | "\\" :: "" :: rest -> "'" :: loop rest - | chunk :: rest -> chunk :: loop rest - | [] -> [] - in - String.concat "" (loop (String.split_on_char '\'' s)) - (* [classify_executable file] determines if [file] is : - Tendered bytecode with an executable header - Scripted bytecode invoking ocamlrun with a #! header @@ -91,49 +78,18 @@ let classify_executable file = try In_channel.with_open_bin file (fun ic -> let start = really_input_string ic 2 in - let is_RNTM = function - | Bytesections.{name = Name.RNTM; _} -> true - | _ -> false - in + let toc = Bytesections.read_toc ic in + let sections = Bytesections.all toc in let is_DLLS = function | Bytesections.{name = Name.DLLS; len} when len > 0 -> true | _ -> false in - let toc = Bytesections.read_toc ic in - let sections = Bytesections.all toc in - if start = "#!" then - let runtime = - seek_in ic 2; - let shebang = String.trim (input_line ic) in - if Filename.basename shebang = "sh" then - let exec_line = input_line ic in - if String.starts_with ~prefix:"exec '" exec_line - && String.ends_with ~suffix:"' \"$0\" \"$@\"" exec_line then - (* When the path to the runtime can't be directly used in a - shebang, the shell is used instead, the next line is then: - exec '' "$0" "$@" *) - dequote (String.sub exec_line 6 (String.length exec_line - 17)) - else - Harness.fail_because "%s contains an unexpected exec line: %S" - file exec_line - else - shebang - in - Tendered {header = Header_shebang; - dlls = List.exists is_DLLS sections; - runtime} - else if List.exists is_RNTM sections then - let rntm = - Bytesections.read_section_string toc ic Bytesections.Name.RNTM in - let len = String.length rntm in - if len = 0 || rntm.[len - 1] <> '\000' then - Harness.fail_because "%s contains corrupt RNTM: %S" file rntm; - let runtime = String.sub rntm 0 (len - 1) in - Tendered {header = Header_exe; - dlls = List.exists is_DLLS sections; - runtime} - else - Custom) + let tendered (runtime, id, search) = + let header = if start = "#!" then Header_shebang else Header_exe in + let dlls = List.exists is_DLLS sections in + Tendered {header; dlls; runtime; id; search} + in + Option.fold ~none:Custom ~some:tendered (Byterntm.read_runtime toc ic)) with End_of_file | Bytesections.Bad_magic_number -> Vanilla @@ -224,7 +180,7 @@ let make pp_path ~verbose ~test_root ~test_root_logical let value = String.sub binding (equals + 1) (String.length binding - equals - 1) in - if is_path_env name then + if phase <> Execution && is_path_env name then if Sys.win32 then if String.index_opt bindir ';' <> None then Printf.sprintf "%s=\"%s\";%s" name bindir value @@ -270,7 +226,7 @@ let string_of_process_status = function highlighted. If argv0 is specified, then the original program executable is also shown. *) let display_execution level status pid ~runtime program argv0 args - ({pp_path; verbose; serial; _} as env) = + ({pp_path; verbose; serial; phase; _} as env) = let pp_program style program f = function | Some argv0 -> Format.fprintf f "@{<%s>%s (from %a)@}" @@ -313,9 +269,11 @@ let display_execution level status pid ~runtime program argv0 args if serial <> !last_environment then begin last_environment := serial; Format.printf "\ - @{> @}@{Environment@}\n\ - @{> @} @{PATH=%a:$PATH@}\n" - pp_path (bindir env); + @{> @}@{Environment@}\n"; + if phase <> Execution then + Format.printf "\ + @{> @} @{PATH=%a:$PATH@}\n" + pp_path (bindir env); if not Sys.win32 then Format.printf "\ @{> @} @{%s=%a:$%s@}\n" @@ -472,9 +430,9 @@ let run_process ?(runtime = false) ?(stubs = false) ?(stdlib = false) (* The tests are easier to write with the assumption that shims are simply ignored in the Original phase (otherwise they all begin [Env.is_renamed env && (* ... *)] *) - let runtime = runtime && phase = Renamed in + let runtime = runtime && phase <> Original in let env = - if phase = Renamed && (stubs || stdlib) then + if phase <> Original && (stubs || stdlib) then apply_shims ~stubs ~stdlib env else env @@ -493,7 +451,7 @@ let run_process ?(runtime = false) ?(stubs = false) ?(stdlib = false) fails without each shim in turn. The final entry in the strategy must be the request itself. *) let test_without cond shim strategy = - if phase = Renamed && cond then + if phase <> Original && cond then shim env :: strategy else strategy diff --git a/testsuite/tools/environment.mli b/testsuite/tools/environment.mli index 192718ee3594..217b98f6d56d 100644 --- a/testsuite/tools/environment.mli +++ b/testsuite/tools/environment.mli @@ -37,7 +37,7 @@ val make : (Format.formatter -> string -> unit) -> verbose:bool with [LD_LIBRARY_PATH] / [DYLD_LIBRARY_PATH] set or updated). *) val is_renamed : t -> bool -(** [is_renamed t] if [~phase = Renamed] *) +(** [is_renamed t] if [~phase <> Original] *) val test_root : t -> string (** Retrieves the [~test_root] passed to {!make}. *) diff --git a/testsuite/tools/harness.ml b/testsuite/tools/harness.ml index e54ce32b7e45..b9f68fede3e9 100644 --- a/testsuite/tools/harness.ml +++ b/testsuite/tools/harness.ml @@ -16,11 +16,15 @@ module Import = struct type launch_mode = Header_exe | Header_shebang type executable = - | Tendered of {header: launch_mode; dlls: bool; runtime: string} + | Tendered of {header: launch_mode; + dlls: bool; + runtime: string; + id: Misc.RuntimeID.t option; + search: Byterntm.search_method} | Custom | Vanilla - type phase = Original | Renamed + type phase = Original | Execution | Renamed type mode = Bytecode | Native @@ -28,10 +32,11 @@ module Import = struct has_ocamlnat: bool; has_ocamlopt: bool; has_relative_libdir: string option; - has_runtime_search: bool option; + has_runtime_search: Config.search_method; launcher_searches_for_ocamlrun: bool; target_launcher_searches_for_ocamlrun: bool; bytecode_shebangs_by_default: bool; + filename_mangling: bool; libraries: string list list } end @@ -78,7 +83,7 @@ let files_for ?(source_and_cmi = true) mode name files = |> add_if source_and_cmi (name ^ ".ml") let fail_because fmt = - Format.ksprintf (fun s -> prerr_endline s; exit 1) fmt + Format.ksprintf (fun s -> flush stdout; prerr_endline s; exit 1) fmt (* ocamlc cannot be directly executed after renaming the prefix if native compilation is disabled (because ocamlc will be ocamlc.byte, since ocamlc.opt diff --git a/testsuite/tools/harness.mli b/testsuite/tools/harness.mli index 06430e1e8d68..6a3f39a3bb5e 100644 --- a/testsuite/tools/harness.mli +++ b/testsuite/tools/harness.mli @@ -23,7 +23,11 @@ module Import : sig (** Kinds of executable *) type executable = - | Tendered of {header: launch_mode; dlls: bool; runtime: string} + | Tendered of {header: launch_mode; + dlls: bool; + runtime: string; + id: Misc.RuntimeID.t option; + search: Byterntm.search_method} (** Tendered bytecode image. Executable uses the given mechanism to locate a suitable runtime to execute the image. [dlls] is [true] if the bytecode image requires additional C libraries to be loaded. [runtime] @@ -36,8 +40,11 @@ module Import : sig (** Test harness phases. *) type phase = - | Original (* Compiler installed in its original configured prefix. *) - | Renamed (* Compiler moved to a different prefix from its configuration. *) + | Original (* Compiler installed in its original configured prefix. *) + | Execution (* Executing programs built by the compiler installed in its + original prefix after the compiler has been moved to a + different prefix. *) + | Renamed (* Compiler moved to a different prefix from its configuration. *) (* Tooling modes. *) type mode = @@ -53,21 +60,24 @@ module Import : sig has_ocamlopt: bool; (** {v [$(NATIVE_COMPILER)] v} - {v Makefile.config v} *) has_relative_libdir: string option; - (** Not implemented; always None. *) - has_runtime_search: bool option; - (** Not implemented; always None. *) + (** {v $(TARGET_LIBDIR_IS_RELATIVE) v} and {v $(TARGET_LIBDIR) v} - + {v Makefile.build_config v} *) + has_runtime_search: Config.search_method; + (** {v $(RUNTIME_SEARCH) v} - {v Makefile.build_config v} *) launcher_searches_for_ocamlrun: bool; (** Indicates whether bytecode executables in the compiler distribution - use a launcher that is capable of searching PATH to find ocamlrun. At - present, only native Windows has this behaviour. *) + a launcher that is capable of searching PATH to find ocamlrun. This + used to be the behaviour for native Windows. *) target_launcher_searches_for_ocamlrun: bool; (** Indicates whether the executable launcher used by ocamlc is capable of - searching PATH to find ocamlrun. At present, only native Windows has - this behaviour. *) + searching PATH to find ocamlrun. This used to be the behaviour for + native Windows. *) bytecode_shebangs_by_default: bool; (** True if ocamlc uses a shebang-style header rather than an executable header for tendered bytecode executables. *) - libraries: string list list + filename_mangling: bool; + (** True if the Runtime ID is being used for filename mangling. *) + libraries: string list list; (** Sorted list of basenames of libraries to test. Derived from {v [$(OTHERLIBRARIES)] v} - {v Makefile.config v} *) } diff --git a/testsuite/tools/testBytecodeBinaries.ml b/testsuite/tools/testBytecodeBinaries.ml index 7a7da5cecaa8..ba068dc4d200 100644 --- a/testsuite/tools/testBytecodeBinaries.ml +++ b/testsuite/tools/testBytecodeBinaries.ml @@ -31,118 +31,154 @@ let run config env = let bindir = Environment.bindir env in Format.printf "\nTesting bytecode binaries in %a\n" (Environment.pp_path env) bindir; - let ocamlrun = Environment.ocamlrun env in - let exec_magic = - Environment.run_process env ocamlrun ["-M"] - in - let test_binary binary = + let test_binary failed binary = if String.starts_with ~prefix:"ocaml" binary - || String.starts_with ~prefix:"flexlink" binary then - let program = Filename.concat bindir binary in - if is_executable program then - let classification = Environment.classify_executable program in - if classification <> Vanilla then - let fails = - (* After the prefix has been renamed, bytecode executables compiled - with -custom will still work. Otherwise, only executables where the - header can search for ocamlrun and which do not require any C stubs - to be loaded will still work. *) - Environment.is_renamed env - && match classification with - | Tendered {dlls; _} -> - not config.launcher_searches_for_ocamlrun || dlls - | _ -> - false - in - match Environment.run_process ~fails env program ["-vnum"] with - | (0, ((output::rest) as all_output)) when not fails -> - if rest <> [] then begin - Environment.display_output all_output; - Harness.fail_because "%s: expected only one line of output" - program - end; - let runtime = - let compiled_by_boot_ocamlc = - let name = - if Filename.extension binary = ".exe" then - Filename.remove_extension binary - else - binary - in - name <> "ocamldoc" && name <> "ocamldebug" - in - match classification with - | Vanilla -> assert false - | Custom -> - if Config.supports_shared_libraries - || compiled_by_boot_ocamlc then - Harness.fail_because "%s: unexpected -custom runtime" - program - else - "compiled with -custom" - | Tendered {runtime; header; _} -> - let is_expected_runtime = - if Sys.win32 then - runtime = "ocamlrun" - else - runtime = ocamlrun - in - let expected_launch_mode = - if Config.shebangscripts then - Header_shebang + || String.starts_with ~prefix:"flexlink" binary then + let program = Filename.concat bindir binary in + if is_executable program then + let classification = Environment.classify_executable program in + if classification <> Vanilla then + let fails = + (* After the prefix has been renamed, bytecode executables compiled + with -custom will still work. Otherwise, the header needs to be + able to search for ocamlrun and, if applicable, ocamlrun needs to + be able to load C stubs (which will only happen if the runtime + locates the Standard Library using a relative directory, so that + it can find ld.conf) *) + Environment.is_renamed env + && match classification with + | Tendered {dlls; _} -> + not config.launcher_searches_for_ocamlrun + || dlls && config.has_relative_libdir = None + | _ -> + false + in + match Environment.run_process ~fails env program ["-vnum"] with + | (0, ((output::rest) as all_output)) when not fails -> + if rest <> [] then begin + Environment.display_output all_output; + Harness.fail_because "%s: expected only one line of output" + program + end; + let failed, runtime = + let compiled_by_boot_ocamlc = + let name = + if Filename.extension binary = ".exe" then + Filename.remove_extension binary else - Header_exe + binary in - if is_expected_runtime then - if header = expected_launch_mode then - runtime + name <> "ocamldoc" && name <> "ocamldebug" + in + match classification with + | Vanilla -> assert false + | Custom -> + if Config.supports_shared_libraries + || compiled_by_boot_ocamlc then + Harness.fail_because "%s: unexpected -custom runtime" + program else - Harness.fail_because "%s: unexpected launch mode" program - else - Harness.fail_because "%s: unexpected runtime %S" - program runtime - in - Printf.printf " Runtime: %s\n Output: %s\n" runtime output; - if Sys.win32 && Filename.extension binary = ".exe" then - (* This additional part of the test ensures that the executable - launcher on Windows can correctly hand-over to ocamlrun on - Windows. The check is that a binary named ocamlc.byte.exe - can be invoked as ocamlc.byte. -M is used as a previous bug - caused ocamlc.byte to act solely as ocamlrun, the test being - that ocamlrun -M returning the runtime's magic number would - be likely distinct from the behaviour of any of the - distribution's tools when called with -M. *) - let without_exe = Filename.remove_extension binary in - let (this_exit_code, _) as this = - let fails = not (String.contains without_exe '.') in - Environment.run_process - ~fails env program ~argv0:without_exe ["-M"] + failed, "compiled with -custom" + | Tendered {runtime; id; header; search; _} -> + let reported_runtime, search = + let id = + Option.map + (fun t -> "-" ^ Misc.RuntimeID.to_string t) id + |> Option.value ~default:"" + in + match search with + | Absolute dir -> + dir ^ runtime ^ id, Config.Absolute + | Absolute_then_search dir -> + Printf.sprintf "[%s]%s%s" dir runtime id, + Config.Absolute_then_search + | Search -> + runtime ^ id, Config.Search + in + let expected_id = + if config.filename_mangling then + match config.has_runtime_search with + | Config.Absolute | Config.Absolute_then_search -> + Some (Misc.RuntimeID.make_zinc ()) + | Config.Search -> + Some (Misc.RuntimeID.make_zinc ()) + else + None + in + let expected_launch_mode = + if Config.shebangscripts then + Header_shebang + else + Header_exe + in + let pp_runtime_id f = function + | None -> + Format.pp_print_string f "" + | Some id -> + Format.pp_print_string f (Misc.RuntimeID.to_string id) + in + let pp_search f = function + | Config.Absolute -> + Format.pp_print_string f "absolute" + | Config.Absolute_then_search -> + Format.pp_print_string f "fallback" + | Config.Search -> + Format.pp_print_string f "search" + in + let pp_launch f = function + | Header_shebang -> Format.pp_print_string f "shebang" + | Header_exe -> Format.pp_print_string f "executable" + in + let check expected actual description print failed = + if expected = actual then + failed + else + Format.kfprintf (Fun.const true) Format.err_formatter + " *** Unexpected %s (Expected: %a; got %a)\n%!" + description print expected print actual + in + let failed = + failed + |> check config.has_runtime_search search + "search mechanism" pp_search + |> check expected_id id + "runtime ID" pp_runtime_id + |> check "ocamlrun" runtime + "runtime" Format.pp_print_string + |> check expected_launch_mode header + "launch mode" pp_launch + in + failed, reported_runtime in - if this_exit_code = 0 then - if this = exec_magic then - let (that_exit_code, _) as that = - Environment.run_process - ~fails:true env program ~argv0:binary ["-M"] - in - if this = that then - Harness.fail_because - "Neither %s nor %s seem to load the bytecode image" - without_exe binary - else if that_exit_code = 0 then - Harness.fail_because - "%s is not expected to return with exit code 0" - binary - else if not (String.contains without_exe '.') then - Harness.fail_because - "%s is not expected to return the exec magic number!" - without_exe - else () (* Expected outcome was the exec magic number *) - else () (* Expected outcome is a zero exit code *) - else () (* Expected outcome is a non-zero exit code *) - | _ -> - if not fails then - Harness.fail_because "%s: not expected to have failed" program + Printf.printf " Runtime: %s\n Output: %s\n" runtime output; + if Sys.win32 && Filename.extension binary = ".exe" then begin + (* This additional part of the test ensures that the executable + launcher on Windows can correctly hand-over to ocamlrun on + Windows. The check is that a binary named ocamlc.byte.exe + can be invoked as ocamlc.byte. -M is used as a previous bug + caused ocamlc.byte to act solely as ocamlrun, the test being + that ocamlrun -M returning the runtime's magic number would + be likely distinct from the behaviour of any of the + distribution's tools when called with -M. *) + let without_exe = Filename.remove_extension binary in + let _exit_code, _output = + Environment.run_process + ~fails:true env program ~argv0:without_exe ["-M"] in () + end; + failed + | _ -> + if not fails then + Harness.fail_because "%s: not expected to have failed" program + else + failed + else + failed + else + failed + else + failed in let binaries = Sys.readdir bindir in Array.sort String.compare binaries; - Array.iter test_binary binaries + if Array.fold_left test_binary false binaries then + Harness.fail_because "Binaries didn't all match expectation" diff --git a/testsuite/tools/testDynlink.ml b/testsuite/tools/testDynlink.ml index d34563ff88e8..74076270a3f5 100644 --- a/testsuite/tools/testDynlink.ml +++ b/testsuite/tools/testDynlink.ml @@ -48,14 +48,25 @@ let () = let compile ?(custom = false) () = if Sys.file_exists test_program then Harness.erase_file test_program; - let args = if custom then "-custom" :: args else args in + let args = + if custom then + "-custom" :: args + else + (* Hardening to ensure that Bytecode Dynlink is using the runtime's + search path, not compiler's (i.e. unix.cma should be located using + Config.standard_library_default but dllunixbyt.so should be located + using caml_runtime_standard_library_default) *) + "-set-runtime-default" :: "standard_library_default=/does-not-exist" + :: args + in (* In the Renamed phase for a bytecode-only build, ocamlc will be ocamlc.byte and will need to be called via ocamlrun *) let runtime = mode = Bytecode && Harness.ocamlc_fails_after_rename config in (* In the Renamed phase, Config.standard_library will still point to the - Original location *) - let stdlib = true in + Original location, unless the compiler has been configured with a + relative libdir *) + let stdlib = (config.has_relative_libdir = None) in let (_, output) = Environment.run_process ~runtime ~stdlib env compiler args in Environment.display_output output @@ -78,12 +89,15 @@ let () = mode = Bytecode && expected_exit_code = None && not config.target_launcher_searches_for_ocamlrun + && config.has_relative_libdir = None in (* If the library needs C stubs to be loaded dynamically, then the runtime will need CAML_LD_LIBRARY_PATH set in the Renamed phase. *) let stubs = has_c_stubs && expected_exit_code = None + && Config.supports_shared_libraries + && config.has_relative_libdir = None in let expected_exit_code = match expected_exit_code with @@ -93,10 +107,7 @@ let () = | None -> (* Systems configured with --disable-shared can't load bytecode libraries which need C stubs *) - if (Sys.cygwin && mode = Native && List.mem "unix" libraries) - || (not Config.supports_shared_libraries && has_c_stubs) then - (* cf. ocaml/flexdll#146 - Cygwin's natdynlink can't load - unix.cmxs *) + if not Config.supports_shared_libraries && has_c_stubs then 2 else 0 @@ -110,27 +121,19 @@ let () = Harness.fail_because "%s is expected to return with exit code %d" test_program expected_exit_code; in - let test_libraries_in_prog ?expected_exit_code env libraries = - if mode = Native && List.mem "threads" libraries then - (* cf. ocaml/ocaml#12250 - no threads.cmxs *) - let threads_plugin = - Environment.in_libdir env (Filename.concat "threads" "threads.cmxs") - in - if Sys.file_exists threads_plugin then - Harness.fail_because "threads.cmxs is not expected to exist" - else - () - else - test_libraries_in_prog ?expected_exit_code env libraries - in let not_dynlink l = not (List.mem "dynlink" l) in let files, re_compile = compile_test_program () in let expected_exit_code = - (* Bytecode executables launched using the executable header require - caml_executable_name to know where the runtime is. As the Standard - Library is only stored as an absolute path, this doesn't affect the - execution of the test driver (yet). *) - None in + (* Relocatable OCaml bytecode executables launched using the executable + header require caml_executable_name, or they end up being accidentally + relative, since the exec call leaves argv[0] as being the bytecode image + itself. *) + if mode = Bytecode && config.has_relative_libdir <> None + && Harness.no_caml_executable_name + && Environment.launched_via_stub test_program then + Some 2 + else + None in let libraries = List.filter not_dynlink config.libraries in let () = List.iter (test_libraries_in_prog ?expected_exit_code env) libraries; diff --git a/testsuite/tools/testLinkModes.ml b/testsuite/tools/testLinkModes.ml index c0eaff9255eb..e3ef486c6a42 100644 --- a/testsuite/tools/testLinkModes.ml +++ b/testsuite/tools/testLinkModes.ml @@ -125,7 +125,7 @@ let () = around some problems with shared runtimes on s390x and riscv which don't reliably fail. *) -let run_program env _config = +let run_program env config = let prefix = Environment.prefix env in let libdir_suffix = Environment.libdir_suffix env in let prefix, libdir_suffix = @@ -142,7 +142,7 @@ let run_program env _config = if Environment.is_renamed env then stdlib_exists_when_renamed else - true in + config.has_relative_libdir <> None in let args = [string_of_bool stdlib_exists; prefix; libdir_suffix] in let argv0 = if argv0 = test_program then @@ -259,7 +259,7 @@ type outcome = - Sys.argv.(0) doesn't equal Sys.argv.(3) - Config.standard_library exists when it shouldn't (or vice versa) *) let test_runs usr_bin_sh test_program_path test_program - _config env ~via_ocamlrun = + config env ~via_ocamlrun = let tests = let test_program_relative = Filename.concat Filename.current_dir_name test_program @@ -300,25 +300,27 @@ let test_runs usr_bin_sh test_program_path test_program Success {executable_name = test_program_path; argv0 = test_program_path} | Tendered {header = Header_exe; _} -> - if argv0_not_ocaml then - if Sys.win32 then - (* stdlib/header.c will find ocamlrun (because it effectively - uses caml_executable_name) but fails to hand off the bytecode - image, which causes ocamlrun to exit with code 127 *) - Fail 127 - else - (* stdlib/header.c will fail to find ocamlrun, because it never - uses caml_executable_name and so will either fail to find the - executable or will identify that it is not a bytecode - executable. Somewhat confusingly, it exits with code 2 *) - Fail 2 - else if Sys.win32 then - (* stdlib/header.c correctly preserves argv[0] for Windows *) - Success {executable_name = test_program_path; argv0} + if argv0_not_ocaml + && Harness.no_caml_executable_name then + (* stdlib/header.c will fail to find ocamlrun because + caml_executable_name isn't implemented so will either fail to + find the executable or will identify that it is not a bytecode + executable. Somewhat confusingly, it exits with code 2 *) + Fail 2 + else if Harness.no_caml_executable_name + && config.has_relative_libdir <> None then + (* Without caml_executable_name, ocamlrun will be forced to + interpret the relative standard library relative to argv[0], + which will fail. *) + Fail 134 else - (* stdlib/header.c does not preserve argv[0] for Unix *) - Success {executable_name = argv0_resolved; - argv0 = argv0_resolved} + let executable_name = + if Harness.no_caml_executable_name then + argv0_resolved + else + test_program_path + in + Success {executable_name; argv0} | Custom -> if Harness.no_caml_executable_name then if argv0_not_ocaml then @@ -329,12 +331,8 @@ let test_runs usr_bin_sh test_program_path test_program else Success {executable_name = argv0_resolved; argv0} else - if Sys.win32 || argv0_not_ocaml then - (* SearchPath will resolve the relative/implicit arguments to - absolute paths *) - Success {executable_name = test_program_path; argv0} - else - Success {executable_name = argv0_resolved; argv0} + (* -custom executables use caml_executable_name *) + Success {executable_name = test_program_path; argv0} | Vanilla -> if Harness.no_caml_executable_name then Success {executable_name = argv0_resolved; argv0} @@ -355,23 +353,53 @@ let test_runs usr_bin_sh test_program_path test_program run in the Renamed phase for other reasons. *) let make_test_runner ~stdlib_exists_when_renamed ~may_segfault ~with_unix ~tendered ~target_launcher_searches_for_ocamlrun usr_bin_sh - test_program_path test_program config _env = - (* Bytecode executables with absolute headers will need to be - invoked via ocamlrun after the prefix has been renamed. *) + test_program_path test_program config env = + (* Bytecode executables with absolute headers will need to be invoked via + ocamlrun after the prefix has been renamed. *) let via_ocamlrun = tendered && not target_launcher_searches_for_ocamlrun + && (config.has_relative_libdir = None || not (Environment.is_renamed env)) in - let rec run env = + let rec run ~re_executing env = let runs = test_runs usr_bin_sh test_program_path test_program config env ~via_ocamlrun in let execute ({argv0; prefix_path_with_cwd}, outcome) = let expected_executable_name, expected_exit_code, expected_argv0 = match outcome with - | Fail code -> "", code, "" - | Success {executable_name; argv0} -> executable_name, 0, argv0 + | Fail code -> + "", code, "" + | Success {executable_name; argv0} -> + (* Systems which don't have caml_executable_name get particularly + fiddly here, because they can fail for multiple reasons in this + test! Any tendered executable which was expected to succeed is + set to fail here, since the shim for CAML_LD_LIBRARY_PATH will + not be applied. *) + if tendered && with_unix && Harness.no_caml_executable_name + (* Passing the executable directly to ocamlrun will fail if + ocamlrun isn't configured with a relative libdir *) + && (not via_ocamlrun || config.has_relative_libdir = None) + && (re_executing || Environment.is_renamed env + && config.has_relative_libdir = None) then + "", 134, "" + else + executable_name, 0, argv0 + in + let stubs = + tendered && with_unix + (* The programs compiled before the prefix is renamed are intentionally + run without the runtime in PATH in order to test the bytecode + launcher's searching in the image directory before PATH. A side + effect of this is that ld.conf then can't be found, because the + runtime copied to the testsuite directory doesn't have ld.conf in the + correct place. The shim is skipped for systems which don't have + caml_executable_name because otherwise we'd have a test which fails + in the Original phase and succeeds in the Execution phase, which is a + special case too far! *) + && (not Harness.no_caml_executable_name + && (config.has_relative_libdir = None + || not via_ocamlrun && re_executing)) in - let stubs = tendered && with_unix in run_program env config ~runtime:via_ocamlrun ~stubs test_program_path ~prefix_path_with_cwd expected_executable_name @@ -383,14 +411,14 @@ let make_test_runner ~stdlib_exists_when_renamed ~may_segfault ~with_unix if Environment.is_renamed env then (Harness.erase_file test_program_path; `None) else - `Some run + `Some (run ~re_executing:true) in - `Some run + `Some (run ~re_executing:false) (* Describe the various ways in which executables can be produced by our two compilers... *) type linkage = -| Default_ocamlc of launch_mode +| Default_ocamlc of launch_mode * Config.search_method | Default_ocamlopt | Custom_runtime of runtime_mode | Output_obj of compiler * runtime_mode @@ -459,8 +487,34 @@ let compile_test usr_bin_sh config env test test_program description = 0 in match test with - | Default_ocamlc _launch_method -> - f ~tendered:true [] + | Default_ocamlc(launch_method, search_method) -> + let args = + match launch_method with + | Header_exe when config.bytecode_shebangs_by_default -> + ["-launch-method"; "exe"] + | Header_shebang when not config.bytecode_shebangs_by_default -> + ["-launch-method"; "sh"] + | _ -> + [] in + let target_launcher_searches_for_ocamlrun = + if search_method = Config.search_method then + None + else + Some (search_method <> Config.Absolute) + in + let param = + match search_method with + | Absolute -> "disable" + | Absolute_then_search -> "enable" + | Search -> "always" + in + let args = + if search_method = Config.search_method then + args + else + "-runtime-search" :: param :: args + in + f ?target_launcher_searches_for_ocamlrun ~tendered:true args | Default_ocamlopt -> f ~mode:Native [] | Custom_runtime Static -> @@ -494,22 +548,13 @@ let compile_test usr_bin_sh config env test test_program description = ~clibs:["-lcomprmarsh"; "-lunixnat"; Config.compression_c_libraries] ~linker_exit_code ["-output-obj"] | Output_complete_obj(C_ocamlc, Static) -> - (* At the moment, the partial linker will pass -lws2_32 and -ladvapi32 - on to the partial linker on mingw-w64 which causes a failure. Until - this is fixed, pass the libraries manually, using -noautolink. *) - f ~clibs:[] - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixbyt"] + f ~clibs:[] ["-output-complete-obj"] | Output_complete_obj(C_ocamlc, Shared) -> - (* The partial linker doesn't correctly process - -runtime-variant _shared, as the .so gets passed to the partial - linker. On macOS, this causes a warning; on other systems, it's an - error. *) - let compilation_exit_code = fails_if (Config.system <> "macosx") in (* Shared compilation isn't available on native Windows and fails on Cygwin *) let linker_exit_code = fails_if (Sys.win32 || Sys.cygwin) in - f ~use_shared_runtime:true ~clibs:[] ~compilation_exit_code - ~linker_exit_code ["-output-complete-obj"] + f ~use_shared_runtime:true ~clibs:[] ~linker_exit_code + ["-output-complete-obj"] | Output_complete_obj(C_ocamlopt, Static) -> let linker_exit_code = (* cf. ocaml/ocaml#13692 - linking fails on ppc64 *) @@ -518,20 +563,18 @@ let compile_test usr_bin_sh config env test test_program description = else 0 in - (* At the moment, the partial linker will pass -lzstd to ld -r which - will (normally) fail). Until this is done, pass the libraries - manually, using -noautolink. *) f ~mode:Native ~clibs:[Config.compression_c_libraries] - ~linker_exit_code - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixnat"; - "-cclib"; "-lcomprmarsh"] + ~linker_exit_code ["-output-complete-obj"] | Output_complete_obj(C_ocamlopt, Shared) -> - (* ocamlopt doesn't correctly implement -runtime-variant _shared *) - let compilation_exit_code = fails_if true in - f ~mode:Native ~use_shared_runtime:true - ~compilation_exit_code ~clibs:[Config.compression_c_libraries] - ["-output-complete-obj"; "-noautolink"; "-cclib"; "-lunixnat"; - "-cclib"; "-lcomprmarsh"] + (* cf. ocaml/ocaml#13693 - on Fedora/RHEL, this executable + segfaults *) + let may_segfault = List.mem Config.architecture ["s390x"; "riscv"] in + (* Shared compilation isn't available on native Windows and fails on + Cygwin *) + let linker_exit_code = fails_if (Sys.win32 || Sys.cygwin) in + f ~mode:Native ~use_shared_runtime:true ~may_segfault + ~linker_exit_code ~clibs:[Config.compression_c_libraries] + ["-output-complete-obj"] | Output_complete_exe Static -> f ~calls_linker:true ["-output-complete-exe"] | Output_complete_exe Shared -> @@ -557,7 +600,7 @@ let compile_test usr_bin_sh config env test test_program description = test_program_path in let with_unix = (Config.supports_shared_libraries || not tendered) in - let is_randomized = false in + let is_randomized = Environment.is_renamed env in let verbose = Environment.verbose env in write_test_program ~verbose ~is_randomized ~with_unix description; let options = @@ -566,6 +609,21 @@ let compile_test usr_bin_sh config env test test_program description = else options in + let options = + if Environment.is_renamed env || config.has_relative_libdir <> None then + options + else + let new_libdir = + Filename.concat (Environment.prefix env ^ ".new") + (Environment.libdir_suffix env) in + let stdlib_default = "standard_library_default=" ^ new_libdir in + let options = "-set-runtime-default" :: stdlib_default :: options in + if tendered then + let libdir = Environment.libdir env in + "-dllpath" :: (Filename.concat libdir "stublibs") :: options + else + options + in let args = "-o" :: output :: "test_install_script.ml" :: options @@ -579,6 +637,12 @@ let compile_test usr_bin_sh config env test test_program description = let args = "-I" :: "+compiler-libs" :: Harness.lib mode "ocamlcommon" :: args in + let args = + if is_randomized then + "-set-runtime-default" :: "R" :: args + else + args + in let args = if verbose then "-verbose" :: args @@ -592,14 +656,11 @@ let compile_test usr_bin_sh config env test test_program description = need to be invoked via ocamlrun in the Renamed phase *) let runtime = mode = Bytecode && Harness.ocamlc_fails_after_rename config in - (* If shared libraries are being used, ocamlc will need to be able to - load the stub libraries to check the primitives table *) - let stubs = with_unix && tendered in (* In the Renamed phase, Config.standard_library will still point to - the Original location *) - let stdlib = true in - Environment.run_process - ~fails ~runtime ~stubs ~stdlib env compiler args + the Original location, unless the compiler has been configured + with a relative libdir *) + let stdlib = (config.has_relative_libdir = None) in + Environment.run_process ~fails ~runtime ~stdlib env compiler args in Environment.display_output output; exit_code @@ -626,9 +687,21 @@ let compile_test usr_bin_sh config env test test_program description = `None else let stdlib_exists_when_renamed = - (* Config.standard_library is an absolute path, and therefore will - always point to the Original location in the Renamed phase. *) - false + if config.has_relative_libdir = None then + (* In the Original phase, for a compiler with an absolute libdir, + -set-runtime-default is used to set standard_library_default to + the Renamed phase's location. When the tests are recompiled in + the Renamed phase, this is not done. The effect is that if any + test is being run in the Renamed phase, Config.standard_library + will be correct. *) + not (Environment.is_renamed env) + else + (* When the compiler has a relative libdir, -set-runtime-default + is implicitly being tested by the build process, and we wish to + test the opposite in the harness - thus the test programs + compiled in the Original phase will _not_ be able to find the + Standard Library in the Renamed phase. *) + Environment.is_renamed env in make_test_runner ~stdlib_exists_when_renamed ~may_segfault ~with_unix ~tendered ~target_launcher_searches_for_ocamlrun @@ -660,15 +733,13 @@ let run ~sh config env = Format.printf "ocamlc -where: %a\nocamlopt -where: %a\n%!" pp_path ocamlc_where pp_path ocamlopt_where; let compile_test = compile_test sh config env in - let launch_method = - if config.bytecode_shebangs_by_default then - Header_shebang - else - Header_exe - in let tests = [ - compile_test (Default_ocamlc launch_method) - "byt_default" "with tender"; + compile_test (Default_ocamlc(Header_exe, Absolute)) + "byt_default_exe_disable" "with absolute tender"; + compile_test (Default_ocamlc(Header_exe, Absolute_then_search)) + "byt_default_exe_enable" "with fallback tender"; + compile_test (Default_ocamlc(Header_exe, Search)) + "byt_default_exe_always" "with relocatable tender"; compile_test (Custom_runtime Static) "custom_static" "-custom static runtime"; compile_test (Custom_runtime Shared) @@ -696,5 +767,16 @@ let run ~sh config env = compile_test (Output_complete_obj(C_ocamlopt, Shared)) "nat_complete_obj_shared" "-output-complete-obj shared runtime"; ] in + let tests = + if Config.shebangscripts then + (compile_test (Default_ocamlc(Header_shebang, Absolute)) + "byt_default_sh_disable" "with absolute #!") :: + (compile_test (Default_ocamlc(Header_shebang, Absolute_then_search)) + "byt_default_sh_enable" "with fallback #!") :: + (compile_test (Default_ocamlc(Header_shebang, Search)) + "byt_default_sh_always" "with relocatable #!") :: + tests + else + tests in Printf.printf "Running programs\n%!"; List.map (function `Some f -> f env | `None -> `None) tests diff --git a/testsuite/tools/testRelocation.ml b/testsuite/tools/testRelocation.ml index 1fb7b2baf3d8..57cfe676c7a6 100644 --- a/testsuite/tools/testRelocation.ml +++ b/testsuite/tools/testRelocation.ml @@ -24,12 +24,17 @@ end) (* Augment toolchain properties with information from the configuration (this essentially goes from "is foo capable of doing bar" to "foo does bar in this context". *) -let effective_toolchain _config = +let effective_toolchain config = let c_compiler_debug_paths_are_absolute = Toolchain.c_compiler_debug_paths_can_be_absolute + && (not Config.c_has_debug_prefix_map || config.has_relative_libdir = None) in let assembler_embeds_build_path = Toolchain.assembler_embeds_build_path + && (not Config.as_has_debug_prefix_map + || Config.architecture = "riscv" + || Config.as_is_cc + || config.has_relative_libdir = None) in ~c_compiler_debug_paths_are_absolute, ~assembler_embeds_build_path @@ -59,18 +64,20 @@ let bindir_rules config file = (* Determine if the installation prefix should be found in this file *) let prefix = let code_embeds_stdlib_location = - (* The runtime binaries all contain OCAML_STDLIB_DIR and everything - except flexlink and ocamllex link with the Config module, either - directly or via ocamlcommon *) - not (List.mem basename ["flexlink.byte"; "flexlink.opt"; "flexlink"; - "ocamllex.byte"; "ocamllex.opt"; "ocamllex"; - "ocamlyacc"]) + (* If the compiler is configured with an absolute libdir, the runtime + binaries all contain OCAML_STDLIB_DIR and everything except flexlink + and ocamllex link with the Config module, either directly or via + ocamlcommon *) + config.has_relative_libdir = None + && not (List.mem basename ["flexlink.byte"; "flexlink.opt"; "flexlink"; + "ocamllex.byte"; "ocamllex.opt"; "ocamllex"; + "ocamlyacc"]) in let linker_embeds_stdlib_location = (* If the launcher doesn't search for ocamlrun, then either the #! stub will include the absolute path or the RNTM section will *) match classification with - | Tendered _ when not config.launcher_searches_for_ocamlrun -> true + | Tendered _ when config.has_runtime_search <> Config.Search -> true | _ -> false in if code_embeds_stdlib_location || linker_embeds_stdlib_location then @@ -104,7 +111,7 @@ let bindir_rules config file = else (* Bytecode runtimes and ocamlyacc of which only ocamlrund is linked with -g *) - `Other, (basename = "ocamlrund") + `Other, (List.mem "ocamlrund" (String.split_on_char '-' basename)) in (* Combine this with the properties of the platform to determine whether the executable will contain the build path. *) @@ -124,7 +131,7 @@ let bindir_rules config file = stripped. However, since the C objects in libcamlrun are compiled with -g, this will still result in debug information for -custom runtime executables. *) - linked_with_debug + linked_with_debug && config.has_relative_libdir = None || (classification = Custom && Toolchain.linker_propagates_debug_information && c_compiler_debug_paths_are_absolute) @@ -160,18 +167,19 @@ let libdir_rules config file = ~ocaml_debug:has_ocaml_debug_info, ~c_debug:has_c_debug_info, ~s:contains_assembled_objects) = - if List.mem basename ["Makefile.config"; - "ld.conf"; - "runtime-launch-info"] then - (* These files all embed the Standard Library location *) + if basename = "Makefile.config" then + (* Embeds the Standard Library location *) (~stdlib:true, ~ocaml_debug:false, ~c_debug:false, ~s:false) else if basename = "config.cmx" then (* config.cmx contains Config.standard_library for inlining *) - (~stdlib:true, ~ocaml_debug:false, ~c_debug:false, ~s:false) + let stdlib = + config.has_relative_libdir = None && not Config.flambda in + (~stdlib, ~ocaml_debug:false, ~c_debug:false, ~s:false) else if List.mem ext [".cma"; ".cmo"; ".cmt"; ".cmti"] then let stdlib = (* via Config.standard_library *) - List.mem basename ["config.cmt"; "config_main.cmt"; - "ocamlcommon.cma"] in + config.has_relative_libdir = None + && (basename = "config.cmt" || basename = "ocamlcommon.cma") in + (* The compiler's artefacts are all compiled with -g *) (~stdlib, ~ocaml_debug:true, ~c_debug:false, ~s:false) else if ext = ".cmxs" then (* All the .cmxs files built by the distribution at present include C @@ -191,12 +199,6 @@ let libdir_rules config file = (libcamlrun.a, libcamlrund.a, libcamlrun_shared.so, etc. Note that these properties are _not_ used for libasmrun* (see below) *) - let is_camlrun = - let dir = Filename.basename (Filename.dirname file) in - dir <> "stublibs" - && String.starts_with ~prefix:"libcamlrun" basename - && not (String.starts_with ~prefix:"libcamlruntime" basename) - in if ext = Config.ext_lib then (* Any archive produced by ocamlopt will have a .cmxa file with it *) let is_ocaml = @@ -204,14 +206,13 @@ let libdir_rules config file = (* Config.standard_library is in ocamlcommon and the bytecode runtime embeds the Standard Library location *) let stdlib = - is_camlrun - || Filename.remove_extension basename = "ocamlcommon" - in + config.has_relative_libdir = None + && Filename.remove_extension basename = "ocamlcommon" in (~stdlib, ~ocaml_debug:false, ~c_debug:(not is_ocaml), ~s:is_ocaml) else (* DLLs are either the shared versions of the runtime libraries or C stubs. All of these are compiled with -g *) - (~stdlib:is_camlrun, ~ocaml_debug:false, ~c_debug:true, ~s:false) + (~stdlib:false, ~ocaml_debug:false, ~c_debug:true, ~s:false) else (~stdlib:false, ~ocaml_debug:false, ~c_debug:false, ~s:false) in @@ -229,7 +230,7 @@ let libdir_rules config file = || Toolchain.linker_embeds_build_path) then Toolchain.linker_embeds_build_path else - has_ocaml_debug_info + has_ocaml_debug_info && config.has_relative_libdir = None || has_c_debug_info && c_compiler_debug_paths_are_absolute || contains_assembled_objects && assembler_embeds_build_path || ext = Config.ext_obj @@ -241,6 +242,13 @@ let libdir_rules config file = else LocationSet.empty in + let prefix = + if config.has_relative_libdir <> None + && basename = "Makefile.config" then + LocationSet.add Relative prefix + else + prefix + in if contains_build_path then LocationSet.add Build prefix else @@ -483,17 +491,6 @@ let run ~reproducible config env = |> scan Environment.libdir "$libdir" libdir_rules in flush stderr; - (* Abort the harness if there are files which didn't match a ruleset *) - let () = - if results_are_reproducible && not consistent then - Harness.fail_because - "Internal error: bindir_rules and libdir_rules disagree with \ - reproducible_rules" - else if results_are_reproducible <> reproducible then - Harness.fail_because - "The build is %sexpected to be reproducible" - (if not reproducible then "not " else "") - in (* Summarise the results, using wildcards to bring them to a readable length *) let sections = @@ -612,7 +609,15 @@ let run ~reproducible config env = let pp_results = Format.(pp_print_list ~pp_sep pp_print_string) in Format.printf "@[ %a@]@." pp_results results in + (* Abort the harness if there are files which didn't match a ruleset *) if failed then - Harness.fail_because "Installed files don't match expectation" - else - List.iter display sections + Harness.fail_because "Installed files don't match expectation"; + List.iter display sections; + if results_are_reproducible && not consistent then + Harness.fail_because + "Internal error: bindir_rules and libdir_rules disagree with \ + reproducible_rules" + else if results_are_reproducible <> reproducible then + Harness.fail_because + "The build is %sexpected to be reproducible" + (if not reproducible then "not " else "") diff --git a/testsuite/tools/testToplevel.ml b/testsuite/tools/testToplevel.ml index 665371261129..6d2b9235aedc 100644 --- a/testsuite/tools/testToplevel.ml +++ b/testsuite/tools/testToplevel.ml @@ -34,27 +34,6 @@ let run config env mode = (* dynlink.cmxs does not exist, for obvious reasons, but we can check loading the library in ocamlnat "works". *) "cmxa" - else if library = "threads" then - let threads_plugin = - let plugin = Filename.concat "threads" "threads.cmxs" in - Environment.in_libdir env plugin - in - if Sys.file_exists threads_plugin then - Harness.fail_because - "threads.cmxs is not expected to exist" - else if Sys.win32 then - (* cf. note in ocaml/ocaml#13520 - threads.cmxa is - correctly compiled assuming winpthreads is statically - in the same image (so without defining - WINPTHREADS_USE_DLLIMPORT), but this is incorrect for - threads.cmxs, as threads.cmxs may load more than 2GiB - away from the main executable. For native Windows, it's - not possible to rely on ocamlnat's automatic - cmxa -> cmxs recompilation. *) - "cmxs" - else - (* cf. ocaml/ocaml#12250 - no threads.cmxs *) - "cmxa" else "cmxs" | Bytecode -> @@ -77,12 +56,7 @@ let run config env mode = let expected_exit_code = (* Systems configured with --disable-shared can't load bytecode libraries which need C stubs *) - if Sys.cygwin && mode = Native && List.mem "unix" libraries - || Sys.win32 && mode = Native && List.mem "threads" libraries - || has_c_stubs && not Config.supports_shared_libraries then - (* cf. ocaml/flexdll#146 - Cygwin's ocamlnat can't load unix.cmxs and - the lines above will have triggered native Windows being unable to - load threads.cmxs *) + if has_c_stubs && not Config.supports_shared_libraries then 125 else 0 @@ -97,8 +71,7 @@ let run config env mode = Environment.run_process ~fails:(expected_exit_code <> 0) ~runtime:(mode = Bytecode && not config.launcher_searches_for_ocamlrun) - ~stubs:(mode = Bytecode && has_c_stubs) - ~stdlib:true env toplevel args + ~stdlib:(config.has_relative_libdir = None) env toplevel args in Environment.display_output output; if exit_code <> expected_exit_code then diff --git a/testsuite/tools/test_in_prefix.ml b/testsuite/tools/test_in_prefix.ml index 291a0e01dabb..5c922028e30d 100644 --- a/testsuite/tools/test_in_prefix.ml +++ b/testsuite/tools/test_in_prefix.ml @@ -49,13 +49,13 @@ let print_summary config header_size ~prefix ~bindir_suffix ~libdir_suffix \ @{libdir@} = [$prefix/]%s\n\ \ - C compiler is %s [%s] for %s\n\ \ - OCaml is %a%a; target binaries by default are %a\n\ - \ - Executable header size is %.2fKiB (%d bytes)\n\ + \ - Executable header size is %.2fKiB (%Ld bytes)\n\ \ - Testing %s\n@?" prefix bindir_suffix libdir_suffix Config.c_compiler Config.c_compiler_vendor Config.target pp_relocatable relocatable pp_reproducible reproducible pp_relocatable target_relocatable - (float_of_int header_size /. 1024.0) header_size summary + (Int64.to_float header_size /. 1024.0) header_size summary let run_tests ~sh config env = TestDynlink.run config env Bytecode; @@ -68,6 +68,10 @@ let run_tests ~sh config env = TestBytecodeBinaries.run config env; TestLinkModes.run ~sh config env +let rename_exe_in_test_root env from_base to_base = + Sys.rename (Environment.in_test_root env (Harness.exe from_base)) + (Environment.in_test_root env (Harness.exe to_base)) + let () = let ~config, ~pwd, ~prefix, ~bindir:_, ~bindir_suffix, ~libdir, ~libdir_suffix, ~summarise_only, ~verbose = @@ -115,21 +119,23 @@ let () = in List.map add_dependencies libraries in - let runtime_launch_info = + let header_size, filename_mangling = let file = Filename.concat libdir "runtime-launch-info" in - Bytelink.read_runtime_launch_info file in - let header_size = - let {Bytelink.buffer; executable_offset; _} = runtime_launch_info in - String.length buffer - executable_offset in + In_channel.with_open_bin file @@ fun ic -> + In_channel.length ic, (input_char ic <> '\000') + in let bytecode_shebangs_by_default = - runtime_launch_info.launcher <> Bytelink.Executable in - let launcher_searches_for_ocamlrun = Sys.win32 in - let target_launcher_searches_for_ocamlrun = Sys.win32 in + Config.launch_method <> Config.Executable in + let launcher_searches_for_ocamlrun = + (config.has_runtime_search <> Config.Absolute) in + let target_launcher_searches_for_ocamlrun = + (Config.search_method <> Config.Absolute) in let config = {config with libraries; launcher_searches_for_ocamlrun; target_launcher_searches_for_ocamlrun; - bytecode_shebangs_by_default} + bytecode_shebangs_by_default; + filename_mangling} in (* A compiler distribution is _Relocatable_ if its build, for a given system, satisfies the following three properties: @@ -144,7 +150,9 @@ let () = For the compiler's files to be reproducible, the compiler needs to be both relocatable and also required support from the assembler and C compiler. *) - let relocatable = false in + let relocatable = + config.has_relative_libdir <> None + && config.has_runtime_search <> Config.Absolute in let reproducible = relocatable (* At present, the compiler build doesn't actually take advantage of this @@ -157,7 +165,7 @@ let () = && (not Toolchain.c_compiler_always_embeds_build_path || not Toolchain.c_compiler_debug_paths_can_be_absolute) in - let target_relocatable = false in + let target_relocatable = (Config.search_method <> Config.Absolute) in (* Use Harness.pp_path unless --verbose was specified *) let pp_path = if verbose then @@ -219,11 +227,26 @@ let () = pp_path prefix; Sys.rename new_prefix prefix); let env = - make_env ~phase:Renamed ~prefix:new_prefix ~bindir_suffix ~libdir_suffix in + make_env ~phase:Execution ~prefix:new_prefix ~bindir_suffix ~libdir_suffix + in (* 3. Re-run the test programs compiled with the normal prefix *) Printf.printf "Re-running test programs\n%!"; - List.iter - (function `Some f -> assert (f env = `None) | `None -> ()) programs; + (* Verify that the searching runtimes are searching the directory containing + the program itself first. *) + let runtime = + if config.filename_mangling then + Misc.RuntimeID.(ocamlrun "" (make_zinc ())) + else + "ocamlrun" + in + rename_exe_in_test_root env ("test-" ^ runtime) runtime; + Fun.protect + ~finally:(fun () -> rename_exe_in_test_root env runtime ("test-" ^ runtime)) + (fun () -> + List.iter + (function `Some f -> assert (f env = `None) | `None -> ()) programs); + let env = + make_env ~phase:Renamed ~prefix:new_prefix ~bindir_suffix ~libdir_suffix in (* 4. Finally re-run the main test battery in the new prefix *) Compmisc.init_path ~standard_library:libdir (); let programs = run_tests env in diff --git a/testsuite/tools/test_ld_conf.ml b/testsuite/tools/test_ld_conf.ml index a6d9eacebde2..9e9b50dcb994 100644 --- a/testsuite/tools/test_ld_conf.ml +++ b/testsuite/tools/test_ld_conf.ml @@ -39,12 +39,13 @@ type ld_conf_test = { and var_setting = Unset | Empty | Set of string list (* Set of tests to run in a given environment *) -let tests _config env = +let tests config env = (* Convenience function - [if_ld_conf_found outcome] returns the empty list in the Renamed phase. *) let if_ld_conf_found outcome = - (* ocamlrun can't find ld.conf after the prefix has been renamed *) - if Environment.is_renamed env then + (* ocamlrun can only find ld.conf after the prefix has been renamed if it's + configured with --with-relative-libdir *) + if Environment.is_renamed env && config.has_relative_libdir = None then [] else outcome @@ -63,22 +64,30 @@ let tests _config env = Environment.libdir env else Config.standard_library in + let libdir = + if config.has_relative_libdir = None then + libdir + else + try Unix.realpath libdir + with Invalid_argument _ -> libdir in let (/) = Filename.concat in let data = [ + (* Blank line - should be ignored on all platforms *) + "", "", None; (* Root directory (both forms) preserved *) "/", "/", None; "//", "//", None; (* Current and Parent directory names *) - ".", ".", None; - "..", "..", None; + ".", libdir, None; + "..", libdir / "..", None; (* Current and Parent directory names with OS-default trailing separator (i.e. ./ and ../ on Unix and .\ and ..\ on Windows) *) - "." / "", "." / "", None; - ".." / "", ".." / "", None; + "." / "", libdir / "", None; + ".." / "", libdir / ".." / "", None; (* "stublibs" relative to the Current and Parent directory (using OS- default separator) *) - "." / "stublibs", "." / "stublibs", None; - ".." / "stublibs", ".." / "stublibs", None; + "." / "stublibs", libdir / "stublibs", None; + ".." / "stublibs", libdir / ".." / "stublibs", None; (* Other cases - implicit and absolute entries, and entries beginning with the Current and Parent directory names *) "stublibs", "stublibs", None; @@ -88,64 +97,25 @@ let tests _config env = "/lib/ocaml", "/lib/ocaml", Some "/lib/ocaml\r"; ] in let fold (main, main_outcome, main_outcome_cr) (line, outcome, cr) = - let cr = match cr with - | Some cr -> cr - | None -> - (* Windows opens ld.conf in text mode, so the \r are stripped *) - if Sys.win32 then - outcome - else - outcome ^ "\r" - in + let cr = Option.value ~default:outcome cr in line::main, outcome::main_outcome, cr::main_outcome_cr in List.fold_left fold ([], [], []) (List.rev data) in + let main_outcome = List.tl main_outcome in + let main_outcome_cr = List.tl main_outcome_cr in let tests = (* Various test lines above all fed via ld.conf in the Standard Library *) - let outcome = - (* Known issue: Windows strips out the blank entries in the search path - (somewhat counterintuitively!) *) - if Sys.win32 then - main_outcome - else - "." :: main_outcome - in [{base with description = "Base ld.conf test"; - stdlib = "" :: main; - outcome = if_ld_conf_found outcome}] in + stdlib = main; + outcome = if_ld_conf_found main_outcome}] in let tests = (* As first, but with the same entries in CAML_LD_LIBRARY_PATH too *) - let stdlib = - if Sys.win32 then - (* Known issue: Windows ignores empty entries in the search path, and - it's slightly easier to test this only once in this test *) - main - else - "" :: main - in - (* Part of the outcome from ld.conf *) - let outcome_ld_conf = - if Sys.win32 then - main_outcome - else - "." :: main_outcome - in - (* Part of the outcome from CAML_LD_LIBRARY_PATH *) - let outcome_caml_ld_library_path = - if Sys.win32 then - (* No blank entry at the start: Windows returns the same entries *) - main - else - (* Unix displays "." for the blank, but otherwise returns the same - entries *) - "." :: main - in {base with description = "Base ld.conf + CAML_LD_LIBRARY_PATH"; - caml_ld_library_path = Set stdlib; - stdlib; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found outcome_ld_conf} :: tests in + caml_ld_library_path = Set main; + stdlib = main; + outcome = List.tl main + @ if_ld_conf_found main_outcome} :: tests in let tests = (* As first, but with entries in CAML_LD_LIBRARY_PATH including quotes and separators. No effect on Unix, as the colon separator is always @@ -184,70 +154,47 @@ let tests _config env = @ if_ld_conf_found main_outcome} :: tests in let tests = (* As first, but with a CR at the end of each line *) - let outcome = - (* Windows opens ld.conf in text mode, so the line with just \r is - read as an empty string and consequently stripped *) - if Sys.win32 then - main_outcome_cr - else - "\r" :: main_outcome_cr - in {base with description = "Base ld.conf with CRLF endings"; stdlib = List.map (Fun.flip (^) "\r") ("" :: main); - outcome = if_ld_conf_found outcome} :: tests in + outcome = if_ld_conf_found main_outcome_cr} :: tests in tests in (* Batch 2: effects of empty (vs unset) environment variables *) let tests = let tests = - (* Empty CAML_LD_LIBRARY_PATH should add "." to the start of the search - path *) - let outcome_caml_ld_library_path = - if Sys.win32 then - [] - else - ["."] - in + (* Empty CAML_LD_LIBRARY_PATH - should be ignored *) {base with description = "Empty CAML_LD_LIBRARY_PATH"; caml_ld_library_path = Empty; stdlib = ["ld.conf"]; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found ["ld.conf"]} :: tests in + outcome = if_ld_conf_found ["ld.conf"]} :: tests in let tests = - (* Embedded empty entries in CAML_LD_LIBRARY_PATH should add equivalent - "." entries to the search path *) - let outcome_caml_ld_library_path = - if Sys.win32 then - [] - else - ["."; "."] - in + (* Empty segments in CAML_LD_LIBRARY_PATH - should be ignored *) {base with description = "Embedded empty entry in CAML_LD_LIBRARY_PATH"; caml_ld_library_path = Set [""; ""]; stdlib = ["ld.conf"]; - outcome = outcome_caml_ld_library_path - @ if_ld_conf_found ["ld.conf"]} :: tests in + outcome = if_ld_conf_found ["ld.conf"]} :: tests in + let ld_conf_outcome = if_ld_conf_found ["masked-stdlib"] in let tests = - (* An empty CAMLLIB should cause ld.conf in the Standard Library to be - ignored, but not CAML_LD_LIBRARY PATH *) + (* An empty CAMLLIB shouldn't hide ld.conf in the Standard Library *) {base with description = "Empty CAMLLIB"; caml_ld_library_path = Set ["env"]; camllib = Empty; stdlib = ["masked-stdlib"]; - outcome = ["env"]} :: tests in + outcome = "env" :: ld_conf_outcome} :: tests in let tests = - (* An empty OCAMLLIB should cause ld.conf in both the Standard Library and - CAMLLIB to be ignored, but not CAML_LD_LIBRARY_PATH *) + (* An empty OCAMLLIB shouldn't hide ld.conf in either the Standard Library + or CAMLLIB\ld.conf *) {description = "Empty OCAMLLIB"; caml_ld_library_path = Set ["env"]; ocamllib = Empty; camllib = Set ["masked-camllib"]; stdlib = ["masked-stdlib"]; - outcome = ["env"]} :: tests in + outcome = ["env"; "masked-camllib"] @ ld_conf_outcome} :: tests in tests in (* Batch 3: load priority, embedded NUL characters, EOL-at-EOF, etc. *) let tests = + let ld_conf_outcome = if_ld_conf_found ["libdir"] in let tests = (* OCAMLLIB should have priority over CAMLLIB and the Standard Library *) {description = "$OCAMLLIB/ld.conf"; @@ -255,19 +202,19 @@ let tests _config env = ocamllib = Set ["ocamllib\000"; "hidden"]; camllib = Set ["camllib\000"; "hidden"]; stdlib = ["libdir"]; - outcome = ["env"; "ocamllib"]} :: tests in + outcome = ["env"; "ocamllib"; "camllib"] @ ld_conf_outcome} :: tests in let tests = (* CAMLLIB should have priority over the Standard Library *) {base with description = "$CAMLLIB/ld.conf"; caml_ld_library_path = Set ["env"]; camllib = Set ["camllib\000"; "hidden"]; stdlib = ["libdir"]; - outcome = ["env"; "camllib"]} :: tests in + outcome = ["env"; "camllib"] @ ld_conf_outcome} :: tests in let tests = (* EOL-at-EOF should not add a blank entry to the search path *) {base with description = "EOF-at-EOF"; stdlib = (if Sys.win32 then ["libdir\r\n"] else ["libdir\n"]); - outcome = if_ld_conf_found ["libdir"]} :: tests in + outcome = ld_conf_outcome} :: tests in tests in tests @@ -304,17 +251,8 @@ let () = && Sys.getenv_opt "OCAMLLIB" <> Some "") let () = - let print s = - (* Known issue: ocamlrun -config suppresses blank lines on Windows, but - displays them as "." on other platforms. Do a similar transformation - here, but suppress the lines entirely on Windows. *) - if s <> "" then - print_endline s - else if not Sys.win32 then - print_endline "." - in Dll.init_compile false; - List.iter print (Dll.search_path ()) + List.iter print_endline (Dll.search_path ()) |}) in let compile_test_program mode files test_program description = @@ -333,8 +271,9 @@ let () = let runtime = mode = Bytecode && Harness.ocamlc_fails_after_rename config in (* In the Renamed phase, Config.standard_library will still point to the - Original location *) - let stdlib = true in + Original location, unless the compiler has been configured with a + relative libdir *) + let stdlib = (config.has_relative_libdir = None) in let (_, output) = Environment.run_process ~runtime ~stdlib env compiler args in Environment.display_output output; @@ -349,104 +288,18 @@ let () = in (* In the Renamed phase, the test driver will need to be launched with ocamlrun, unless executables produced by the compiler are capable of - searching for the runtime (as the Windows executable launcher does) *) + searching for the runtime (as the Windows executable launcher does) or + the compiler has been configured with a relative libdir (as in this mode + the bytecode header will have the correct location) *) let runtime = mode = Bytecode - && not config.target_launcher_searches_for_ocamlrun in - let run run_process test = + && not config.target_launcher_searches_for_ocamlrun + && config.has_relative_libdir = None in + let run run_process _test = let code, lines = run_process ~runtime test_program [] in if code = 0 then - let lines = - (* Known issue: Sys.getenv processes blank environment variables - differently from _wgetenv which in the tests will cause it load - ld.conf files. The tests have been written to allow for this by - having the lines which are _not_ expected to appear on Unix be - prefixed with "masked-". *) - if Sys.win32 then - if ((test.camllib = Empty - && not (Environment.is_renamed env)) - || test.ocamllib = Empty) then - let unmask s = not (String.starts_with ~prefix:"masked-" s) in - let lines' = List.filter unmask lines in - (* If Windows behaviour has been harmonised, then the filtered - list of lines would be the same as the unfiltered list. If this - happens, insert an extra line to "poison" the test output to - prevent this behaviour from being silently fixed. *) - if lines = lines' then - "poisoned"::lines - else - lines' - else - lines - else - lines - in - let lines = - (* Known issue: ocamlc opens ld.conf in text mode on Cygwin but - ocamlrun opens it in binary mode (the default). This means that - ocamlrun will return lines ending with \r, but ocamlc will both - strip the \r and ignore a line consisting of just \r (because that - appears blank in text mode). This is mitigated by ensuring that the - \r line is always first in the test, and then adding back the \r to - the output on Cygwin. This will clearly fail if the behaviour of - ocamlrun and ocamlc is harmonised. *) - match test.stdlib with - | "\r" :: _ when Sys.cygwin && lines <> [] -> - "\r" :: List.map (Fun.flip (^) "\r") (List.tl lines) - | _ -> - lines - in - let lines = - (* Known issue: Misc.split_path_contents ignores empty strings where - caml_decompose_path does not. Mitigate it by detecting the - environment setting and simulating the line. *) - if test.caml_ld_library_path = Set [] - || test.caml_ld_library_path = Empty then - "." :: lines - else - lines - in - (* Known issue: Windows strips out the blank entries in the search path - (somewhat counterintuitively!) *) - let lines = - if not Sys.win32 then - lines - else - List.drop_while (String.equal ".") lines - in - let lines = - (* Known issue: Dll.ld_conf_contents preserves NUL characters in lines - where caml_parse_ld_conf terminates processing. This is mitigated - in the test by putting a single line "hidden" after the line with - an embedded NUL. *) - let includes_nulls = - let includes_nulls = function - | Unset | Empty -> false - | Set l -> List.exists (Fun.flip String.contains '\000') l - in - includes_nulls test.ocamllib || includes_nulls test.camllib - in - if includes_nulls then - let strip_null s = - match String.index s '\000' with - | index -> - String.sub s 0 index - | exception Not_found -> - s - in - let lines' = List.map strip_null lines in - if lines <> lines' then - List.filter ((<>) "hidden") lines' - else - (* As with empty environment variables above, if this behaviour - appears to have been fixed, poison the output of the test so - that doesn't happen silently. *) - "poisoned" :: lines - else - lines - in description :: lines else Harness.fail_because "%s is expected to exit with code 0" @@ -626,7 +479,7 @@ let run config env = if not (Sys.file_exists dir) then Sys.mkdir dir 0o775 else if not (Sys.is_directory dir) then begin - Sys.rmdir dir; + Sys.remove dir; Sys.mkdir dir 0o775 end in diff --git a/tools/ci/actions/runner.sh b/tools/ci/actions/runner.sh index 512e57d23829..93df87463541 100755 --- a/tools/ci/actions/runner.sh +++ b/tools/ci/actions/runner.sh @@ -16,7 +16,8 @@ set -xe -PREFIX=~/local +# The prefix is designed to be usable as an opam local switch +PREFIX=~/local/_opam MAKE="make $MAKE_ARG" SHELL=dash @@ -56,11 +57,12 @@ EOF # $CONFIG_ARG also appears last to allow settings specified here to be # overridden by the workflows. call-configure --prefix="$PREFIX" \ + --docdir="$PREFIX/doc/ocaml" \ --enable-flambda-invariants \ --enable-ocamltest \ --enable-native-toplevel \ --disable-dependency-generation \ - $CONFIG_ARG + -C $CONFIG_ARG } Build () { @@ -125,13 +127,116 @@ API_Docs () { } Install () { - $MAKE install + $MAKE INSTALL_MODE=list install | grep '^->' | sort | uniq -d > duplicates + if [ -s duplicates ]; then + echo "The installation duplicates targets:" + cat duplicates + exit 1 + fi + rm duplicates + $MAKE DESTDIR="$PWD/install" install + find $PWD/install -name _opam -type d + $MAKE INSTALL_MODE=clone install + ret="$PWD" + script="$PWD/ocaml-compiler-clone.sh" + cd "$(find $PWD/install -name _opam -type d)" + mkdir -p "share/ocaml" + cp "$ret/config.status" "$ret/config.cache" "share/ocaml" + cp "$ret/ocaml-compiler-clone.sh" "share/ocaml/clone" + sh $script ~/local/_opam + cd "$ret" + rm -rf install + rm ocaml-compiler-clone.sh } +target_libdir_is_relative='^ *TARGET_LIBDIR_IS_RELATIVE *= *false' + Test-In-Prefix () { + { set +x + echo 'Checking that compilers invoked with alternate runtimes use their' + echo "configured location, not the alternate runtime's" + expected1="$(realpath "$PREFIX/lib/ocaml")" + } 2>/dev/null + if [[ ! -d "$PREFIX.new" ]]; then + # In Re-Test-In-Prefix, $PREFIX is the original compiler built by the + # workflow and then $PREFIX.new is the "alternate configuration". The first + # time round, we clone whichever compiler has just been built for this test. + cp -a "$PREFIX" "$PREFIX.new" + remove="$PREFIX.new" + if grep -q "$target_libdir_is_relative" Makefile.build_config; then + # Compiler configured absolutely - both should return the same answer + expected2="$expected1" + else + # Compiler configured relatively + expected2="$(realpath "$PREFIX").new/lib/ocaml" + fi + else + # The alternate configuration path should be returned, regardless of whether + # the runtime invoking it is an absolute or a relative one from another + # location. + expected2="$(realpath "$PREFIX").new/lib/ocaml-lib" + remove='' + fi + { set +x + lib1="$($PREFIX.new/bin/ocamlrun $PREFIX/bin/ocamlc.byte -where)" + lib2="$($PREFIX/bin/ocamlrun $PREFIX.new/bin/ocamlc.byte -where)" + echo "$PREFIX/bin/ocamlc.byte OSLD: $($PREFIX/bin/ocamlrun \ + $PREFIX/bin/ocamlobjinfo.byte $PREFIX/bin/ocamlc.byte \ + | sed -ne 's/^caml_standard_library_default: //p')" + echo -n "$PREFIX.new/bin/ocamlrun standard_library_default: " + $PREFIX.new/bin/ocamlrun -config | sed -ne 's/standard_library_default: //p' + echo "$PREFIX.new/bin/ocamlrun $PREFIX/bin/ocamlc.byte -where: $lib1" + if [[ $lib1 != $expected1 ]]; then + echo -e ' \e[31mEXPECTED\e[0m:' "$expected1" + fi + echo + echo "$PREFIX.new/bin/ocamlc.byte OSLD: $($PREFIX.new/bin/ocamlrun \ + $PREFIX.new/bin/ocamlobjinfo.byte $PREFIX.new/bin/ocamlc.byte \ + | sed -ne 's/^caml_standard_library_default: //p')" + echo -n "$PREFIX/bin/ocamlrun standard_library_default: " + $PREFIX/bin/ocamlrun -config | sed -ne 's/standard_library_default: //p' + echo "$PREFIX/bin/ocamlrun $PREFIX.new/bin/ocamlc.byte -where: $lib2" + if [[ $lib2 != $expected2 ]]; then + echo -e ' \e[31mEXPECTED\e[0m:' "$expected2" + fi + [[ $lib1 = $expected1 && $lib2 = $expected2 ]] && echo 'Correct.' || exit 1 + } 2>/dev/null + [[ -z $remove ]] || rm -rf "$remove" $MAKE -C testsuite/in_prefix -f Makefile.test test-in-prefix } +Re-Test-In-Prefix () { + mkdir -p bak + mv Makefile.config Makefile.build_config config.status bak + git clean -dfX &>/dev/null + mv bak/Makefile.config bak/Makefile.build_config bak/config.status . + rmdir bak + # The libdir is configured to be $PREFIX.new/lib/ocaml-lib in order to + # "poison" the cross-runtime test (otherwise if $PREFIX/bin/ocamlc.byte is + # missing OSLD, then $PREFIX.new/bin/ocamlrun would still supply the correct + # ../lib/ocaml. This way, it supplies ../lib/ocaml-lib and the test correctly + # fails) + if grep -q "$target_libdir_is_relative" Makefile.build_config; then + # Compiler configured absolutely - reconfigure relatively + echo '::group::Re-building the compiler with a relative libdir' + $MAKE COMPUTE_DEPS=false reconfigure \ + 'ADDITIONAL_CONFIGURE_ARGS=--with-relative-libdir=../lib/ocaml-lib \ +--enable-runtime-search=always --enable-runtime-search-target \ +--prefix='"$PREFIX"'.new' + else + # Compiler configured relatively - reconfigure absolutely + echo '::group::Re-building the compiler with an absolute libdir' + $MAKE COMPUTE_DEPS=false reconfigure \ + 'ADDITIONAL_CONFIGURE_ARGS=--without-relative-libdir \ +--disable-runtime-search --disable-runtime-search-target \ +--prefix='"$PREFIX"'.new --libdir='"$PREFIX"'.new/lib/ocaml-lib' + fi + $MAKE + $MAKE install + echo '::endgroup::' + Test-In-Prefix +} + Checks () { if fgrep 'SUPPORTS_SHARED_LIBRARIES=true' Makefile.config &>/dev/null ; then echo Check the code examples in the manual @@ -141,7 +246,7 @@ Checks () { # we would need to redo (small parts of) world.opt afterwards to # use the compiler again $MAKE check_all_arches - # Ensure that .gitignore is up-to-date - this will fail if any untreacked or + # Ensure that .gitignore is up-to-date - this will fail if any untracked or # altered files exist. test -z "$(git status --porcelain)" # check that the 'clean' target also works @@ -150,7 +255,9 @@ Checks () { $MAKE -C manual distclean # check that the `distclean` target definitely cleans the tree $MAKE distclean - # Check the working tree is clean + # Check the working tree is clean - config.cache is intentionally not deleted + # by any of the clean targets + rm config.cache test -z "$(git status --porcelain)" # Check that there are no ignored files test -z "$(git ls-files --others -i --exclude-standard)" @@ -214,6 +321,23 @@ BasicCompiler () { ReportBuildStatus 0 } +CreateSwitch () { + # This can be switched to use the Ubuntu package when Ubuntu 26.04 is deployed + # (opam 2.1.5 in Ubuntu 24.04 is too old) + curl -Lo opam \ + 'https://github.com/ocaml/opam/releases/download/2.4.1/opam-2.4.1-x86_64-linux' + chmod +x opam + ./opam init --bare --disable-sandboxing --yes --auto-setup + # This is intentionally done before the switch is created - if the install + # target creates _opam then the switch creation will fail. + $MAKE INSTALL_MODE=opam OPAM_PACKAGE_NAME=ocaml-variants install + ./opam switch create ~/local --empty + ./opam switch --switch ~/local set-invariant --no-action ocaml-option-flambda + ./opam pin add --switch ~/local --no-action --kind=path ocaml-variants . + ./opam install --switch ~/local --yes --assume-built ocaml-variants + ./opam exec --switch ~/local -- ocamlopt -v +} + case $1 in configure) Configure;; build) Build;; @@ -223,9 +347,11 @@ test_prefix) TestPrefix $2;; api-docs) API_Docs;; install) Install;; test-in-prefix) Test-In-Prefix;; +re-test-in-prefix) Re-Test-In-Prefix;; manual) BuildManual;; other-checks) Checks;; basic-compiler) BasicCompiler;; +opam) CreateSwitch;; *) echo "Unknown CI instruction: $1" exit 1;; esac diff --git a/tools/ci/appveyor/appveyor_build.cmd b/tools/ci/appveyor/appveyor_build.cmd index 48c2ad7e8a2e..be5e4022819d 100644 --- a/tools/ci/appveyor/appveyor_build.cmd +++ b/tools/ci/appveyor/appveyor_build.cmd @@ -22,7 +22,7 @@ chcp 65001 > nul set BUILD_PREFIX=🐫реализация -set OCAMLROOT=%PROGRAMFILES%\Бактріан🐫 +set OCAMLROOT=C:\Бактріан🐫 if "%1" neq "install" goto %1 setlocal enabledelayedexpansion @@ -69,7 +69,13 @@ if %CYGWIN_UPGRADE_REQUIRED% equ 1 ( ) ) if "%CYGWIN_INSTALL_PACKAGES%" neq "" "%CYG_ROOT%\setup-x86_64.exe" --quiet-mode --no-shortcuts --no-startmenu --no-desktop --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" %CYGWIN_FLAGS% --packages %CYGWIN_INSTALL_PACKAGES:~1% -for %%P in (%CYGWIN_COMMANDS%) do "%CYG_ROOT%\bin\%%P.exe" --version 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 +for %%P in (%CYGWIN_COMMANDS%) do ( + if %%P equ unzip ( + "%CYG_ROOT%\bin\%%P.exe" -v 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 + ) else ( + "%CYG_ROOT%\bin\%%P.exe" --version 2> nul > nul || set CYGWIN_UPGRADE_REQUIRED=1 + ) +) "%CYG_ROOT%\bin\bash.exe" -lc "cygcheck -dc %CYGWIN_PACKAGES%" if %CYGWIN_UPGRADE_REQUIRED% equ 1 ( echo Cygwin package upgrade required - please go and drink coffee @@ -87,6 +93,11 @@ if not defined SDK ( if "%PORT%" equ "mingw32" set SDK=call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars32.bat" ) %SDK% +rem The environment block becomes very large on AppVeyor, which can cause +rem problems for xargs in Cygwin. These two environment variables from the SDK +rem infrastructure can be safely junked to reduce the size of the block. +set __VSCMD_PREINIT_PATH= +set EXTERNAL_INCLUDE= goto :EOF :install @@ -103,6 +114,9 @@ if "%BOOTSTRAP_FLEXDLL%" equ "true" ( cd "%APPVEYOR_BUILD_FOLDER%" appveyor DownloadFile "https://github.com/ocaml/flexdll/archive/%FLEXDLL_VERSION%.tar.gz" -FileName "flexdll.tar.gz" || exit /b 1 appveyor DownloadFile "https://github.com/ocaml/flexdll/releases/download/%FLEXDLL_VERSION%/flexdll-bin-%FLEXDLL_VERSION%.zip" -FileName "flexdll.zip" || exit /b 1 +appveyor DownloadFile "https://github.com/ocaml/opam/releases/download/2.4.1/opam-2.4.1-x86_64-windows.exe" -FileName "opam.exe" || exit /b 1 +md "%PROGRAMFILES%\flexdll" +move opam.exe "%PROGRAMFILES%\flexdll" rem flexdll.zip is processed here, rather than in appveyor_build.sh because the rem unzip command comes from MSYS2 (via Git for Windows) and it has to be rem invoked via cmd /c in a bash script which is weird(er). @@ -115,8 +129,8 @@ rem in the list just so that the Cygwin version is always displayed on the log). rem CYGWIN_COMMANDS is a corresponding command to run with --version to test rem whether the package works. This is used to verify whether the installation rem needs upgrading. -set CYGWIN_PACKAGES=cygwin make diffutils -set CYGWIN_COMMANDS=cygcheck make diff +set CYGWIN_PACKAGES=cygwin make diffutils unzip +set CYGWIN_COMMANDS=cygcheck make diff unzip if "%PORT%" equ "mingw32" ( rem mingw64-i686-runtime does not need explicitly installing, but it's useful rem to have the version reported. diff --git a/tools/ci/appveyor/appveyor_build.sh b/tools/ci/appveyor/appveyor_build.sh index 2fc8426025c1..0899e1413de5 100755 --- a/tools/ci/appveyor/appveyor_build.sh +++ b/tools/ci/appveyor/appveyor_build.sh @@ -59,7 +59,10 @@ function set_configuration { CACHE_FILE_PREFIX="$CACHE_DIRECTORY/config.cache-$1" CACHE_FILE="$CACHE_FILE_PREFIX-$CACHE_KEY" - args=('--cache-file' "$CACHE_FILE" '--prefix' "$2" '--enable-ocamltest') + args=('--cache-file' "$CACHE_FILE" \ + '--prefix' "$2/_opam" \ + '--docdir' "$2/_opam/doc/ocaml" \ + '--enable-ocamltest') case "$1" in cygwin*) @@ -76,6 +79,15 @@ function set_configuration { args+=('--host=x86_64-pc-windows' '--enable-dependency-generation' \ '--enable-native-toplevel');; esac + case "$RELOCATABLE,$1" in + true,cygwin*) + args+=('--with-relative-libdir=../lib/ocaml');; + true,*) + args+=('--with-relative-libdir=..\lib\ocaml');; + esac + if [[ $RELOCATABLE = 'true' ]]; then + args+=('--enable-runtime-search=always' '--enable-runtime-search-target') + fi # Remove old configure cache if the configure script or the OS # have changed @@ -92,14 +104,17 @@ function set_configuration { if ((failed)) ; then cat config.log ; exit $failed ; fi fi + cp "$CACHE_FILE" config.cache + # FILE=$(pwd | cygpath -f - -m)/Makefile.config # run "Content of $FILE" cat Makefile.config } -PARALLEL_URL='https://git.savannah.gnu.org/cgit/parallel.git/plain/src/parallel' +PARALLEL_URL=\ +'https://git.savannah.gnu.org/cgit/parallel.git/plain/src/parallel?h=20241222' APPVEYOR_BUILD_FOLDER=$(echo "$APPVEYOR_BUILD_FOLDER" | cygpath -f -) FLEXDLLROOT="$PROGRAMFILES/flexdll" -OCAMLROOT=$(echo "$OCAMLROOT" | cygpath -f - -m) +export OPAMSWITCH="$OCAMLROOT" if [[ $BOOTSTRAP_FLEXDLL = 'false' ]] ; then case "$PORT" in @@ -115,14 +130,7 @@ fi case "$1" in install) - mkdir -p "$CACHE_DIRECTORY" - if [ ! -e "$CACHE_DIRECTORY/parallel-source" ] || \ - [ "$PARALLEL_URL" != "$(cat "$CACHE_DIRECTORY/parallel-source")" ] ; then - # Download latest version directly from the repo - curl -Ls $PARALLEL_URL -o "$CACHE_DIRECTORY/parallel" - echo "$PARALLEL_URL" > "$CACHE_DIRECTORY/parallel-source" - fi - cp "$CACHE_DIRECTORY/parallel" /usr/bin + cp "$APPVEYOR_BUILD_FOLDER/tools/ci/appveyor/parallel" /usr/bin chmod +x /usr/bin/parallel parallel --version if [[ $install_flexdll = 'true' ]] ; then @@ -142,7 +150,7 @@ case "$1" in ;; test) FULL_BUILD_PREFIX="$APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX" - run 'ocamlc.opt -version' "$FULL_BUILD_PREFIX-$PORT/ocamlc.opt" -version + #run 'ocamlc.opt -version' "$FULL_BUILD_PREFIX-$PORT/ocamlc.opt" -version if [[ $PORT =~ mingw* ]] ; then run "Check runtime symbols" \ "$FULL_BUILD_PREFIX-$PORT/tools/check-symbol-names" \ @@ -161,15 +169,49 @@ case "$1" in # tests now (to include natdynlink) run "test dynlink $PORT" \ $MAKE -C "$FULL_BUILD_PREFIX-$PORT/testsuite" parallel-lib-dynlink - # Now reconfigure ocamltest to run in bytecode-only mode - sed -i '/native_/s/true/false/' \ - "$FULL_BUILD_PREFIX-$PORT/ocamltest/ocamltest_config.ml" - $MAKE -C "$FULL_BUILD_PREFIX-$PORT" -j ocamltest ocamltest.opt + case "$PORT" in + *64) + # Now reconfigure ocamltest to run in bytecode-only mode + sed -i '/native_/s/true/false/' \ + "$FULL_BUILD_PREFIX-$PORT/ocamltest/ocamltest_config.ml" + $MAKE -C "$FULL_BUILD_PREFIX-$PORT" -j ocamltest ocamltest.opt;; + esac # And run the entire testsuite, skipping all the native-code tests run "test $PORT" \ make -C "$FULL_BUILD_PREFIX-$PORT/testsuite" SHOW_TIMINGS=1 all fi run "install $PORT" $MAKE -C "$FULL_BUILD_PREFIX-$PORT" install + make -C "$FULL_BUILD_PREFIX-$PORT" INSTALL_MODE=clone install + ( + cd "$OCAMLROOT" + mv _opam destdir + #ret="$PWD" + #script="$PWD/ocaml-compiler-clone.sh" + #cd "$(find $PWD/install -name _opam -type d)" + mkdir -p "destdir/share/ocaml" + cp "$FULL_BUILD_PREFIX-$PORT/config."{cache,status} 'destdir/share/ocaml/' + cp "$FULL_BUILD_PREFIX-$PORT/ocaml-compiler-clone.sh" \ + 'destdir/share/ocaml/clone' + cd destdir + sh "$FULL_BUILD_PREFIX-$PORT/ocaml-compiler-clone.sh" "$OCAMLROOT/_opam" + ) + rm -rf "$OCAMLROOT" + $MAKE -C "$FULL_BUILD_PREFIX-$PORT" OPAM_PACKAGE_NAME=ocaml-variants \ + INSTALL_MODE=opam install + ( + cd "$FULL_BUILD_PREFIX-$PORT" + export PATH="$FLEXDLLROOT:$PATH" + opam init --bare --yes --disable-sandboxing --auto-setup \ + --cygwin-local-install + opam switch create "$OPAMSWITCH" --empty + opam pin add --no-action --kind=path ocaml-variants . + opam pin add --no-action flexdll flexdll + opam install --yes flexdll winpthreads + opam install --yes --assume-built ocaml-variants + git checkout -- ocaml-variants.install + rm -f config.cache ocaml-variants-fixup.sh ocaml-compiler-clone.sh + opam exec -- ocamlc -v + ) run "test $PORT in prefix" \ $MAKE -f Makefile.test -C "$FULL_BUILD_PREFIX-$PORT/testsuite/in_prefix" \ test-in-prefix diff --git a/tools/ci/appveyor/parallel b/tools/ci/appveyor/parallel new file mode 100755 index 000000000000..5c0adb415791 --- /dev/null +++ b/tools/ci/appveyor/parallel @@ -0,0 +1,15971 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2007-2024 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# open3 used in Job::start +use IPC::Open3; +use POSIX; +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +sub set_input_source_header($$) { + my ($command_ref,$input_source_fh_ref) = @_; + if(defined $opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = defined $opt::colsep ? $opt::colsep : "\t"; + # regexp for {= + my $left = "\Q$Global::parensleft\E"; + my $l = $Global::parensleft; + # regexp for =} + my $right = "\Q$Global::parensright\E"; + my $r = $Global::parensright; + if($opt::header ne "0") { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + my $line = <$fh>; + chomp($line); + $line =~ s/\r$//; + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s<\{$s( + (\.\d+)? # Sub position = {3.2} + (|/|//|\.|/\.) # Known replacement strings + (:%.*?)? # Formatting + )\}><\{$id$1\}>gx; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } + } + # Make it possible to do: + # parallel --header 0 echo {file2} {file1} :::: file1 file2 + my $id = 1; + for my $s (@opt::a) { + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + fileno $s and next; + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{\Q$s\E(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left \Q$s\E (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } else { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + $Global::input_source_header{$id} = $id; + $id++; + } + } +} + +sub max_jobs_running() { + # Compute $Global::max_jobs_running as the max number of jobs + # running on each sshlogin. + # Returns: + # $Global::max_jobs_running + if(not $Global::max_jobs_running) { + for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); + } + } + if(not $Global::max_jobs_running) { + ::error("Cannot run any jobs."); + wait_and_exit(255); + } + return $Global::max_jobs_running; +} + +sub halt() { + # Compute exit value, + # wait for children to complete + # and exit + if($opt::halt and $Global::halt_when ne "never") { + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + $Global::halt_exitstatus = + ::ceil($Global::total_failed / + ($Global::total_started || 1) * 100); + } elsif($Global::halt_count) { + $Global::halt_exitstatus = + ::min(undef_as_zero($Global::total_failed),101); + } + } + wait_and_exit($Global::halt_exitstatus); + } else { + if($Global::semaphore) { + # --semaphore runs a single job: + # Use exit value of that + wait_and_exit($Global::halt_exitstatus); + } else { + # 0 = all jobs succeeded + # 1-100 = n jobs failed + # 101 = >100 jobs failed + wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); + } + } +} + + +sub __FAST_MODE__() {} + + +sub fast() { + my $jobslots = $Global::host{':'}->user_requested_processes($opt::jobs); + my $i; + my $jobs_per_chunk; + my $buffer_len = 100; + my $splitstring = ("\n#\0\n"); + # n = jobslots + # Read 100 jobs + # Split into n chunks + # Read 3*100 jobs + # Split into n chunks + # Read 3*3*100 jobs (up to at most 300K jobs) + # Split into n chunks + # Receiver: parallel --block 1k --pipe -N1 --recend '\n#\0\n' dash + # TODO --group? --halt? --tag? $PARALLEL_JOBSLOT + # _PARALLEL_TAG="foo" + # | tagger + # _PARALLEL_EXIT + # maybe_exit + my $executer_fh; + if($opt::D eq "fast") { + open($executer_fh, "|-", "cat") || die; + } else { + open($executer_fh, "|-", "parallel --plain -j $jobslots --block 1k --pipe -N1 --recend '\\n#\\0\\n' $Global::shell") || die; + } + # Do some testing of a stuck job - dns = 5 sec + my $block_start = q{ + error() { + _exit=$((_exit+1)) + # --halt-on-error stuff + if [ $_exit -gt 40 ]; then + echo "ERROR: More than 40" + exit 1 + fi + } + + _tagger() { + { + rm -f "$1" + # This will fail for either /1 or /2 + rmdir "$2" 2>/dev/null + # TODO use awk (see fasttag) + perl -pe 's{^}{$ENV{PARALLEL_TAGSTRING}}' + } < "$1" + } + + _taggerwrap() { + _dir=`mktemp -d` + mkfifo "$_dir"/1 "$_dir"/2 + _tagger "$_dir"/1 "$_dir" >&1 & + _tagger "$_dir"/2 "$_dir" >&2 & + # ( ... ) is needed to deal correctly with 'exit 1' + ( eval $@ ; ) >"$_dir"/1 2>"$_dir"/2 || error + wait + } + PARALLEL_TAGSTRING=dummy + export PARALLEL_TAGSTRING + }; + my $block_end = "\n# Define get exit value\necho EXIT=\$_exit\n".$splitstring."\n"; + do { + $i = 0; + my @jobbuffer; + while(my $job = $Global::JobQueue->get()) { + push (@jobbuffer, "PARALLEL_TAGSTRING=".Q($job->tag()). + "\n_taggerwrap ". Q($job->replaced())); + ($i++ < $buffer_len) or last; + } + my $jobs_per_chunk = 1 + ($i / $jobslots); + my $j = 0; + print $executer_fh $block_start; + for(@jobbuffer) { + print $executer_fh $_,"\n"; + (++$j % $jobs_per_chunk) or print $executer_fh $block_end,$block_start; + } + print $executer_fh $block_end; + if($buffer_len < 100000) { $buffer_len *= 3; } + } while ($i); + close $executer_fh; + exit(0); +} + + +sub __PIPE_MODE__() {} + + +sub pipepart_setup() { + # Compute the blocksize + # Generate the commands to extract the blocks + # Push the commands on queue + # Changes: + # @Global::cat_prepends + # $Global::JobQueue + if($opt::tee) { + # Prepend each command with + # < file + my $cat_string = "< ".Q($opt::a[0]); + for(1..$Global::JobQueue->total_jobs()) { + push @Global::cat_appends, $cat_string; + push @Global::cat_prepends, ""; + } + } else { + if(not $opt::blocksize) { + # --blocksize with 10 jobs per jobslot + $opt::blocksize = -10; + } + if($opt::roundrobin) { + # --blocksize with 1 job per jobslot + $opt::blocksize = -1; + } + if($opt::blocksize < 0) { + my $size = 0; + # Compute size of -a + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } elsif(-e $_) { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } + # Run in total $job_slots*(- $blocksize) jobs + # Set --blocksize = size / no of proc / (- $blocksize) + $Global::dummy_jobs = 1; + $Global::blocksize = 1 + + int($size / max_jobs_running() / + -multiply_binary_prefix($opt::blocksize)); + } + @Global::cat_prepends = (map { pipe_part_files($_) } + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + grep { ! fileno $_ } @opt::a); + # Unget the empty arg as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( + map { [Arg->new("\0noarg")] } @Global::cat_prepends + ); + } +} + +sub pipe_tee_setup() { + # Create temporary fifos + # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @fifos; + for(1..$Global::JobQueue->total_jobs()) { + push @fifos, tmpfifo(); + } + # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null + if(not fork()){ + # Test if tee supports --output-error=warn-nopipe + `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`; + my $opt = $? ? "" : "--output-error=warn-nopipe"; + ::debug("init","tee $opt"); + if($opt::dryrun) { + # This is not exactly what is run, but it gives the basic idea + print "mkfifo @fifos\n"; + print "tee $opt @fifos >/dev/null &\n"; + } else { + # Let tee inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + if($opt) { + exec "tee", $opt, @fifos; + } else { + exec "tee", @fifos; + } + } + exit(0); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + # Remove the tmpfifo as soon as it is open + @Global::cat_prepends = map { "(rm $_;" } shell_quote(@fifos); + @Global::cat_appends = map { ") < $_" } shell_quote(@fifos); +} + + +sub parcat_script() { + # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos + my $script = q'{ + use POSIX qw(:errno_h); + use IO::Select; + use strict; + use threads; + use Thread::Queue; + use Fcntl qw(:DEFAULT :flock); + + my $opened :shared; + my $q = Thread::Queue->new(); + my $okq = Thread::Queue->new(); + my @producers; + + if(not @ARGV) { + if(-t *STDIN) { + print "Usage:\n"; + print " parcat file(s)\n"; + print " cat argfile | parcat\n"; + } else { + # Read arguments from stdin + chomp(@ARGV = ); + } + } + my $files_to_open = 0; + # Default: fd = stdout + my $fd = 1; + for (@ARGV) { + # --rm = remove file when opened + /^--rm$/ and do { $opt::rm = 1; next; }; + # -1 = output to fd 1, -2 = output to fd 2 + /^-(\d+)$/ and do { $fd = $1; next; }; + push @producers, threads->create("producer", $_, $fd); + $files_to_open++; + } + + sub producer { + # Open a file/fifo, set non blocking, enqueue fileno of the file handle + my $file = shift; + my $output_fd = shift; + open(my $fh, "<", $file) || do { + print STDERR "parcat: Cannot open $file: $!\n"; + exit(1); + }; + # Remove file when it has been opened + if($opt::rm) { + unlink $file; + } + set_fh_non_blocking($fh); + $opened++; + # Pass the fileno to parent + $q->enqueue(fileno($fh),$output_fd); + # Get an OK that the $fh is opened and we can release the $fh + while(1) { + my $ok = $okq->dequeue(); + if($ok == fileno($fh)) { last; } + # Not ours - very unlikely to happen + $okq->enqueue($ok); + } + return; + } + + my $s = IO::Select->new(); + my %buffer; + + sub add_file { + my $infd = shift; + my $outfd = shift; + open(my $infh, "<&=", $infd) || die; + open(my $outfh, ">&=", $outfd) || die; + $s->add($infh); + # Tell the producer now opened here and can be released + $okq->enqueue($infd); + # Initialize the buffer + @{$buffer{$infh}{$outfd}} = (); + $Global::fh{$outfd} = $outfh; + } + + sub add_files { + # Non-blocking dequeue + my ($infd,$outfd); + do { + ($infd,$outfd) = $q->dequeue_nb(2); + if(defined($outfd)) { + add_file($infd,$outfd); + } + } while(defined($outfd)); + } + + sub add_files_block { + # Blocking dequeue + my ($infd,$outfd) = $q->dequeue(2); + add_file($infd,$outfd); + } + + + my $fd; + my (@ready,$infh,$rv,$buf); + do { + # Wait until at least one file is opened + add_files_block(); + while($q->pending or keys %buffer) { + add_files(); + while(keys %buffer) { + @ready = $s->can_read(0.01); + if(not @ready) { + add_files(); + } + for $infh (@ready) { + # There is only one key, namely the output file descriptor + for my $outfd (keys %{$buffer{$infh}}) { + # TODO test if 60800 is optimal (2^17 is used elsewhere) + $rv = sysread($infh, $buf, 60800); + if (!$rv) { + if($! == EAGAIN) { + # Would block: Nothing read + next; + } else { + # Nothing read, but would not block: + # This file is done + $s->remove($infh); + for(@{$buffer{$infh}{$outfd}}) { + syswrite($Global::fh{$outfd},$_); + } + delete $buffer{$infh}; + # Closing the $infh causes it to block + # close $infh; + add_files(); + next; + } + } + # Something read. + # Find \n or \r for full line + my $i = (rindex($buf,"\n")+1); + if($i) { + # Print full line + for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) { + syswrite($Global::fh{$outfd},$_); + } + # @buffer = remaining half line + $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)]; + } else { + # Something read, but not a full line + push @{$buffer{$infh}{$outfd}}, $buf; + } + redo; + } + } + } + } + } while($opened < $files_to_open); + + for (@producers) { + $_->join(); + } + + sub set_fh_non_blocking { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + my $flags; + fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + }'; + return ::spacefree(3, $script); +} + +sub sharder_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ hex(B::hash($_))%$bins }; + } + print $fh $_; + } + } else { + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ hex(B::hash($F[$col0]))%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub binner_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + # Let the last output fifo be the 0'th + open $fh{$t++}, ">", pop @ARGV; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ $_%$bins }; + } + print $fh $_; + } + } else { + while() { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ $F[$col0]%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub pipe_shard_setup() { + # Create temporary fifos + # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @shardfifos; + my @parcatfifos; + # TODO $opt::jobs should be evaluated (100%) + # TODO $opt::jobs should be number of total_jobs if there are arguments + max_jobs_running(); + my $njobs = $Global::max_jobs_running; + for my $m (0..$njobs-1) { + for my $n (0..$njobs-1) { + # sharding to A B C D + # parcatting all As together + $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo(); + } + } + my $shardbin = ($opt::shard || $opt::bin); + my $script; + if($opt::bin) { + $script = binner_script(); + } else { + $script = sharder_script(); + } + + # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN + + if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # A full line, but nothing more (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread(STDIN,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + my ($col, $perlexpr, $subref) = + column_perlexpr($shardbin, $Global::header, $opt::colsep); + if(not fork()) { + # Let the sharder inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + # The PERL_HASH_SEED must be the same for all sharders + # so B::hash will return the same value for any given input + $ENV{'PERL_HASH_SEED'} = $$; + exec qw(parallel -0 --block 100k -q --pipe -j), $njobs, + qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","), + $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + my $parcat = Q(parcat_script()); + if(not $parcat) { + ::error("'parcat' must be in path."); + ::wait_and_exit(255); + } + @Global::cat_prepends = + map { "perl -e $parcat ". + join(" ",shell_quote(@$_))." | "} @parcatfifos; +} + +sub pipe_part_files(@) { + # Given the bigfile: + # - find header and split positions + # - make commands that 'cat's the partial file + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + if(not -f $file and not -b $file) { + ::error("--pipepart only works on seekable files, not streams/pipes.", + "$file is not a seekable file."); + ::wait_and_exit(255); + } + + my $fh = open_or_exit("<",$file); + my $firstlinelen = 0; + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while($firstlinelen += sysread($fh,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$fh); + # find positions + my @pos = find_split_positions($file,int($Global::blocksize), + $header,$firstlinelen); + # Make @cat_prepends + my @cat_prepends = (); + for(my $i=0; $i<$#pos; $i++) { + push(@cat_prepends, + cat_partial($file, $firstlinelen, $firstlinelen+length($header), + $pos[$i], $pos[$i+1])); + } + return @cat_prepends; +} + +sub find_header($$) { + # Compute the header based on $opt::header + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $Global::blocksize + # $Global::header + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + # $Global::header may be set in group_by_loop() + if($Global::header) { return $Global::header } + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) { + if($$buf_ref =~ s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions($$$) { + # Find positions in bigfile where recend is followed by recstart + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $header = header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $header, $firstlinelen) = @_; + my $skiplen = $firstlinelen + length $header; + my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } + $block = int $block; + if($opt::groupby) { + return split_positions_for_group_by($file,$size,$block, + $header,$firstlinelen); + } + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + # The optimal dd blocksize for ubuntu (AMD6376) = 2^16 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit("<",$file); + push(@pos,$skiplen); + for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) { + my $buf; + if($recendrecstart eq "") { + # records ends anywhere + push(@pos,$pos); + } else { + # Seek the the block start + if(not sysseek($fh, $pos, 0)) { + ::error("Cannot seek to $pos in $file"); + edit(255); + } + while(sysread($fh,$buf,$dd_block_size,length $buf)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ m:^(.*$recend)$recstart:os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + # TODO optimize to only look at the appended + # $dd_block_size + len $recendrecstart + # TODO increase $dd_block_size to optimize for longer records + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recend); + push(@pos,$pos); + last; + } + } + } + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add $size as the last + push @pos, $size; + } + close $fh; + return @pos; +} + +sub split_positions_for_group_by($$$$) { + my($fh); + my %value; + sub value_at($) { + my $pos = shift; + if(not defined $value{$pos}) { + if($pos != 0) { + seek($fh, $pos-1, 0) || die; + # Read half line + <$fh>; + } + # Read full line + my $linepos = tell($fh); + if(not defined $value{$linepos}) { + $_ = <$fh>; + if(defined $_) { + # Not end of file + my @F; + if(defined $group_by::col) { + $opt::colsep ||= "\t"; + @F = split /$opt::colsep/, $_; + $_ = $F[$group_by::col]; + } + eval $group_by::perlexpr; + } + $value{$linepos} = [$_,$linepos]; + } + $value{$pos} = $value{$linepos}; + } + return (@{$value{$pos}}); + } + + sub binary_search_end($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) == $s + # value_at($epos) != $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $spos = $vpos; + $posdif = $epos - $spos; + } else { + $epos = $vpos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + sub binary_search_start($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) != $s + # value_at($epos) == $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $epos = $vpos; + } else { + $spos = $vpos; + $posdif = $epos - $spos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + my ($file,$size,$block,$header,$firstlinelen) = @_; + my @pos; + $fh = open_or_exit("<",$file); + # Set $Global::group_by_column $Global::group_by_perlexpr + group_by_loop($fh,$opt::recsep); + if($opt::max_args) { + # Split after n values + my ($a,$apos); + # $xpos = linestart, $x = value at $xpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + ($a,$apos) = binary_search_end($a,$apos,$size); + if(eof($fh)) { + push @pos, $size; last; + } + } + # @pos = start of every value + # Merge n values + # -nX = keep every X'th position + my $i = 0; + @pos = grep { not ($i++ % $opt::max_args) } @pos; + } else { + # Split after any value group + # Preferable < $blocksize + my ($a,$b,$c,$apos,$bpos,$cpos); + # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + $bpos = $apos + $block; + ($b,$bpos) = value_at($bpos); + if(eof($fh)) { + # EOF is less than 1 block away + push @pos, $size; last; + } + $cpos = $bpos + $block; + ($c,$cpos) = value_at($cpos); + if($a eq $b) { + while($b eq $c) { + # Move bpos, cpos a block forward until $a == $b != $c + $bpos = $cpos; + $cpos += $block; + ($c,$cpos) = value_at($cpos); + if($cpos >= $size) { + $cpos = $size; + last; + } + } + # $a == $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } else { + if($b eq $c) { + # $a != $b == $c + # Binary search for $b starting between ($apos,$bpos) + ($b,$bpos) = binary_search_start($b,$apos,$bpos); + } else { + # $a != $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } + } + ($a,$apos) = ($b,$bpos); + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add it + push @pos, $size; + } + return @pos; +} + +sub cat_partial($@) { + # Efficient command to copy from byte X to byte Y + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert (start,end) to (start,len) + my @start_len = map { + if(++$i % 2) { $start = $_; } else { $_-$start } + } @start_end; + # The optimal block size differs + # It has been measured on: + # AMD 6376: n*4k-1; small n + # AMD Neo N36L: 44k-200k + # Intel i7-3632QM: 55k- + # ARM Cortex A53: 4k-28k + # Intel i5-2410M: 36k-46k + # + # I choose 2^15-1 = 32767 + # q{ + # expseq() { + # perl -E ' + # $last = pop @ARGV; + # $first = shift || 1; + # $inc = shift || 1.03; + # for($i=$first; $i<=$last;$i*=$inc) { say int $i } + # ' "$@" + # } + # + # seq 111111111 > big; + # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; } + # export -f f; + # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f; + # }; + my $script = spacefree + (0, + q{ + while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = + sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){ + $left -= $read; + syswrite(STDOUT,$buf); + } + } + }); + return "<". Q($file) . + " perl -e '$script' @start_len |"; +} + +sub column_perlexpr($$$) { + # Compute the column number (if any), perlexpression from combined + # string (such as --shard key, --groupby key, {=n perlexpr=} + # Input: + # $column_perlexpr = string with column and perl expression + # $header = header from input file (if column is column name) + # $colsep = column separator regexp + # Returns: + # $col = column number + # $perlexpr = perl expression + # $subref = compiled perl expression as sub reference + my ($column_perlexpr, $header, $colsep) = @_; + my ($col, $perlexpr, $subref); + if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) { + # Column name/number (possibly prefix) + if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) { + # Column number (possibly prefix) + $col = $1; + } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) { + # Column name (possibly prefix) + my $colname = $1; + # Split on --copsep pattern + my @headers = split /$colsep/, $header; + my %headers; + @headers{@headers} = (1..($#headers+1)); + $col = $headers{$colname}; + if(not defined $col) { + ::error("Column '$colname' $colsep not found in header",keys %headers); + ::wait_and_exit(255); + } + } + } + # What is left of $column_perlexpr is $perlexpr (possibly empty) + $perlexpr = $column_perlexpr; + $subref = eval("sub { no strict; no warnings; $perlexpr }"); + return($col, $perlexpr, $subref); +} + +sub group_by_loop($$) { + # Generate perl code for group-by loop + # Insert a $recsep when the column value changes + # The column value can be computed with $perlexpr + my($fh,$recsep) = @_; + my $groupby = $opt::groupby; + if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # Read a full line, but nothing more + # (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread($fh,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + $opt::colsep ||= "\t"; + ($group_by::col, $group_by::perlexpr, $group_by::subref) = + column_perlexpr($groupby, $Global::header, $opt::colsep); + # Numbered 0..n-1 due to being used by $F[n] + if($group_by::col) { $group_by::col--; } + + my $loop = ::spacefree(0,q{ + BEGIN{ $last = "RECSEP"; } + { + local $_=COLVALUE; + PERLEXPR; + if(($last) ne $_) { + print "RECSEP"; + $last = $_; + } + } + }); + if(defined $group_by::col) { + $loop =~ s/COLVALUE/\$F[$group_by::col]/g; + } else { + $loop =~ s/COLVALUE/\$_/g; + } + $loop =~ s/PERLEXPR/$group_by::perlexpr/g; + $loop =~ s/RECSEP/$recsep/g; + return $loop; +} + +sub pipe_group_by_setup() { + # Record separator with 119 bit random value + $opt::recend = ''; + $opt::recstart = + join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + $opt::remove_rec_sep = 1; + my @filter; + push @filter, "perl"; + if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) { + # This is column number/name + # Use -a (auto-split) + push @filter, "-a"; + $opt::colsep ||= "\t"; + my $sep = $opt::colsep; + $sep =~ s/\t/\\t/g; + $sep =~ s/\"/\\"/g; + # man perlrun: -Fpattern [...] You can't use literal whitespace + $sep =~ s/ /\\040/g; + push @filter, "-F$sep"; + } + push @filter, "-pe"; + push @filter, group_by_loop(*STDIN,$opt::recstart); + ::debug("init", "@filter\n"); + open(STDIN, '-|', @filter) || die ("Cannot start @filter"); + if(which("mbuffer")) { + # You get a speed up of 30% by going through mbuffer + open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") || + die ("Cannot start mbuffer"); + } +} + +sub spreadstdin() { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $Global::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = int($Global::blocksize); + my $in = *STDIN; + my $timeout = $Global::blocktimeout; + my @parts; + my $everything_read; + + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while(sysread($in,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$in); + my $anything_written; + my $eof; + my $garbage_read; + + sub read_with_alarm($) { + my ($readsize) = @_; + my ($nread,$alarm,$read_everything); + if($readsize < 0) { + $readsize = -$readsize; + $read_everything = 1; + } + # possibly interrupted by --blocktimeout + eval { + local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required + # --blocktimeout (or 0 if not set) + alarm $timeout; + if($] >= 5.026) { + do { + $nread = sysread $in, $buf, $readsize, length $buf; + if(not $read_everything) { $readsize -= $nread; } + } while($readsize and $nread); + } else { + # Less efficient reading, but 32-bit sysread compatible + do { + $nread = sysread($in,substr($buf,length $buf,0),$readsize,0); + if(not $read_everything) { $readsize -= $nread; } + } while($readsize and $nread); + } + alarm 0; + }; + if ($@) { + die unless $@ eq "alarm\n"; # propagate unexpected errors + $alarm = 1; + } else { + $alarm = 0; + } + # Is this EOF? + return not ($nread or $alarm); + } + + sub read_block_per_jobslot() { + if(not $everything_read) { + # Read everything in readsize of 2^17 + # 21 = 20s + # 20 = 16s + # 19 = 16s,15s + # 18 = 15s + # 17 = 14s + # 16 = 14s + # 15 = 14s + $eof = read_with_alarm(-2**17); + if($eof) { + $everything_read = 1; + # Trick the rest of the code to think we are not done reading blocks yet. + $eof = 0; + # Chop into parts + my $total_size = length($buf); + my $jobslots = $Global::max_jobs_running; + my $parts = -$Global::blocksize * $jobslots; + my $part_size = int($total_size / $parts); + for my $i (0 .. $parts - 1) { + my $start = $i * $part_size; + my $end = ($i == $parts - 1) ? $total_size : $start + $part_size; + # Extract the chunk from buffer + push @parts, substr($buf, $start, $end - $start); + } + $buf=""; + } else { + # What do we do here? Caused by alarm. + ::die_bug("Read block from pipe failed"); + } + } + if(@parts) { + # pop part and return that + $buf .= shift @parts; + } else { + # All parts are done: Let the rest of the code know we are EOF + $eof = 1; + } + } + + sub read_block() { + # Read a --blocksize from STDIN + if($Global::blocksize < 0) { + read_block_per_jobslot(); + } else { + # Add up to the next full block + my $readsize = $blocksize - (length $buf) % $blocksize; + $eof = read_with_alarm($readsize); + } + } + + sub pass_n_line_records() { + # Pass records of N lines + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + # Go backwards until there are full n-line records + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n_regexps() { + # Pass records of N regexps + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + if(not $garbage_read) { + $garbage_read = 1; + if($buf !~ /^$recstart/o) { + # Buf does not start with $recstart => There is garbage. + # Make a single record of the garbage + if($buf =~ + /(?s:^)( + (?:(?:(?!$recend$recstart)(?s:.))*?$recend) + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + } + + my $n_records = + $Global::max_number_of_args * ($Global::max_lines || 1); + # (?!negative lookahead) is needed to avoid backtracking + # See: https://unix.stackexchange.com/questions/439356/ + # (?s:.) = (.|[\n]) but faster + while($buf =~ + /(?s:^)( + # n more times recstart.*recend + (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records} + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_regexp() { + # Find the last recend-recstart in $buf + $eof and return; + # (?s:.) = (.|[\n]) but faster + if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_csv_record() { + # Pass CVS record + # We define a CSV record as an even number of " + end of line + # This works if you use " as quoting character + my $last_newline_pos = length $buf; + # Go backwards from the last \n and search for a position + # where there is an even number of " + do { + # find last EOL + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + # While uneven " + } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2 + and $last_newline_pos >= 0); + # Chop at $last_newline_pos as that is where CSV record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n() { + # Pass n records of --recend/--recstart + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = + $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1 + and + length $buf) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub pass() { + # Pass records of --recend/--recstart + # Split record at fixed string + # Find the last recend+recstart in $buf + $eof and return; + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub increase_blocksize_maybe() { + if(not $anything_written + and not $opt::blocktimeout + and not $Global::no_autoexpand_block) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize."); + } + } + } + + while(1) { + $anything_written = 0; + read_block(); + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + if($eof) { + last; + } else { + next; + } + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Pass n-line records + pass_n_line_records(); + } elsif($opt::csv) { + # Pass a full CSV record + pass_csv_record(); + } elsif($opt::regexp) { + # Split record at regexp + if($Global::max_number_of_args) { + pass_n_regexps(); + } else { + pass_regexp(); + } + } else { + # Pass normal --recend/--recstart record + if($Global::max_number_of_args) { + pass_n(); + } else { + pass(); + } + } + $eof and last; + increase_blocksize_maybe(); + ::debug("init", "Round\n"); + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart, + $recend, length $buf); + + if($opt::retries) { + $Global::no_more_input = 1; + # We need to start no more jobs: At most we need to retry some + # of the already running. + my @running = values %Global::running; + # Stop any virgins. + for my $job (@running) { + if(defined $job and $job->virgin()) { + close $job->fh(0,"w"); + } + } + # Wait for running jobs to be done + my $sleep = 1; + while($Global::total_running > 0) { + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + } + } + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + # Flush blocks to roundrobin procs + my $sleep = 1; + while(%Global::running) { + my $something_written = 0; + for my $job (values %Global::running) { + if($job->block_length()) { + $something_written += $job->non_blocking_write(); + } else { + close $job->fh(0,"w"); + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend() { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + if($opt::regexp and $recend eq '') { + # --regexp --recend '' + $recend = '(?s:.)'; + } + } + + if($opt::regexp) { + # Do not allow /x comments - to avoid having to quote space + $recstart = "(?-x:".$recstart.")"; + $recend = "(?-x:".$recend.")"; + # If $recstart/$recend contains '|' + # the | should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex($$) { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + my $sleep = 1; + + sub round_robin_write($$$$$) { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + my $written = 0; + my $block_passed = 0; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another slot + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref, $buffer_ref, + $endpos, $recstart, $recend); + $block_passed = 1; + $written += $job->non_blocking_write(); + last; + } + } + if($written) { + $sleep = $sleep/1.5+0.001; + } + # Don't sleep if something is written + } while($written and not $block_passed); + $sleep = ::reap_usleep($sleep); + } + return $written; + } +} + +sub index64($$$) { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $max2gb = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $max2gb or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $max2gb), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($max2gb - $matchlen - 1); + } + return -1; +} + +sub rindex64($@) { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten($$) { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe($$$$$$) { + # Fork then + # Write record from pos 0 .. $endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $buffer_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $buffer_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number, $header_ref, $buffer_ref, + $recstart, $recend, $endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + # Write the block to one of the already running jobs + return round_robin_write($header_ref, $buffer_ref, + $recstart, $recend, $endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login + # or retrying failed jobs. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); + $job->write_block(); + return 1; +} + + +sub __SEM_MODE__() {} + + +sub acquire_semaphore() { + # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name, + $Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + + +sub __PARSE_OPTIONS__() {} + +sub shell_completion() { + if($opt::shellcompletion eq "zsh") { + # if shell == zsh + zsh_competion(); + } elsif($opt::shellcompletion eq "bash") { + # if shell == bash + bash_competion(); + } elsif($opt::shellcompletion eq "auto") { + if($Global::shell =~ m:/zsh$|^zsh$:) { + # if shell == zsh + zsh_competion(); + } elsif($Global::shell =~ m:/bash$|^bash$:) { + # if shell == bash + bash_competion(); + } else { + ::error("--shellcompletion is not implemented for ". + "'$Global::shell'."); + wait_and_exit(255); + } + } else { + ::error("--shellcompletion is not implemented for ". + "'$opt::shellcompletion'."); + wait_and_exit(255); + } +} + +sub bash_competion() { + # Print: + # complete -F _comp_parallel parallel; + # _comp_parallel() { + # COMPREPLY=($(compgen -W "--options" -- + # "${COMP_WORDS[$COMP_CWORD]}")); + # }; + my @bash_completion = + ("complete -F _comp_parallel parallel;", + '_comp_parallel() { COMPREPLY=($(compgen -W "'); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + push @bash_completion, + (map { (length $_ == 1) ? "-$_ " : "--$_ " } + split /\|/, $opt); + } + } + shift @och; + } + push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n"; + print @bash_completion; +} + +sub zsh_competion() { + # Print code used for completion in zsh + my @zsh_completion = + ("compdef _comp_parallel parallel; ", + "setopt localoptions extended_glob; ", + "_comp_parallel() { ", + "_arguments "); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + # {-o,--option} + my $zsh_opt = join(",", + (map { (length $_ == 1) ? "-$_" : "--$_" } + split /\|/, $opt)); + if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; } + $desc =~ s/'/'"'"'/g; + $argdesc =~ s/'/'"'"'/g; + $func =~ s/'/'"'"'/g; + push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' "; + } + } + shift @och; + } + push @zsh_completion, + q{'(-)1:command:{_command_names -e}' }, + q{'*::arguments:_normal'}, + "};\n"; + print @zsh_completion; +} + +sub options_hash() { + # Returns: + # %hash = for GetOptions + my %och = options_completion_hash(); + my %oh; + my ($k,$v); + while(($k,$v) = each %och) { + # Remove description + $k =~ s/\[.*//; + $oh{$k} = $v; + } + return %oh; +} + +sub options_completion_hash() { + # Returns: + # %hash = for GetOptions and shell completion + return + ("debug|D=s" => \$opt::D, + "xargs[Insert as many arguments as the command line length permits]" + => \$opt::xargs, + "m[Multiple arguments]" => \$opt::m, + ("X[Insert as many arguments with context as the command line ". + "length permits]" + => \$opt::X), + "v[Verbose]" => \@opt::v, + "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired, + ("sql-master|sqlmaster=s". + "[Submit jobs via SQL server. DBURL must point to a table, which ". + "will contain --joblog, the values, and output]:DBURL" + => \$opt::sqlmaster), + ("sql-worker|sqlworker=s". + "[Execute jobs via SQL server. Read the input sources variables ". + "from the table pointed to by DBURL.]:DBURL" + => \$opt::sqlworker), + ("sql-and-worker|sqlandworker=s". + "[--sql-master DBURL --sql-worker DBURL]:DBURL" + => \$opt::sqlandworker), + ("joblog|jl=s[Logfile for executed jobs]:logfile:_files" + => \$opt::joblog), + ("results|result|res=s[Save the output into files]:name:_files" + => \$opt::results), + "resume[Resumes from the last unfinished job]" => \$opt::resume, + ("resume-failed|resumefailed". + "[Retry all failed and resume from the last unfinished job]" + => \$opt::resume_failed), + ("retry-failed|retryfailed[Retry all failed jobs in joblog]" + => \$opt::retry_failed), + "silent[Silent]" => \$opt::silent, + ("keep-order|keeporder|k". + "[Keep sequence of output same as the order of input]" + => \$opt::keeporder), + ("no-keep-order|nokeeporder|nok|no-k". + "[Overrides an earlier --keep-order (e.g. if set in ". + "~/.parallel/config)]" + => \$opt::nokeeporder), + "group[Group output]" => \$opt::group, + "g" => \$opt::retired, + ("ungroup|u". + "[Output is printed as soon as possible and bypasses GNU parallel ". + "internal processing]" + => \$opt::ungroup), + ("latest-line|latestline|ll". + "[Print latest line of each job]" + => \$opt::latestline), + ("line-buffer|line-buffered|linebuffer|linebuffered|lb". + "[Buffer output on line basis]" + => \$opt::linebuffer), + ("tmux". + "[Use tmux for output. Start a tmux session and run each job in a ". + "window in that session. No other output will be produced]" + => \$opt::tmux), + ("tmux-pane|tmuxpane". + "[Use tmux for output but put output into panes in the first ". + "window. Useful if you want to monitor the progress of less than ". + "100 concurrent jobs]" + => \$opt::tmuxpane), + "null|0[Use NUL as delimiter]" => \$opt::null, + "quote|q[Quote command]" => \$opt::quote, + # Replacement strings + ("parens=s[Use parensstring instead of {==}]:parensstring" + => \$opt::parens), + ('rpl=s[Define replacement string]:"tag perl expression"' + => \@opt::rpl), + "plus[Add more replacement strings]" => \$opt::plus, + ("I=s". + "[Use the replacement string replace-str instead of {}]:replace-str" + => \$opt::I), + ("extensionreplace|er=s". + "[Use the replacement string replace-str instead of {.} for input ". + "line without extension]:replace-str" + => \$opt::U), + "U=s" => \$opt::retired, + ("basenamereplace|bnr=s". + "[Use the replacement string replace-str instead of {/} for ". + "basename of input line]:replace-str" + => \$opt::basenamereplace), + ("dirnamereplace|dnr=s". + "[Use the replacement string replace-str instead of {//} for ". + "dirname of input line]:replace-str" + => \$opt::dirnamereplace), + ("basenameextensionreplace|bner=s". + "[Use the replacement string replace-str instead of {/.} for ". + "basename of input line without extension]:replace-str" + => \$opt::basenameextensionreplace), + ("seqreplace=s". + "[Use the replacement string replace-str instead of {#} for job ". + "sequence number]:replace-str" + => \$opt::seqreplace), + ("slotreplace=s". + "[Use the replacement string replace-str instead of {%} for job ". + "slot number]:replace-str" + => \$opt::slotreplace), + ("delay=s". + "[Delay starting next job by duration]:duration" => \$opt::delay), + ("ssh-delay|sshdelay=f". + "[Delay starting next ssh by duration]:duration" + => \$opt::sshdelay), + ("load=s". + "[Only start jobs if load is less than max-load]:max-load" + => \$opt::load), + "noswap[Do not start job is computer is swapping]" => \$opt::noswap, + ("max-line-length-allowed|maxlinelengthallowed". + "[Print maximal command line length]" + => \$opt::max_line_length_allowed), + ("number-of-cpus|numberofcpus". + "[Print the number of physical CPU cores and exit (obsolete)]" + => \$opt::number_of_cpus), + ("number-of-sockets|numberofsockets". + "[Print the number of CPU sockets and exit]" + => \$opt::number_of_sockets), + ("number-of-cores|numberofcores". + "[Print the number of physical CPU cores and exit]" + => \$opt::number_of_cores), + ("number-of-threads|numberofthreads". + "[Print the number of hyperthreaded CPU cores and exit]" + => \$opt::number_of_threads), + ("use-sockets-instead-of-threads|usesocketsinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_sockets_instead_of_threads), + ("use-cores-instead-of-threads|usecoresinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cores_instead_of_threads), + ("use-cpus-instead-of-cores|usecpusinsteadofcores". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cpus_instead_of_cores), + ("shell-quote|shellquote|shell_quote". + "[Does not run the command but quotes it. Useful for making ". + "quoted composed commands for GNU parallel]" + => \@opt::shellquote), + ('nice=i[Run the command at this niceness]:niceness:($(seq -20 19))' + => \$opt::nice), + "tag[Tag lines with arguments]" => \$opt::tag, + ("tag-string|tagstring=s". + "[Tag lines with a string]:str" => \$opt::tagstring), + "ctag[Color tag]:str" => \$opt::ctag, + "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring, + "color|colour[Colourize output]" => \$opt::color, + ("color-failed|colour-failed|colorfailed|colourfailed|". + "color-fail|colour-fail|colorfail|colourfail|cf". + "[Colour failed jobs red]" + => \$opt::colorfailed), + ("onall[Run all the jobs on all computers given with --sshlogin]" + => \$opt::onall), + "nonall[--onall with no arguments]" => \$opt::nonall, + ("filter-hosts|filterhosts|filter-host[Remove down hosts]" + => \$opt::filter_hosts), + ('sshlogin|S=s'. + '[Distribute jobs to remote computers]'. + ':[@hostgroups/][ncpus/]sshlogin'. + '[,[@hostgroups/][ncpus/]sshlogin[,...]] or @hostgroup'. + ':_users') => \@opt::sshlogin, + ("sshloginfile|slf=s". + "[File with sshlogins on separate lines. Lines starting with '#' ". + "are ignored.]:filename:_files" + => \@opt::sshloginfile), + ("controlmaster|M". + "[Use ssh's ControlMaster to make ssh connections faster]" + => \$opt::controlmaster), + ("ssh=s". + "[Use this command instead of ssh for remote access]:sshcommand" + => \$opt::ssh), + ("transfer-file|transferfile|transfer-files|transferfiles|tf=s". + "[Transfer filename to remote computers]:filename:_files" + => \@opt::transfer_files), + ("return=s[Transfer files from remote computers]:filename:_files" + => \@opt::return), + ("trc=s[--transfer --return filename --cleanup]:filename:_files" + => \@opt::trc), + "transfer[Transfer files to remote computers]" => \$opt::transfer, + "cleanup[Remove transferred files]" => \$opt::cleanup, + ("basefile|bf=s". + "[Transfer file to each sshlogin before first job is started]". + ":file:_files" + => \@opt::basefile), + ("template|tmpl=s". + "[Replace replacement strings in file and save it in repl]". + ":file=repl:_files" + => \%opt::template), + "B=s" => \$opt::retired, + "ctrl-c|ctrlc" => \$opt::retired, + "no-ctrl-c|no-ctrlc|noctrlc" => \$opt::retired, + ("work-dir|workdir|wd=s". + "[Jobs will be run in the dir mydir. (default: the current dir ". + "for the local machine, the login dir for remote computers)]". + ":mydir:_cd" + => \$opt::workdir), + "W=s" => \$opt::retired, + ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options" + => \$opt::rsync_opts), + ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd" + => \$opt::tmpdir), + ("use-compress-program|compress-program|". + "usecompressprogram|compressprogram=s". + "[Use prg for compressing temporary files]:prg:_commands" + => \$opt::compress_program), + ("use-decompress-program|decompress-program|". + "usedecompressprogram|decompressprogram=s". + "[Use prg for decompressing temporary files]:prg:_commands" + => \$opt::decompress_program), + "compress[Compress temporary files]" => \$opt::compress, + "open-tty|o[Open terminal tty]" => \$opt::open_tty, + "tty[Open terminal tty]" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + ("dry-run|dryrun|dr". + "[Print the job to run on stdout (standard output), but do not ". + "run the job]" + => \$opt::dryrun), + "progress[Show progress of computations]" => \$opt::progress, + ("eta[Show the estimated number of seconds before finishing]" + => \$opt::eta), + "bar[Show progress as a progress bar]" => \$opt::bar, + ("total-jobs|totaljobs|total=s". + "[Set total number of jobs]" => \$opt::totaljobs), + "shuf[Shuffle jobs]" => \$opt::shuf, + ("arg-sep|argsep=s". + "[Use sep-str instead of ::: as separator string]:sep-str" + => \$opt::arg_sep), + ("arg-file-sep|argfilesep=s". + "[Use sep-str instead of :::: as separator string ". + "between command and argument files]:sep-str" + => \$opt::arg_file_sep), + ('trim=s[Trim white space in input]:trim_method:'. + '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '. + 'lr\:"Both trim" rl\:"Both trim"))' + => \$opt::trim), + "env=s[Copy environment variable var]:var:_vars" => \@opt::env, + "recordenv|record-env[Record environment]" => \$opt::record_env, + ('session'. + '[Record names in current environment in $PARALLEL_IGNORED_NAMES '. + 'and exit. Only used with env_parallel. '. + 'Aliases, functions, and variables with names i]' + => \$opt::session), + ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]' + => \$opt::plain), + ("profile|J=s". + "[Use profile profilename for options]:profilename:_files" + => \@opt::profile), + "tollef" => \$opt::tollef, + "gnu[Behave like GNU parallel]" => \$opt::gnu, + "link|xapply[Link input sources]" => \$opt::link, + "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall-of-shame by removing + # these lines + ("bibtex|citation". + "[Print the citation notice and BibTeX entry for GNU parallel, ". + "silence citation notice for all future runs, and exit. ". + "It will not run any commands]" + => \$opt::citation), + "will-cite|willcite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + ('halt-on-error|haltonerror|halt=s'. + '[When should GNU parallel terminate]'. + ':when:((now\:"kill all running jobs and halt immediately" '. + 'soon\:"wait for all running jobs to complete, start no new jobs"))' + => \$opt::halt), + 'limit=s[Dynamic job limit]:"command args"' => \$opt::limit, + ("memfree=s". + "[Minimum memory free when starting another job]:size" + => \$opt::memfree), + ("memsuspend=s". + "[Suspend jobs when there is less memory available]:size" + => \$opt::memsuspend), + "retries=s[Try failing jobs n times]:n" => \$opt::retries, + ("timeout=s". + "[Time out for command. If the command runs for longer than ". + "duration seconds it will get killed as per --term-seq]:duration" + => \$opt::timeout), + ("term-seq|termseq=s". + "[Termination sequence]:sequence" => \$opt::termseq), + # xargs-compatibility - implemented, man, testsuite + ("max-procs|maxprocs|P|jobs|j=s". + "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ". + "threads or read parameter from file]:+N/-N/N%/N/procfile:_files" + => \$opt::jobs), + ("delimiter|d=s[Input items are terminated by delim]:delim" + => \$opt::d), + ("max-chars|maxchars|s=s[Limit length of command]:max-chars" + => \$opt::max_chars), + ("arg-file|argfile|a=s". + "[Use input-file as input source]:input-file:_files" => \@opt::a), + "no-run-if-empty|norunifempty|r[Do not run empty input]" => \$opt::r, + ("replace|i:s". + "[This option is deprecated; use -I instead]:replace-str" + => \$opt::i), + "E=s" => \$opt::eof, + ("eof|e:s[Set the end of file string to eof-str]:eof-str" + => \$opt::eof), + ("process-slot-var|processslotvar=s". + "[Set this variable to job slot number]:varname" + => \$opt::process_slot_var), + ("max-args|maxargs|n=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_args), + ("max-replace-args|maxreplaceargs|N=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_replace_args), + "col-sep|colsep|C=s[Column separator]:regexp" => \$opt::colsep, + "match=s[Matching regexp]:regexp" => \@opt::match, + "csv[Treat input as CSV-format]"=> \$opt::csv, + ("help|h[Print a summary of the options to GNU parallel and exit]" + => \$opt::help), + ("L=s[When used with --pipe: Read records of recsize]:recsize" + => \$opt::L), + ("max-lines|maxlines|l:f". + "[When used with --pipe: Read records of recsize lines]:recsize" + => \$opt::max_lines), + "interactive|p[Ask user before running a job]" => \$opt::interactive, + ("verbose|t[Print the job to be run on stderr (standard error)]" + => \$opt::verbose), + ("version|V[Print the version GNU parallel and exit]" + => \$opt::version), + ('min-version|minversion=i'. + '[Print the version GNU parallel and exit]'. + ':version:($(parallel --minversion 0))' + => \$opt::minversion), + ("show-limits|showlimits". + "[Display limits given by the operating system]" + => \$opt::show_limits), + ("exit|x[Exit if the size (see the -s option) is exceeded]" + => \$opt::x), + # Semaphore + "semaphore[Work as a counting semaphore]" => \$opt::semaphore, + ("semaphore-timeout|semaphoretimeout|st=s". + "[If secs > 0: If the semaphore is not released within secs ". + "seconds, take it anyway]:secs" + => \$opt::semaphoretimeout), + ("semaphore-name|semaphorename|id=s". + "[Use name as the name of the semaphore]:name" + => \$opt::semaphorename), + "fg[Run command in foreground]" => \$opt::fg, + "bg[Run command in background]" => \$opt::bg, + "wait[Wait for all commands to complete]" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + ("shebang|hashbang". + "[GNU parallel can be called as a shebang (#!) command as the ". + "first line of a script. The content of the file will be treated ". + "as inputsource]" + => \$opt::shebang), + ("_pipe-means-argfiles[Internal: Called by --shebang-wrap]" + => \$opt::_pipe_means_argfiles), + "Y" => \$opt::retired, + ("skip-first-line|skipfirstline". + "[Do not use the first line of input]" + => \$opt::skip_first_line), + "_bug" => \$opt::_bug, + "_unsafe" => \$opt::_unsafe, + # --pipe + ("pipe|spreadstdin". + "[Spread input to jobs on stdin (standard input)]" => \$opt::pipe), + ("round-robin|roundrobin|round". + "[Distribute chunks of standard input in a round robin fashion]" + => \$opt::roundrobin), + "recstart=s" => \$opt::recstart, + ("recend=s". + "[Split record between endstring and startstring]:endstring" + => \$opt::recend), + ("regexp|regex". + "[Interpret --recstart and --recend as regular expressions]" + => \$opt::regexp), + ("remove-rec-sep|removerecsep|rrs". + "[Remove record separator]" => \$opt::remove_rec_sep), + ("output-as-files|outputasfiles|files[Save output to files]" + => \$opt::files), + ("output-as-files0|outputasfiles0|files0". + "[Save output to files separated by NUL]" + => \$opt::files0), + ("block-size|blocksize|block=s". + "[Size of block in bytes to read at a time]:size" + => \$opt::blocksize), + ("block-timeout|blocktimeout|bt=s". + "[Timeout for reading block when using --pipe]:duration" + => \$opt::blocktimeout), + "header=s[Use regexp as header]:regexp" => \$opt::header, + "cat[Create a temporary file with content]" => \$opt::cat, + "fifo[Create a temporary fifo with content]" => \$opt::fifo, + ("pipe-part|pipepart[Pipe parts of a physical file]" + => \$opt::pipepart), + "tee[Pipe all data to all jobs]" => \$opt::tee, + ("shard=s". + "[Use shardexpr as shard key and shard input to the jobs]:shardexpr" + => \$opt::shard), + ("bin=s". + "[Use binexpr as binning key and bin input to the jobs]:binexpr" + => \$opt::bin), + "group-by|groupby=s[Group input by value]:val" => \$opt::groupby, + # + ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]" + => \$opt::hostgroups), + "embed[Embed GNU parallel in a shell script]" => \$opt::embed, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "combineexec|combine-exec|combineexecutable|combine-executable=s". + "[Embed GNU parallel in a shell script]" => \$opt::combineexec, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "fast[Run commands fast]" => \$opt::fast, + "_parset=s[Internal: Generate shell code for parset]" => \$opt::_parset, + ("shell-completion|shellcompletion=s". + "[Generate shell code for shell completion]:shell:(bash zsh)" + => \$opt::shellcompletion), + # Parameter for testing optimal values + "_test=s" => \$opt::_test, + ); +} + +sub get_options_from_array($@) { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options (e.g. --profile) + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_parset() { + $Global::progname = "parset"; + @Global::parset_vars = split /[ ,]/, $opt::_parset; + my $var_or_assoc = shift @Global::parset_vars; + # Legal names: var _v2ar arrayentry[2] + my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ } + @Global::parset_vars); + if(@illegal) { + ::error + ("@illegal is an invalid variable name.", + "Variable names must be letter followed by letters or digits.", + "Usage:", + " parset varname GNU Parallel options and command"); + wait_and_exit(255); + } + if($var_or_assoc eq "assoc") { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "assoc"; + $Global::parset_endstring=")\n"; + } elsif($var_or_assoc eq "var") { + if($#Global::parset_vars > 0) { + $Global::parset = "var"; + } else { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "array"; + $Global::parset_endstring=")\n"; + } + } else { + ::die_bug("parset: unknown '$opt::_parset'"); + } +} + +sub parse_options(@) { + # Returns: N/A + init_globals(); + my @argv_before = @ARGV; + @ARGV = read_options(); + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be added to a public hall-of-shame by removing the lines + if(defined $opt::citation) { + citation(\@argv_before,\@ARGV); + wait_and_exit(0); + } + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + if($opt::_bug) { ::die_bug("test-bug"); } + $Global::debug = $opt::D; + # + ## Shell + # + if($opt::fast) { + $Global::shell = $ENV{'PARALLEL_SHELL'} || which("dash") || "/bin/sh"; + } else { + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) + || $ENV{'SHELL'} || "/bin/sh"; + } + if(not -x $Global::shell and not which($Global::shell)) { + ::error("Shell '$Global::shell' not found."); + wait_and_exit(255); + } + ::debug("init","Global::shell $Global::shell\n"); + $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:; + $Global::fish = $Global::shell =~ m:(/[-a-z]*)?fish:; + if(defined $opt::_parset) { parse_parset(); } + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::null) { $/ = "\0"; } + if(defined $opt::files) { $Global::files = 1; $Global::files_sep = "\n"; } + if(defined $opt::files0) { $Global::files = 1; $Global::files_sep = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + parse_replacement_string_options(); + $opt::tag ||= $opt::ctag; + $opt::tagstring ||= $opt::ctagstring; + if(defined $opt::ctag or defined $opt::ctagstring + or defined $opt::color) { + $Global::color = 1; + } + if($opt::linebuffer or $opt::latestline) { + $Global::linebuffer = 1; + Job::latestline_init(); + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + if(defined $opt::tagstring) { + $opt::tagstring = unquote_printf($opt::tagstring); + if($opt::tagstring =~ + /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/ + and + $Global::linebuffer) { + # --tagstring contains {= ... =} and --linebuffer => + # recompute replacement string for each use (do not cache) + $Global::cache_replacement_eval = 0; + } + } + if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } + if(defined $opt::quote) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { + $opt::max_args = multiply_binary_prefix($opt::max_args); + $Global::max_number_of_args = $opt::max_args; + if($opt::pipepart and $opt::groupby) { $Global::max_number_of_args = 1; } + } + if(defined $opt::blocktimeout) { + $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout)); + if($Global::blocktimeout < 1) { + ::error("--block-timeout must be at least 1"); + wait_and_exit(255); + } + } + if(defined $opt::timeout) { + $Global::timeoutq = TimeoutQueue->new($opt::timeout); + } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts || + $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR'; + # Default: Same nice level as GNU Parallel is started at + $opt::nice ||= eval { getpriority(0,0) } || 0; + if(defined $opt::help) { usage(); exit(0); } + if(defined $opt::shellcompletion) { shell_completion(); exit(0); } + if(defined $opt::embed) { embed(); exit(0); } + if(defined $opt::sqlandworker) { + $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker; + } + if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::csv) { + if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") { + ::error("The perl module Text::CSV is not installed."); + ::error("Try installing libtext-csv-perl or perl-Text-CSV."); + wait_and_exit(255); + } + $opt::colsep = defined $opt::colsep ? $opt::colsep : ","; + my $csv_setting = { binary => 1, sep_char => $opt::colsep }; + my $sep = $csv_setting->{sep_char}; + $Global::csv = Text::CSV->new($csv_setting) + or die "Cannot use CSV: ".Text::CSV->error_diag (); + } + if(defined $opt::header) { + $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; + } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { + $Global::arg_file_sep = $opt::arg_file_sep; + } + if(not defined $opt::process_slot_var) { + $opt::process_slot_var = 'PARALLEL_JOBSLOT0'; + } + if(defined $opt::number_of_sockets) { + print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cpus) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_threads) { + print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_chars) { + $opt::max_chars = multiply_binary_prefix($opt::max_chars); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if($opt::transfer) { + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + push @Global::transfer_files, @opt::transfer_files; + if(%opt::template) { + while (my ($source, $template_name) = each %opt::template) { + push @Global::template_names, $template_name; + push @Global::template_contents, slurp_or_exit($source); + } + } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M"); + if($Global::blocksize > 2**31-1 and not $opt::pipepart) { + warning("--blocksize >= 2G causes problems. Using 2G-1."); + $Global::blocksize = 2**31-1; + } + if($^O eq "cygwin" and + ($opt::pipe or $opt::pipepart or $opt::roundrobin) + and $Global::blocksize > 65535) { + warning("--blocksize >= 64K causes problems on Cygwin."); + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + $opt::memsuspend = multiply_binary_prefix($opt::memsuspend); + $Global::memlimit = $opt::memsuspend + $opt::memfree; + check_invalid_option_combinations(); + if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//; + $opt::sshdelay = multiply_time_units($opt::sshdelay); + $Global::delayauto = $opt::delay =~ s/auto$//; + $opt::delay = multiply_time_units($opt::delay); + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + + if(defined $opt::results) { + # Is the output a dir or CSV-file? + if($opt::results =~ /\.csv$/i) { + # CSV with , as separator + $Global::csvsep = ","; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.tsv$/i) { + # CSV with TAB as separator + $Global::csvsep = "\t"; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.json$/i) { + # JSON output + $Global::jsonout ||= 1; + $Global::membuffer ||= 1; + } + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + if(($opt::results and not $Global::csvsep) or $Global::files) { + # No need for decompressing + $opt::decompress_program = "cat >/dev/null"; + } + } + if(defined $opt::dryrun) { + # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks + $opt::ungroup = 0; + $opt::group = 1; + } + if(defined $opt::nonall) { + # Append a dummy empty argument if there are no arguments + # on the command line to avoid reading from STDIN. + # arg_sep = random 50 char + # \0noarg => nothing (not the empty string) + $Global::arg_sep = join "", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50); + push @ARGV, $Global::arg_sep, "\0noarg"; + } + if(defined $opt::tee) { + if(not defined $opt::jobs) { + $opt::jobs = 0; + } + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + if(not @Global::transfer_files) { + # Defaults to --transferfile {} + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::null = 1; + $/ = "\0"; + } else { + $opt::max_lines = multiply_binary_prefix($opt::max_lines); + if ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + } + + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $opt::L = multiply_binary_prefix($opt::L); + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $opt::max_replace_args = + multiply_binary_prefix($opt::max_replace_args); + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + # Deal with ::: :::+ :::: ::::+ and -a +file + my @ARGV_with_argsep = @ARGV; + @ARGV = read_args_from_command_line(); + if(defined $opt::combineexec) { + pack_combined_executable(\@argv_before,\@ARGV_with_argsep,\@ARGV); + exit(0); + } + parse_semaphore(); + + if(defined $opt::eta) { $opt::progress = $opt::eta; } + if(defined $opt::bar) { $opt::progress = $opt::bar; } + if(defined $opt::bar or defined $opt::latestline) { + my $fh = $Global::status_fd || *STDERR; + # Activate decode_utf8 + eval q{ + # Enable utf8 if possible + use utf8; + binmode $fh, "encoding(utf8)"; + *decode_utf8 = \&Encode::decode_utf8; + }; + if(eval { decode_utf8("x") }) { + # Great: decode works + } else { + # UTF8-decode not supported: Dummy decode + eval q{sub decode_utf8($;$) { $_[0]; }}; + } + # Activate decode_utf8 + eval q{ + # Enable utf8 if possible + use utf8; + use Encode qw( encode_utf8 ); + use Text::CharWidth qw( mbswidth ); + use Unicode::Normalize qw( NFC NFD ); + }; + if(eval { mbswidth("ヌー平行") }) { + # Great: mbswidth works + } else { + # mbswidth not supported: Dummy mbswidth + eval q{ sub mbswidth { return length @_; } }; + } + } + + # If you want GNU Parallel to be maintained in the future you + # should keep this. + # _YOU_ will be harming free software by removing the notice. + # + # Funding a free software project is hard. GNU Parallel is no + # exception. On top of that it seems the less visible a project + # is, the harder it is to get funding. And the nature of GNU + # Parallel is that it will never be seen by "the guy with the + # checkbook", but only by the people doing the actual work. + # + # This problem has been covered by others - though no solution has + # been found: + # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer + # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/ + # + # The FAQ tells you why the citation notice exists: + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # + # If you want GNU Parallel to be maintained in the future, and not + # just wither away like so many other free software tools, you + # need to help finance the development. + # + # The citation notice is a simple way of doing so, as citations + # makes it possible to me to get a job where I can maintain GNU + # Parallel as part of the job. + # + # This means you can help financing development + # + # WITHOUT PAYING A SINGLE CENT! + # + # Before implementing the citation notice it was discussed with + # the users: + # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html + # + # Having to spend 10 seconds on running 'parallel --citation' once + # is no doubt not an ideal solution, but no one has so far come up + # with an ideal solution - neither for funding GNU Parallel nor + # other free software. + # + # If you believe you have the perfect solution, you should try it + # out, and if it works, you should post it on the email + # list. Ideas that will cost work and which have not been tested + # are, however, unlikely to be prioritized. + # + # _YOU_ will be harming free software by removing the notice. You + # accept to be added to a public hall of shame by removing the + # line. That includes you, George and Andreas. + # + # Please note that GPL version 3 gives you the right to fork GNU + # Parallel under a new name, but it does not give you the right to + # distribute modified copies with the citation notice disabled in + # a way where the software can be confused with GNU Parallel. To + # do that you need to be the owner of the GNU Parallel + # trademark. The xt:Commerce case shows this. + # + # Description of the xt:Commerce case in OLG Duesseldorf + # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx + # + # The verdict in German + # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # + # Other free software limiting derivates by the same name: + # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects + # https://tm.joomla.org/trademark-faq.html + # https://www.mozilla.org/en-US/foundation/trademarks/faq/ + # + # Running 'parallel --citation' one single time takes less than 10 + # seconds, and will silence the citation notice for future + # runs. If that is too much trouble for you, why not use one of + # the alternatives instead? + # See a list in: 'man parallel_alternatives' + # + # If you want GNU Parallel to be maintained in the future, you + # should keep this line: + citation_notice(); + # This is because _YOU_ actively make it harder to justify + # spending time developing GNU Parallel by removing it. + + # If you disagree, please read (especially 77-): + # https://www.fordfoundation.org/media/2976/roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf + parse_halt(); + + if($ENV{'PARALLEL_ENV'}) { + # Read environment and set $Global::parallel_env + # Must be done before is_acceptable_command_line_length() + my $penv = $ENV{'PARALLEL_ENV'}; + # unset $PARALLEL_ENV: It should not be given to children + # because it takes up a lot of env space + delete $ENV{'PARALLEL_ENV'}; + if(-e $penv) { + # This is a file/fifo: Replace envvar with content of file + $penv = slurp_or_exit($penv); + } + # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV + $penv =~ s/\001/\n/g; + if($penv =~ /\0/) { + ::warning('\0 (NUL) in environment is not supported'); + } + $Global::parallel_env = $penv; + } + + parse_sshlogin(); + if(defined $opt::show_limits) { show_limits(); } + + if(remote_hosts() and + (defined $opt::X or defined $opt::m or defined $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail."); + } + + if(not defined $opt::jobs) { $opt::jobs = "100%"; } + open_joblog(); + open_json_csv(); + if(defined $opt::sqlmaster or defined $opt::sqlworker) { + $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker); + } + if(defined $opt::sqlworker) { $Global::membuffer ||= 1; } + # The sqlmaster groups the arguments, so the should just read one + if(defined $opt::sqlworker and not defined $opt::sqlmaster) { + $Global::max_number_of_args = 1; + } + if(defined $Global::color or defined $opt::colorfailed) { + Job::init_color(); + } +} + +sub check_invalid_option_combinations() { + if(defined $opt::timeout and + $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) { + ::error("--timeout must be seconds or percentage."); + wait_and_exit(255); + } + if(defined $opt::fifo and defined $opt::cat) { + ::error("--fifo cannot be combined with --cat."); + ::wait_and_exit(255); + } + if(defined $opt::retries and defined $opt::roundrobin) { + ::error("--retries cannot be combined with --roundrobin."); + ::wait_and_exit(255); + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ". + "--max-lines, and -L."); + wait_and_exit(255); + } + if(defined $opt::group and defined $opt::ungroup) { + ::error("--group cannot be combined with --ungroup."); + ::wait_and_exit(255); + } + if(defined $opt::group and defined $opt::linebuffer) { + ::error("--group cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::ungroup and defined $opt::linebuffer) { + ::error("--ungroup cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::tollef and not defined $opt::gnu) { + ::error("--tollef has been retired.", + "Remove --tollef or use --gnu to override --tollef."); + ::wait_and_exit(255); + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.", + "-B has been retired. Use --bf.", + "-T has been retired. Use --tty.", + "-U has been retired. Use --er.", + "-W has been retired. Use --wd.", + "-Y has been retired. Use --shebang.", + "-H has been retired. Use --halt.", + "--sql has been retired. Use --sqlmaster.", + "--ctrlc has been retired.", + "--noctrlc has been retired."); + ::wait_and_exit(255); + } + if(defined $opt::groupby) { + if(not defined $opt::pipe and not defined $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::remove_rec_sep) { + ::error("--remove-rec-sep is not compatible with --groupby"); + ::wait_and_exit(255); + } + if(defined $opt::recstart) { + ::error("--recstart is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recend ne "\n") { + ::error("--recend is not compatible with --groupby"); + ::wait_and_exit(255); + } + } + sub unsafe_warn { + # use --_unsafe to only generate a warning + if($opt::_unsafe) { ::warning(@_); } else { ::error(@_); exit(255); } + } + if(defined $opt::results) { + if($opt::nonall or $opt::onall) { + unsafe_warn("--(n)onall + --results not supported (yet)."); + } + } + sub test_safe_chars { + my $var = shift; + if($ENV{$var} =~ m{^[-a-z0-9_+,.%:/= ]*$}is) { + # OK + } else { + unsafe_warn("\$$var can only contain [-a-z0-9_+,.%:/= ]."); + } + } + if($ENV{'TMPDIR'} =~ /\n/) { + if(defined $opt::files) { + ::warning("Use --files0 when \$TMPDIR contains newline."); + } elsif($Global::cshell + and + (defined $opt::cat or defined $opt::fifo)) { + ::warning("--cat/--fifo fails under csh ". + "if \$TMPDIR contains newline."); + } + } elsif($ENV{'TMPDIR'} =~ /\177/) { + unsafe_warn("\$TMPDIR with \\177 (\177) is not supported."); + } else{ + test_safe_chars('TMPDIR'); + } + map { test_safe_chars($_); } qw(PARALLEL_HOME XDG_CONFIG_DIRS + PARALLEL_REMOTE_TMPDIR XDG_CACHE_HOME); +} + +sub init_globals() { + # Defaults: + $Global::version = 20241222; + $Global::progname = 'parallel'; + $::name = "GNU Parallel"; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + # Don't quote every part of the command line + $Global::quoting = 0; + # Quote replacement strings + $Global::quote_replace = 1; + $Global::total_completed = 0; + $Global::cache_replacement_eval = 1; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => + ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '. + '$_ = dirname($_);'), + '{/.}' => 's:.*/::; s:\.[^/.]*$::;', + '{.}' => 's:\.[^/.]*$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$:: || s:.*$::', + # a.b => b; a => '' + '{+.}' => 's:.*\.:: || s:.*$::', + # a.b.c => b.c; a.b => ''; a => '' + '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{..}' => 's:\.[^/.]*\.[^/.]*$::', + '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::', + '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::', + '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::', + # n choose k = Binomial coefficient + '{choose_k}' => ('for $t (2..$#arg)'. + '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'), + # unique values: Skip job if any args are the same + '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }', + # {##} = number of jobs + '{##}' => '1 $_=total_jobs()', + # {0#} = 0-padded seq + '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'. + '$_=sprintf("%0${f}d",seq())'), + # {0%} = 0-padded jobslot + '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'. + '$_=sprintf("%0${f}d",slot())'), + # {seq-1} = seq-1 = counting from 0 + '{seq(.*?)}' => '$_=eval q{$job->seq()}.qq{$$1}', + # {seq-1} = jobslot-1 = counting from 0 + '{slot(.*?)}' => '$_=eval q{$job->slot()}.qq{$$1}', + + ## Bash inspired replacement strings + # Bash ${a:-myval} + '{:-([^}]+?)}' => '$_ ||= $$1', + # Bash ${a:2} + '{:(\d+?)}' => 'substr($_,0,$$1) = ""', + # Bash ${a:2:3} + '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', + # echo {#z.*z.} ::: z.z.z.foo => z.foo + # echo {##z.*z.} ::: z.z.z.foo => foo + # Bash ${a#bc} + '{#([^#}][^}]*?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;', + # Bash ${a##bc} + '{##([^#}][^}]*?)}' => 's/^$$1//;', + # echo {%.z.*z} ::: foo.z.z.z => foo.z + # echo {%%.z.*z} ::: foo.z.z.z => foo + # Bash ${a%def} + '{%([^}]+?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;', + # Bash ${a%%def} + '{%%([^}]+?)}' => 's/$$1$//;', + # Bash ${a/def/ghi} ${a/def/} + '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;', + # Bash ${a/#def/ghi} ${a/#def/} + '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;', + # Bash ${a/%def/ghi} ${a/%def/} + '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;', + # Bash ${a//def/ghi} ${a//def/} + '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;', + # Bash ${a^a} + '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;', + # Bash ${a^^a} + '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;', + # Bash ${a,A} + '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;', + # Bash ${a,,A} + '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;', + + # {slot} = $PARALLEL_JOBSLOT + '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()', + # {host} = ssh host + '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()', + # {sshlogin} = sshlogin + '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()', + # {hgrp} = hostgroups of the host + '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()', + # {agrp} = hostgroups of the argument + '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp"; + # bug #55398: set $OLDPWD when using --wd + $ENV{'OLDPWD'} = $ENV{'PWD'}; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp."); + $ENV{HOME} = "/tmp"; + } + # no warnings to allow for undefined $XDG_* + no warnings 'uninitialized'; + # If $PARALLEL_HOME is set, but does not exist, try making it. + if(defined $ENV{'PARALLEL_HOME'}) { + eval { File::Path::mkpath($ENV{'PARALLEL_HOME'}); }; + } + # $xdg_config_home is needed to make env_parallel.fish stop complaining + my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'} || $ENV{'HOME'} . "/.config"; + # Use the first config dir that exists from: + # $PARALLEL_HOME + # $XDG_CONFIG_HOME/parallel + # $(each XDG_CONFIG_DIRS)/parallel + # $HOME/.parallel + # + # Keep only dirs that exist + @Global::config_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + (map { "$_/parallel" } + $xdg_config_home, + split /:/, $ENV{'XDG_CONFIG_DIRS'}), + $ENV{'HOME'} . "/.parallel"); + # Use first dir as config dir + $Global::config_dir = $Global::config_dirs[0] || + $ENV{'HOME'} . "/.parallel"; + if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) { + ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist."); + ::warning("Using $Global::config_dir"); + } + # Use the first cache dir that exists from: + # $PARALLEL_HOME + # $XDG_CACHE_HOME/parallel + # Keep only dirs that exist + my $xdg_cache_home = $ENV{'XDG_CACHE_HOME'} || $ENV{'HOME'} . "/.cache"; + @Global::cache_dirs = (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + $xdg_cache_home."/parallel"); + $Global::cache_dir = $Global::cache_dirs[0] || $ENV{'HOME'} . "/.parallel"; + Job::init_color(); +} + +sub parse_halt() { + # $opt::halt flavours + # Uses: + # $opt::halt + # $Global::halt_when + # $Global::halt_fail + # $Global::halt_success + # $Global::halt_pct + # $Global::halt_count + if(defined $opt::halt) { + my %halt_expansion = ( + "0" => "never", + "1" => "soon,fail=1", + "2" => "now,fail=1", + "-1" => "soon,success=1", + "-2" => "now,success=1", + ); + # Expand -2,-1,0,1,2 into long form + $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; + # --halt 5% == --halt soon,fail=5% + $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; + # Split: soon,fail=5% + my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; + if(not grep { $when eq $_ } qw(never soon now)) { + ::error("--halt must have 'never', 'soon', or 'now'."); + ::wait_and_exit(255); + } + $Global::halt_when = $when; + if($when ne "never") { + if($fail_success eq "fail") { + $Global::halt_fail = 1; + } elsif($fail_success eq "success") { + $Global::halt_success = 1; + } elsif($fail_success eq "done") { + $Global::halt_done = 1; + } else { + ::error("--halt $when must be followed by ,success or ,fail."); + ::wait_and_exit(255); + } + if($pct_count =~ /^(\d+)%$/) { + $Global::halt_pct = $1/100; + } elsif($pct_count =~ /^(\d+)$/) { + $Global::halt_count = $1; + } else { + ::error("--halt $when,$fail_success ". + "must be followed by ,number or ,percent%."); + ::wait_and_exit(255); + } + } + } +} + +sub parse_replacement_string_options() { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl($$) { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/\s/,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore() { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) { + $Global::semaphore = 1; + } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait and not $opt::sqlmaster) { + $Global::semaphore = 1; @ARGV = "true"; + } + if($Global::semaphore) { + if(@opt::a) { + # Assign the first -a to STDIN + open(STDIN,"<",shift @opt::a); + if(@opt::a) { + # We currently have no way of dealing with more -a + ::error("A semaphore cannot take input from more files\n"); + ::wait_and_exit(255); + } + } + @opt::a = ("/dev/null"); + # Append a dummy empty argument + # \0 => nothing (not the empty string) + push(@Global::unget_argv, [Arg->new("\0noarg")]); + $Semaphore::timeout = int(multiply_time_units($opt::semaphoretimeout)) + || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + local $/ = "\n"; + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive."); + ::wait_and_exit(255); + } + } +} + +sub record_env() { + # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars + # Returns: N/A + my $ignore_filename = $Global::config_dir . "/ignored_vars"; + write_or_exit($ignore_filename,map { $_,"\n" } keys %ENV); +} + +sub open_joblog() { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fh + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results."); + ::wait_and_exit(255); + } + if(defined $opt::joblog and $opt::joblog =~ s/^\+//) { + # --joblog +filename = append to filename + $append = 1; + } + if($opt::joblog + and + ($opt::sqlmaster + or + not $opt::sqlworker)) { + # Do not log if --sqlworker + if($opt::resume || $opt::resume_failed || $opt::retry_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Enable utf8 if possible + eval q{ binmode $joblog_fh, "encoding(utf8)"; }; + # Read the joblog + # Override $/ with \n because -d might be set + local $/ = "\n"; + # If there is a header: Open as append later + $append = <$joblog_fh>; + my $joblog_regexp; + if($opt::retry_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + my @group; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + # Grab out the command + $group[$1-1] = $3; + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + if(@group) { + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + if(grep /\0/, @group) { + # force --null to deal with \n in commandlines + ::warning("Command lines contain newline. ". + "Forcing --null."); + $opt::null = 1; + $/ = "\0"; + } + # Replace \0 with '\n' as used in print_joblog() + print $outfh (map { s/\0/\n/g; $_,$/ } + map { $_ } @group); + seek $outfh, 0, 0; + exit_if_disk_full(); + # Set filehandle to -a + @opt::a = ($outfh); + } + # Remove $command (so -a is run) + @ARGV = (); + } + if($opt::resume || $opt::resume_failed) { + if($opt::resume_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + } + close $joblog_fh; + } + # $opt::null may be set if the commands contain \n + if($opt::null) { $/ = "\0"; } + } + if($opt::dryrun) { + # Do not write to joblog in a dry-run + + } elsif($append) { + # Append to joblog + $Global::joblog = open_or_exit(">>", $opt::joblog); + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fh{1}; + } else { + # Overwrite the joblog + $Global::joblog = open_or_exit(">", $opt::joblog); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; + } + } +} + +sub open_json_csv() { + if($opt::results) { + # Output as JSON/CSV/TSV + if($opt::results eq "-.csv" + or + $opt::results eq "-.tsv" + or + $opt::results eq "-.json") { + # Output as JSON/CSV/TSV on stdout + open $Global::csv_fh, ">&", "STDOUT" or + ::die_bug("Can't dup STDOUT in csv: $!"); + # Do not print any other output to STDOUT + # by forcing all other output to /dev/null + open my $fd, ">", "/dev/null" or + ::die_bug("Can't >/dev/null in csv: $!"); + $Global::fh{1} = $fd; + $Global::fh{2} = $fd; + } elsif($Global::csvsep or $Global::jsonout) { + $Global::csv_fh = open_or_exit(">",$opt::results); + } + } +} + +sub find_compression_program() { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed on 128 core + + # seq 120000000|shuf > 1gb & + # apt-get update + # apt install make g++ htop + # wget -O - pi.dk/3 | bash + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz + # git clone https://github.com/facebook/zstd.git + # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin) + # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz + # wait + # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz" + # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # sort -nk4 jl-? + + # 1-core: + # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip + # 4-cores: + # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip + # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip + # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip + lrz pxz bzip2 lzma xz clzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + +sub read_options() { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = Q(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = Q(Q(shift @ARGV)); # TODO - test if script = " " + my @args = map{ Q($_) } @ARGV; + # exec myself to split $ARGV[0] into separate fields + exec "$0 --_pipe-means-argfiles @options @parser $script ". + "::: @args"; + } + } + if($ARGV[0] =~ / --shebang(-?wrap)? /) { + ::warning("--shebang and --shebang-wrap must be the first ". + "argument.\n"); + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + my @ARGV_orig = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from $PARALLEL_HOME/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + (map { "$_/config" } @Global::config_dirs), + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if($profile =~ m:^\./|^/:) { + # Look for ./profile in . + # Look for /profile in / + push @profiles, grep { -r $_ } $profile; + } else { + # Look for the $profile in @Global::config_dirs + push @profiles, grep { -r $_ } + map { "$_/$profile" } @Global::config_dirs; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + ::debug("init","Read $profile\n"); + local $/ = "\n"; + open (my $in_fh, "<", $profile) || + ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shell_words($_); + } + close $in_fh; + } else { + if(grep /^\Q$profile\E$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable."); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL'}); + } + # Add options from env_parallel.csh via $PARALLEL_CSH + if($ENV{'PARALLEL_CSH'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + # What were the options given on the command line? + # Used to start --sqlworker + my $ai = arrayindex(\@ARGV_orig, \@ARGV); + @Global::options_in_argv = @ARGV_orig[0..$ai-1]; + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub arrayindex($$) { + # Similar to Perl's index function, but for arrays + # Input: + # $arr_ref1 = ref to @array1 to search in + # $arr_ref2 = ref to @array2 to search for + # Returns: + # $pos = position of @array1 in @array2, -1 if not found + my ($arr_ref1,$arr_ref2) = @_; + my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1; + my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2; + my $i = index($array1_as_string,$array2_as_string,0); + if($i == -1) { return -1 } + my @before = split /\0/, substr($array1_as_string,0,$i); + return $#before; +} + +sub read_args_from_command_line() { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # :::+ ($Global::arg_sep with --link) + # ::::+ ($Global::arg_file_sep with --link) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+ + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my %group_sep = ($Global::arg_sep => ":::", + $Global::arg_sep."+" => ":::+", + $Global::arg_file_sep => "::::", + $Global::arg_file_sep."+" => "::::+"); + sub is_linked($) { + # file is linked if file starts with + + local $_ = shift; + if(/^\+(.*)/) { + my $noplus = $1; + if(-e $_ and -e $noplus) { + ::error("It is unclear whether you mean +./$noplus or ./+$noplus"); + wait_and_exit(255); + } elsif(-e $_ and not -e $noplus) { + # This is ./+file = this is not linked + return 0; + } elsif(not -e $_ and -e $noplus) { + # This is +./file = this is linked + return 1; + } elsif(not -e $_ and not -e $noplus) { + # File does not exist, maybe it is stdin? + if($_ eq "-") { + # This is - = this is not linked + return 0; + } elsif($_ eq "+-") { + # This is +- = this is linked + return 1; + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } else { + ::die_bug("noplus: $noplus $_"); + } + } + # not linked + return 0; + } + sub cmd_template() { + # remove command template from @ARGV + # keep ::: / :::: in @ARGV if any + my @cmd_template; + while(@ARGV) { + my $arg = shift @ARGV; + if($group_sep{$arg}) { + # Found separator: push it back and exit loop + unshift @ARGV, $arg; + last; + } + push @cmd_template, $arg; + } + return @cmd_template; + } + sub divide_into_groups() { + # Split arguments from @ARGV into groups: + # ::: 1 2 3 :::: a b c ::::+ d e f + # => + # [ ::: 1 2 3 ], [ :::: a b c ], [ ::::+ d e f ] + my @g; + my @grp; + while(@ARGV) { + my $arg = shift @ARGV; + if($group_sep{$arg}) { + # start a new group + push @grp, [@g]; + @g = ($group_sep{$arg}); + } else { + push @g, $arg; + } + } + push @grp, [@g]; + shift @grp; # The first will always be empty + return @grp; + } + sub save_to_file(@) { + # Put args into a file, return open file handle of file + # Create argfile + my ($fh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $fh map { $_,$/ } @_; + seek $fh, 0, 0; + exit_if_disk_full(); + return $fh; + } + my @cmd = cmd_template(); + # The rest of @ARGV is ::: / :::: args + # If there are any -a: Rewrite them to use :::: + if(@opt::a) { unshift @ARGV, $Global::arg_file_sep, @opt::a; } + @opt::a = (); + # Convert ::: and :::: into (linked) files and put those into @opt::a + for my $g_ref (divide_into_groups()) { + my $group_sep = shift @$g_ref; + if($group_sep eq ":::" or $group_sep eq ":::+") { + # Group starts with ::: / :::+ + if($opt::_pipe_means_argfiles and $#$g_ref < 0) { + # TODO + # Deal with --shebang-wrap and ::: on the shebang line + } else { + push @opt::a, save_to_file(@$g_ref); + # if $group_sep == ":::+": it is linked + push @opt::linkinputsource, ($group_sep eq ":::+"); + } + } elsif($group_sep eq "::::" or $group_sep eq "::::+") { + # Group starts with :::: / ::::+ + for my $f (@$g_ref) { + if($group_sep eq "::::+") { + # Linking forced + push @opt::a, $f; + push @opt::linkinputsource, 1; + } elsif($group_sep eq "::::") { + # Auto detect linking + if(is_linked($f)) { + # +file + push @opt::linkinputsource, 1; + $f =~ s/^\+//; + } else { + # file (no plus) + push @opt::linkinputsource, 0; + } + push @opt::a, $f; + } else { + ::die_bug("arg link error"); + } + } + } else { + ::die_bug("arg link error"); + } + } + # Output: command to run with options + return @cmd; +} + +sub cleanup() { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; + } +} + + +sub __QUOTING_ARGUMENTS_FOR_SHELL__() {} + +sub shell_quote(@) { + # Input: + # @strings = strings to be quoted + # Returns: + # @shell_quoted_strings = string quoted as needed by the shell + return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_); +} + +sub shell_quote_scalar_rc($) { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh($) { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + local $_ = $_[0]; + if(/[^-_.+a-z0-9\/]/i) { + s/'+/'"$&"'/g; # "-quote '-quotes: ''' => "'''" + $_ = "'$_'"; # '-quote entire string + s/^''//; # Remove unneeded '' at ends + s/''$//; # (faster than s/^''|''$//g) + return $_; + } elsif ($_ eq "") { + return "''"; + } else { + # No quoting needed + return $_; + } +} + +sub shell_quote_scalar($) { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::cshell) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + *shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. Call it + return shell_quote_scalar($_[0]); +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = shell_quote_scalar($_[0]); + no warnings 'redefine'; + *Q = \&::shell_quote_scalar; + return $ret; +} + +sub shell_quote_file($) { + # Quote the string so shell will not expand any special chars + # and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ and ./ if needed + my $a = shift; + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return Q($a); +} + +sub shell_words(@) { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar($) { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $perl_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +# -w complains about prototype +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = perl_quote_scalar($_[0]); + *pQ = \&::perl_quote_scalar; + return $ret; +} + +sub unquote_printf() { + # Convert \t \n \r \xFF \000 \0 + # Inputs: + # $string = string with \t \n \r \num \0 + # Returns: + # $replaced = string with TAB NEWLINE CR NUL + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + # Hex: \xFF + s/\\x([0-9a-fA-F]+)/eval 'sprintf "\\x'.$1.'"'/ge; + # Octal: \007 + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + # Single digit octal: \7 + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + + +sub __FILEHANDLES__() {} + + +sub save_stdin_stdout_stderr() { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fh + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # TODO Disabled until we have an open3 that will take n filehandles + # for my $fdno (1..61) { + # # /dev/fd/62 and above are used by bash for <(cmd) + # # Find file descriptors that are already opened (by the shell) + # Only focus on stdout+stderr for now + for my $fdno (1..2) { + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fh{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles() { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fh + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? + # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fh)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit($$) { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $mode = read:"<" write:">" + # $file = filehandle or filename to open + # Uses: + # $Global::original_stdin + # Returns: + # $fh = file handle to opened file + my $mode = shift; + my $file = shift; + if($file eq "-") { + if($mode eq "<") { + return ($Global::original_stdin || *STDIN); + } else { + return ($Global::original_stderr || *STDERR); + } + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, $mode, $file)) { + ::error("Cannot open `$file': $!"); + wait_and_exit(255); + } + return $fh; +} + +sub slurp_or_exit($) { + # Read content of a file or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Returns: + # $content = content as scalar + my $fh = open_or_exit("<",shift); + # $/ = undef => slurp whole file + local $/; + my $content = <$fh>; + close $fh; + return $content; +} + +sub write_or_exit(@) { + # Write content to a file or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # @content = content to be written + # Returns: + # N/A + my $file = shift; + sub failed { + error("Cannot write to `$file': $!"); + wait_and_exit(255); + } + my $fh = open_or_exit(">",$file); + print($fh @_) or failed(); + close($fh) or failed(); +} + +sub set_fh_blocking($) { + # Set filehandle as blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Remove non-blocking from the flags + $flags &= ~&O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + +sub set_fh_non_blocking($) { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Add non-blocking to the flags + $flags |= &O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {} + + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# $Global::max_procs_file = filename if --jobs is given a filename +# $Global::JobQueue = JobQueue object for the queue of jobs +# $Global::timeoutq = queue of times where jobs timeout +# $Global::newest_job = Job object of the most recent job started +# $Global::newest_starttime = timestamp of $Global::newest_job +# @Global::sshlogin +# $Global::minimal_command_line_length = min len supported by all sshlogins +# $Global::start_no_new_jobs = should more jobs be started? +# $Global::original_stderr = file handle for STDERR when the program started +# $Global::total_started = total number of jobs started +# $Global::joblog = filehandle of joblog +# $Global::debug = Is debugging on? +# $Global::exitstatus = status code of GNU Parallel +# $Global::quoting = quote the command to run + +sub init_run_jobs() { + # Set Global variables and progress signal handlers + # Do the copying of basefiles + # Returns: N/A + $Global::total_running = 0; + $Global::total_started = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; + + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + } + + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run", + "--sshloginfile $actual_file changed. reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay + and + $opt::delay-0.008 > ::now()-$Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($opt::limit and $sshlogin->limit()) { + # Over limit + next; + } + if(($opt::memfree or $opt::memsuspend) + and + $sshlogin->memfree() < $Global::memlimit) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since last login + next; + } + debug("run", $sshlogin->string(), + " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", + $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + + return $jobs_started; + } +} + +{ + my $no_more_file_handles_warned; + + sub start_another_job() { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? + if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + if($job->is_already_in_joblog()) { + $job->free_slot(); + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume + and $job->is_already_in_results())); + debug("start", "Command to run on '", + $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + if($job->virgin()) { + push(@Global::virgin_jobs,$job); + } else { + # Block already set: This is a retry + $job->write_block(); + } + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + my @arg; + for my $record (@{$job->{'commandline'}{'arg_list'}}) { + push @arg, map { $_->orig() } @$record; + } + ::error("No more processes: cannot run a single job. ". + "Something is wrong at @arg."); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ". + "Decreasing number of running jobs to $max.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. ", + "Try running 'parallel -j0 -N 100 --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + debug("start", "No more file handles. "); + return 0; + } + } +} + +sub init_progress() { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my $progress = progress(); + my $cpu_units = $opt::use_sockets_instead_of_threads ? "CPU sockets" : + ($opt::use_cores_instead_of_threads ? "CPU cores" : "CPU threads"); + return ("\nComputers / $cpu_units / Max jobs to run\n", + $progress->{'workerlist'},"\n",$progress->{'header'}); +} + +sub drain_job_queue(@) { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + my @command = @_; + my $sleep = 0.2; + my $sleepsum = 0; + do { + while($Global::total_running > 0) { + debug("init",$Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not + # closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my $progress = progress(); + ::status_no_nl("\r",$progress->{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. + if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + $sleepsum += $sleep; + if($sleepsum >= 1000) { + # At most do this every second + $sleepsum = 0; + changed_procs_file(); + changed_sshloginfile(); + start_more_jobs(); + } + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on."); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs."); + } + } + while($opt::sqlmaster and not $Global::sql->finished()) { + # SQL master + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::start_sqlworker) { + # Start an SQL worker as we are now sure there is work to do + $Global::start_sqlworker = 0; + if(my $pid = fork()) { + $Global::unkilled_sqlworker = $pid; + } else { + # Replace --sql/--sqlandworker with --sqlworker + my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ } + @Global::options_in_argv); + # exec the --sqlworker + exec($0,@ARGV,@command); + } + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty() + or + $opt::sqlmaster and not $Global::sql->finished()); + $Global::all_jobs_done = 1; + if($opt::progress) { + my $progress = progress(); + ::status("\r".$progress->{'status'}); + } +} + +sub toggle_progress() { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status_no_nl(init_progress()); + } +} + +{ + my $last_header; + my $eol; + + sub progress() { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return {"workerlist" => "", "header" => "", "status" => bar()}; + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my $workerno = 1; + my %wrk; + for my $w (@workers) { + my %i; + $i{'sshlogin'} = $w eq ":" ? "local" : $w; + $i{'no'} = $workerno++; + $i{'ncpu'} = ($Global::host{$w}->ncpus() || "-"); + $i{'jobslots'} = $Global::host{$w}->max_jobs_running(); + $i{'completed'} = ($Global::host{$w}->jobs_completed() || 0); + $i{'running'} = $Global::host{$w}->jobs_running(); + $i{'pct'} = $Global::total_started ? + (($i{'running'}+$i{'completed'})*100 / + $Global::total_started) : 0; + $i{'time'} = $i{'completed'} ? (time-$^T)/($i{'completed'}) : 0; + $wrk{$w} = \%i; + } + + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $wrk{$w}{'no'}.":".$wrk{$w}{'sshlogin'} ." / ". + $wrk{$w}{'ncpu'}." / ". + $wrk{$w}{'jobslots'}."\n"; + } + # Force $status to select one of the below formats + $status = "c"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/". + "%of started jobs/Average seconds to complete"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%/%.1fs ", + @{$wrk{$_}} + {'sshlogin','running','completed','pct','time'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%/%.1fs ", + @{$wrk{$_}} + {'no','running','completed','pct','time'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%", + @{$wrk{$_}} + {'sshlogin','running','completed','pct'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . join(" ",map { + sprintf("%s:%d/%d/%d%%", + @{$wrk{$_}} + {'no','running','completed','pct'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d/%d", + @{$wrk{$_}} + {'sshlogin','running','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d/%d", + @{$wrk{$_}} + {'no','running','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d", + @{$wrk{$_}} + {'sshlogin','completed'} + ); } @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . join(" ", map { + sprintf("%s:%d", + @{$wrk{$_}} + {'no','completed'} + ); } @workers); + } + if($last_header ne $header) { + $header .= "\n"; + $last_header = $header; + } else { + $header = ""; + } + if(not $eol) { + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + } + + return {"workerlist" => $workerlist, "header" => $header, + "status" => $status.$eol}; + } +} + +{ + + my ($first_completed, $smoothed_avg_time, $last_eta); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + my $completed = $Global::total_completed; + # In rare cases with -X will $completed > total_jobs() + my $total = ::max($Global::JobQueue->total_jobs(),$completed); + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = ::min($completed / $total,100); + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + if($eta*0.90 < $last_eta and $last_eta < $eta) { + # Eta jumped less that 10% up: Keep the last eta instead + $eta = $last_eta; + } else { + $last_eta = $eta; + } + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar() { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + if($Global::all_jobs_done) { $eta = now()-$Global::start_time; } + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}-> + replace_placeholders(["\177<\177>"],0,0) : ""; + $arg = decode_utf8($arg); + my $eta_dhms = ::seconds_to_time_units($eta); + my $bar_text = + sprintf("%d%% %d:%d=%s %s", + $pctcomplete*100, $completed, $left, $eta_dhms, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + # Prefix with zenity header + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($rows,$columns,$last_update_time); + + sub compute_terminal_size() { + # && true is to force spawning a shell and not just exec'ing + my @tput = qx{ tput lines cols /dev/null && true }; + $rows = 0 + $tput[0]; + $columns = 0 + $tput[1]; + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $stty = qx{ stty -a /dev/null && true }; + # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS + # MacOSX/IRIX/AIX/Tru64 + $stty =~ /(\d+) columns/ and do { $columns = $1; }; + $stty =~ /(\d+) rows/ and do { $rows = $1; }; + # GNU/Linux/Solaris + $stty =~ /columns (\d+)/ and do { $columns = $1; }; + $stty =~ /rows (\d+)/ and do { $rows = $1; }; + # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana + $stty =~ /columns = (\d+)/ and do { $columns = $1; }; + $stty =~ /rows = (\d+)/ and do { $rows = $1; }; + # QNX + $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); }; + } + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $resize = qx{ resize 2>/dev/null && true }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; }; + $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; }; + } + $rows ||= 24; + $columns ||= 80; + } + + sub update_terminal_size() { + # Only update once per second. + if($last_update_time < time) { + $last_update_time = time; + compute_terminal_size(); + # Set signal WINdow CHange to force recompute + $SIG{WINCH} = \&compute_terminal_size; + } + } + + sub terminal_rows() { + # Get the number of rows of the terminal. + # Returns: + # number of rows of the screen + update_terminal_size(); + return $rows; + } + + sub terminal_columns() { + # Get the number of columns of the terminal. + # Returns: + # number of columns of the screen + update_terminal_size(); + return $columns; + } +} + +sub untabify($) { + # Convert \t into spaces + my @out; + my ($src); + # Deal with multi-byte characters + for my $src (split("\t",$_[0])) { + push @out, $src. " "x(8-mbswidth($src)%8); + } + return join "",@out; +} + +# Prototype forwarding +sub get_job_with_sshlogin($); +sub get_job_with_sshlogin($) { + # Input: + # $sshlogin = which host should the job be run on? + # Uses: + # $opt::hostgroups + # $Global::JobQueue + # Returns: + # $job = next job object for $sshlogin if any available + my $sshlogin = shift; + my $job; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + if(not $job->suspended()) { + $job->set_sshlogin($sshlogin); + } + if(defined $opt::retries and $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == + grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + + +sub __REMOTE_SSH__() {} + + +sub read_sshloginfiles(@) { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand($) { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $Global::config_dir."/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + for(@Global::config_dirs) { + if(not -r $_."/".$file) { + # Try prepending $PARALLEL_HOME + ::error("Cannot open $file."); + ::wait_and_exit(255); + } else { + $file = $_."/".$file; + last; + } + } + } + return $file; +} + +sub read_sshloginfile($) { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin + # Returns: N/A + local $/ = "\n"; + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + $in_fh = open_or_exit("<", $file); + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin() { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. + # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + sub expand_range($) { + # Expand host[9-11,15]a[09-11]b + # [9-11,15] => 9 10 11 15 + # [09-11] => 09 10 11 + my ($in) = @_; + my ($prefix, $range, $suffix); + if(($prefix, $range, $suffix) = $in =~ /^(.*?)\[([-0-9,]*)\](.*)$/) { + my @res; + while(length $range) { + if($range =~ s/^,//) { + # skip + } elsif($range =~ s/^(\d+)-(\d+)//) { + my ($start, $end) = ($1, $2); + push @res, map { $prefix . $_ . $suffix } $start..$end; + } elsif($range =~ s/^(\d+)//) { + push @res, map { $prefix . $_ . $suffix } $1; + } else { + die "Cannot parse $in (at $range)"; + } + } + return map { expand_range($_) } @res; + } else { + return $in; + } + } + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + # Parse ,, and \, as , but do not split on that + # -S "ssh -J jump1,,jump2 host1,host2" => + # ssh -J jump1,jump2 host1 + # host2 + # Protect \, and ,, as \0 + $sshlogin =~ s/\\,|,,/\0/g; + # Protect , in ranges: [___,___] => [___\0___] + while($sshlogin =~ s/(\[[-0-9\0]*),(.*\])/$1\0$2/g) {} + for my $s (split /,|\n/, $sshlogin) { + # Replace \0 => , + $s =~ s/\0/,/g; + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + # Expand host[1-12,15]a[01-10]b + push @login, expand_range($s); + } + } + } + $Global::minimal_command_line_length = 100_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + $sshlogin->set_maxlength(Limits::Command::max_length()); + + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + $Global::usable_command_line_length = + # Usable len = maxlen - 3000 for wrapping, div 2 for hexing + int(($Global::minimal_command_line_length - 3000)/2); + if($opt::max_chars) { + if($opt::max_chars <= $Global::usable_command_line_length) { + $Global::usable_command_line_length = $opt::max_chars; + } else { + ::warning("Value for option -s should be < ". + $Global::usable_command_line_length."."); + } + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if(@Global::transfer_files or @opt::return + or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin."); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are ". + "no remote --sshlogin."); + } elsif (@opt::transfer_files) { + ::warning("--transferfile ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin."); + } elsif (defined $opt::cleanup and not %opt::template) { + ::warning("--cleanup ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there ". + "are no remote --sshlogin."); + } + } + } +} + +sub remote_hosts() { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile() { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles."); + ::wait_and_exit(255); + } + if(not $workdir) { + my $dummycmdline = + CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir); + } + } + debug("init", "basesetup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Copying of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub cleanup_basefile() { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $workdir; + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + push @cmd, $sshlogin->cleanup_cmd($file,$workdir); + } + } + debug("init", "basecleanup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Cleanup of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub run_gnu_parallel() { + my ($stdin,@args) = @_; + my $cmd = join "",map { " $_ & " } split /\n/, $stdin; + print $Global::original_stderr ` $cmd wait` ; + return 0 +} + +sub _run_gnu_parallel() { + # Run GNU Parallel + # This should ideally just fork an internal copy + # and not start it through a shell + # Input: + # $stdin = data to provide on stdin for GNU Parallel + # @args = command line arguments + # Returns: + # $exitstatus = exitcode of GNU Parallel run + # \@stdout = standard output + # \@stderr = standard error + my ($stdin,@args) = @_; + my ($exitstatus,@stdout,@stderr); + my ($stdin_fh,$stdout_fh)=(gensym(),gensym()); + my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par"); + unlink $stderrname; + + my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh, + $0,qw(--plain --shell /bin/sh --will-cite), @args); + if(my $writerpid = fork()) { + close $stdin_fh; + @stdout = <$stdout_fh>; + # Now stdout is closed: + # These pids should be dead or die very soon + while(kill 0, $writerpid) { ::usleep(1); } + die; +# reap $writerpid; +# while(kill 0, $pid) { ::usleep(1); } +# reap $writerpid; + $exitstatus = $?; + seek $stderr_fh, 0, 0; + @stderr = <$stderr_fh>; + close $stdout_fh; + close $stderr_fh; + } else { + close $stdout_fh; + close $stderr_fh; + print $stdin_fh $stdin; + close $stdin_fh; + exit(0); + } + return ($exitstatus,\@stdout,\@stderr); +} + +sub filter_hosts() { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. + # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_sockets_instead_of_threads + # $opt::use_cores_instead_of_threads + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref, + $maxlen_ref, $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref."); + + $Global::minimal_command_line_length = 100_000_000; + while (my ($string, $sshlogin) = each %Global::host) { + if($sshlogin->local()) { next } + my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) = + ($nsockets_ref->{$string},$ncores_ref->{$string}, + $nthreads_ref->{$string},$time_to_login_ref->{$string}, + $maxlen_ref->{$string}); + defined $nsockets or ::die_bug("nsockets missing: $string"); + defined $ncores or ::die_bug("ncores missing: $string"); + defined $nthreads or ::die_bug("nthreads missing: $string"); + defined $time_to_login or ::die_bug("time_to_login missing: $string"); + defined $maxlen or ::die_bug("maxlen missing: $string"); + # ncpus may be set by 4/hostname or may be undefined yet + my $ncpus = $sshlogin->{'ncpus'}; + # $nthreads may be 0 if GNU Parallel is not installed remotely + $ncpus = $nthreads || $ncpus || $sshlogin->ncpus(); + if($opt::use_cpus_instead_of_cores) { + $ncpus = $ncores || $ncpus; + } elsif($opt::use_sockets_instead_of_threads) { + $ncpus = $nsockets || $ncpus; + } elsif($opt::use_cores_instead_of_threads) { + $ncpus = $ncores || $ncpus; + } + $sshlogin->set_ncpus($ncpus); + $sshlogin->set_time_to_login($time_to_login); + $maxlen = $maxlen || Limits::Command::max_length(); + $sshlogin->set_maxlength($maxlen); + ::debug("init", "Timing from -S:$string ", + " ncpus:", $ncpus, + " nsockets:",$nsockets, + " ncores:", $ncores, + " nthreads:",$nthreads, + " time_to_login:", $time_to_login, + " maxlen:", $maxlen, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering() { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%nsockets = number of sockets of {host} + # \%ncores = number of cores of {host} + # \%nthreads = number of hyperthreaded cores of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + local $/ = "\n"; + my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo, + @down_hosts); + for (@_) { + ::debug("init","Read: ",$_); + chomp; + my @col = split /\t/, $_; + if($col[0] =~ /^parallel: Warning:/) { + # Timed out job: Ignore it + next; + } elsif(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval .?true.?\s([^\;]+);/ or + ::die_bug("col8 does not contain host: $col[8] in $_"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") { + # exit == 255 or exit == timeout (-1): ssh failed/timedout + # exit == 1: lsh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set nsockets, ncores, nthreads = 1 + ::warning("Could not figure out ". + "number of cpus on $host. Using 1."); + $nsockets{$host} = 1; + $ncores{$host} = 1; + $nthreads{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(/parallel: Warning: Cannot figure out number of/) { + next; + } + if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/ + or + /\tWarning: / + or + /\t(Host key fingerprint is|\+-.*-\+|\|.*\|)/ + or + /\t\S+: Undefined variable./ + ) { + # Skip these (from perl): + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + # Disconnected from 127.0.0.1 port 22 + # + # Skip these (from ssh): + # Warning: Permanently added * to the list of known hosts. + # Warning: Identity file * not accessible: * + # (VisualHostKey=yes) + # Host key fingerprint is SHA256:... + # +--[ED25519 256]--+ + # | o | + # +----[SHA256]-----+ + # + # Skip these (from csh): + # MANPATH: Undefined variable. + } elsif(not defined $nsockets{$col[0]}) { + $nsockets{$col[0]} = $col[1]; + } elsif(not defined $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not defined $nthreads{$col[0]}) { + $nthreads{$col[0]} = $col[1]; + } elsif(not defined $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not defined $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%nsockets, \%ncores, \%nthreads, \%time_to_login, + \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering() { + # Uses: + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + + sub sshwrapped { + # Wrap with ssh and --env + # Return $default_value if command fails + my $sshlogin = shift; + my $command = shift; + # wrapper that returns output "0\n" if the command fails + # E.g. parallel not installed => "0\n" + my $wcmd = q(perl -e '$a=`).$command.q(`; print $? ? "0".v010 : $a'); + my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{}); + my $job = Job->new($commandline); + $job->set_sshlogin($sshlogin); + $job->wrapped(); + return($job->{'wrapped'}); + } + + my(@sockets, @cores, @threads, @maxline, @echo); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + push(@sockets, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0"); + push(@cores, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0"); + push(@threads, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0"); + push(@maxline, $host."\t"."true $host; ". + sshwrapped($sshlogin, + "parallel --max-line-length-allowed")."\n\0"); + # 'echo' is used to get the fastest possible ssh login time + push(@echo, $host."\t"."true $host; ". + $sshlogin->wrap("echo $host")."\n\0"); + } + # --timeout 10: Setting up an SSH connection and running a simple + # command should never take > 10 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon is overloaded, try 3 times + my $cmd = + "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ". + "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true "; + $cmd = $Global::shell." -c ".Q($cmd); + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + + my ($host_fh,$in,$err); + open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd"); + ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo); + + if(not fork()) { + # Give the commands to run to the $cmd + close $host_fh; + print $in @sockets, @cores, @threads, @maxline, @echo; + close $in; + exit(); + } + close $in; + # If -0: $/ must be \n + local $/ = "\n"; + for(<$host_fh>) { + # TODO incompatible with '-quoting. Needs to be fixed differently + #if(/\'$/) { + # # if last char = ' then append next line + # # This may be due to quoting of \n in environment var + # $prepend .= $_; + # next; + #} + $_ = $prepend . $_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + return @out; +} + +sub onall($@) { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::debug + # $Global::exitstatus + # $Global::joblog + # $Global::quoting + # $opt::D + # $opt::arg_file_sep + # $opt::arg_sep + # $opt::colsep + # $opt::files + # $opt::files0 + # $opt::group + # $opt::joblog + # $opt::jobs + # $opt::keeporder + # $opt::linebuffer + # $opt::max_chars + # $opt::plain + # $opt::retries + # $opt::tag + # $opt::tee + # $opt::timeout + # $opt::ungroup + # %Global::host + # @opt::basefile + # @opt::env + # @opt::v + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""), + ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::files0) ? "--files0" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::cleanup) ? "--cleanup" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::plus) ? "--plus" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""), + (@Global::transfer_files ? map { "--tf ".Q($_) } + @Global::transfer_files : ""), + (@Global::ret_files ? map { "--return ".Q($_) } + @Global::ret_files : ""), + (@opt::env ? map { "--env ".Q($_) } @opt::env : ""), + (map { "-v" } @opt::v), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $qsshlogin = Q($sshlogin->string()); + my $qsshloginpw = Q($sshlogin->pwstring()); + if($qsshloginpw ne $qsshlogin) { + ::warning_once("Using password or SSHPASS with --(n)onall ". + "exposes the password", + "on the command line, ". + "making it visible to local users via `ps`."); + } + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog ".::Q($joblog); + } + my $quad = $opt::arg_file_sep || "::::"; + # If PARALLEL_ENV is set: Pass it on + my $penv=$Global::parallel_env ? + "PARALLEL_ENV=".Q($Global::parallel_env) : ''; + my $results; + if(defined $opt::results) { + $results = Q($opt::results) . $qsshlogin; + } + ::debug("init", "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""), + ((defined $opt::results) ? "--results ".$results : ""), + " -S $qsshloginpw ", + join(" ",shell_quote(@command,$quad,@argfiles)),"\n"); + print $parallel_fh "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? "--tagstring ".$qsshlogin : ""), + ((defined $opt::results) ? "--results ".$results : ""), + " -S $qsshloginpw ", + join(" ",shell_quote(@command,$quad,@argfiles)),"\0"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + my $fh = open_or_exit("<", $joblog); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + + +sub __SIGNAL_HANDLING__() {} + + +sub sigtstp() { + # Send TSTP signal (Ctrl-Z) to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("TSTP"); +} + +sub sigpipe() { + # Send SIGPIPE signal to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("PIPE"); +} + +sub signal_children() { + # Send signal to all children process groups + # and GNU Parallel itself + # Uses: + # %SIG + # Returns: N/A + my $signal = shift; + debug("run", "Sending $signal "); + kill $signal, map { -$_ } keys %Global::running; + # Use default signal handler for GNU Parallel itself + $SIG{$signal} = undef; + kill $signal, $$; +} + +sub save_original_signal_handler() { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; + # Allow Ctrl-Z to suspend and `fg` to continue + $SIG{TSTP} = \&sigtstp; + $SIG{PIPE} = \&sigpipe; + $SIG{CONT} = sub { + # Set $SIG{TSTP} again (it is undef'ed in sigtstp() ) + $SIG{TSTP} = \&sigtstp; + for my $job (values %Global::running) { + if($job->suspended()) { + # Force jobs to suspend, if they are marked as suspended. + # --memsupspend can suspend a job that will be resumed + # if the user presses CTRL-Z followed by `fg`. + $job->suspend(); + } else { + # Resume the rest of the jobs + $job->resume(); + } + } + }; +} + +sub list_running_jobs() { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ".$job->replaced()); + } +} + +sub start_no_new_jobs() { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGHUP received. No new jobs will be started.", + "$Global::progname: Waiting for these ".(keys %Global::running). + " jobs to finish. Send SIGTERM to stop now."); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reapers() { + # Run reaper until there are no more left + # Returns: + # @pids_reaped = pids of reaped processes + my @pids_reaped; + my $pid; + while($pid = reaper()) { + push @pids_reaped, $pid; + } + return @pids_reaped; +} + +sub reaper() { + # A job finished: + # * Set exitstatus, exitsignal, endtime. + # * Free ressources for new job + # * Update median runtime + # * Print output + # * If --halt = now: Kill children + # * Print progress + # Uses: + # %Global::running + # $opt::timeout + # $Global::timeoutq + # $opt::keeporder + # $Global::total_running + # Returns: + # $stiff = PID of child finished + my $stiff; + debug("run", "Reaper "); + if(($stiff = waitpid(-1, &WNOHANG)) <= 0) { + # No jobs waiting to be reaped + return 0; + } + + # $stiff = pid of dead process + my $job = $Global::running{$stiff}; + + # '-a <(seq 10)' will give us a pid not in %Global::running + # The same will one of the ssh -M: ignore + $job or return 0; + delete $Global::running{$stiff}; + $Global::total_running--; + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $job->set_exitstatus(-2); + $job->set_exitsignal(0); + } else { + $job->set_exitsignal($? & 127); + if($job->exitstatus()) { + # Exit status already set - probably by --timeout + } elsif($? & 127) { + # Killed by signal. Many shells return: 128 | $signal + $job->set_exitstatus(128 | $?); + } else { + # Normal exit + $job->set_exitstatus($? >> 8); + } + } + + debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")"); + if($Global::delayauto or $Global::sshdelayauto) { + if($job->exitstatus()) { + # Job failed: Increase delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 1.3; + $opt::sshdelay &&= $opt::sshdelay * 1.3; + } else { + # Job succeeded: Decrease delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 0.9; + $opt::sshdelay &&= $opt::sshdelay * 0.9; + } + debug("run", "delay:$opt::delay ssh:$opt::sshdelay "); + } + $job->set_endtime(::now()); + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + if($job->should_be_retried()) { + # Free up file handles + $job->free_ressources(); + } else { + # The job is done + $sshlogin->inc_jobs_completed(); + # Free the jobslot + $job->free_slot(); + if($opt::timeout and not $job->exitstatus()) { + # Update average runtime for timeout only for successful jobs + $Global::timeoutq->update_median_runtime($job->runtime()); + } + if($opt::keeporder and not $opt::latestline) { + # --latestline fixes --keeporder in Job::row() + $job->print_earlier_jobs(); + } else { + $job->print(); + } + if($job->should_we_halt() eq "now") { + # Kill children + ::kill_sleep_seq($job->pid()); + ::killall(); + ::wait_and_exit($Global::halt_exitstatus); + } + } + $job->cleanup(); + + if($opt::progress) { + my $progress = progress(); + ::status_no_nl("\r",$progress->{'status'}); + } + + debug("run", "jobdone \n"); + return $stiff; +} + + +sub __USAGE__() {} + + +sub killall() { + # Kill all jobs by killing their process groups + # Uses: + # $Global::start_no_new_jobs = we are stopping + # $Global::killall = Flag to not run reaper + $Global::start_no_new_jobs ||= 1; + # Do not reap killed children: Ignore them instead + $Global::killall ||= 1; + kill_sleep_seq(keys %Global::running); +} + +sub kill_sleep_seq(@) { + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) + my @pgrps = map { -$_ } @_; + my @term_seq = split/,/,$opt::termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + # for each signal+waittime: kill process groups still not dead + while(@term_seq) { + @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); + } +} + +sub kill_sleep() { + # Kill pids with a signal and wait a while for them to die + # Input: + # $signal = signal to send to @pids + # $sleep_max = number of ms to sleep at most before returning + # @pids = pids to kill (actually process groups) + # Uses: + # $Global::killall = set by killall() to avoid calling reaper + # Returns: + # @pids = pids still alive + my ($signal, $sleep_max, @pids) = @_; + ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); + kill $signal, @pids; + my $sleepsum = 0; + my $sleep = 0.001; + + while(@pids and $sleepsum < $sleep_max) { + if($Global::killall) { + # Killall => don't run reaper + while(waitpid(-1, &WNOHANG) > 0) { + $sleep = $sleep/2+0.001; + } + } elsif(reapers()) { + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + $sleepsum += $sleep; + # Keep only living children + @pids = grep { kill(0, $_) } @pids; + } + return @pids; +} + +sub wait_and_exit($) { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all jobs without printing + killall(); + } + for (keys %Global::unkilled_children) { + # Kill any (non-jobs) children (e.g. reserved processes) + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + if($Global::unkilled_sqlworker) { + waitpid($Global::unkilled_sqlworker,0); + } + # Avoid: Warning: unable to close filehandle properly: No space + # left on device during global destruction. + $SIG{__WARN__} = sub {}; + if($opt::_parset) { + # Make the shell script return $error + print "$Global::parset_endstring\nreturn $error"; + } + exit($error); +} + +sub die_usage() { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage() { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "GNU Parallel can do much more. See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2024, December 22). GNU Parallel 20241222 ('Bashar').", + " Zenodo. https://doi.org/10.5281/zenodo.14550073", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "",); +} + +sub citation_notice() { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if $PARALLEL_HOME/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + grep { -e "$_/will-cite" } @Global::config_dirs) { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2024, December 22). GNU Parallel 20241222 ('Bashar').", + " Zenodo. https://doi.org/10.5281/zenodo.14550073", + "", + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "", + "To silence this citation notice: run 'parallel --citation' once.", + "" + ); + mkdir $Global::config_dir; + # Number of times the user has run GNU Parallel without showing + # willingness to cite + my $runs = 0; + if(open (my $fh, "<", $Global::config_dir. + "/runs-without-willing-to-cite")) { + $runs = <$fh>; + close $fh; + } + $runs++; + if(open (my $fh, ">", $Global::config_dir. + "/runs-without-willing-to-cite")) { + print $fh $runs; + close $fh; + if($runs >= 10) { + ::status("Come on: You have run parallel $runs times. ". + "Isn't it about time ", + "you run 'parallel --citation' once to silence ". + "the citation notice?", + ""); + } + } + } +} + +sub status(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh map { ($_, "\n") } @w; + flush $fh; +} + +sub status_no_nl(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +{ + my %warnings; + sub warning_once(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + $warnings{@w}++ or + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); + } +} + +sub error(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status(map { ($prog.": Error: ". $_); } @w); +} + +sub die_bug($) { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug. ", + "Please follow\n", + "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n", + "\n", + "Include this in the report:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + ::wait_and_exit(255); +} + +sub version() { + # Returns: N/A + print join + ("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2007-2024 Ole Tange, http://ole.tange.dk and Free Software", + "Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later ", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: https://www.gnu.org/software/${Global::progname}\n", + "When using programs that use GNU Parallel to process data for publication", + "please cite as described in 'parallel --citation'.\n", + ); +} + +sub citation() { + # Returns: N/A + my ($all_argv_ref,$argv_options_removed_ref) = @_; + my $all_argv = "@$all_argv_ref"; + my $no_opts = "@$argv_options_removed_ref"; + $all_argv=~s/--citation//; + if($all_argv ne $no_opts) { + ::warning("--citation ignores all other options and arguments."); + ::status(""); + } + + ::status( + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + "\@software{tange_2024_14550073,", + " author = {Tange, Ole},", + " title = {GNU Parallel 20241222 ('Bashar')},", + " month = Dec,", + " year = 2024,", + " note = {{GNU Parallel is a general parallelizer to run", + " multiple serial command line programs in parallel", + " without changing them.}},", + " publisher = {Zenodo},", + " doi = {10.5281/zenodo.14550073},", + " url = {https://doi.org/10.5281/zenodo.14550073}", + "}", + "", + "(Feel free to use \\nocite{tange_2024_14550073})", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt", + "" + ); + while(not grep { -e "$_/will-cite" } @Global::config_dirs) { + print "\nType: 'will cite' and press enter.\n> "; + my $input = ; + if(not defined $input) { + exit(255); + } + if($input =~ /will cite/i) { + if(mkdir $Global::config_dir) { + # Recompute @Global::config_dirs so we can break out of the loop. + init_globals(); + } + if(open (my $fh, ">", $Global::config_dir."/will-cite")) { + close $fh; + ::status( + "", + "Thank you for your support: You are the reason why there is funding to", + "continue maintaining GNU Parallel. On behalf of future versions of", + "GNU Parallel, which would not exist without your support:", + "", + " THANK YOU SO MUCH", + "", + "It is really appreciated. The citation notice is now silenced.", + ""); + } else { + ::status( + "", + "Thank you for your support. It is much appreciated. The citation", + "cannot permanently be silenced. Use '--will-cite' instead.", + "", + "If you use '--will-cite' in scripts to be run by others you are making", + "it harder for others to see the citation notice. The development of", + "GNU Parallel is indirectly financed through citations, so if users", + "do not know they should cite then you are making it harder to finance", + "development. However, if you pay 10000 EUR, you should feel free to", + "use '--will-cite' in scripts.", + ""); + last; + } + } + } +} + +sub show_limits() { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal usable size of command: ", + $Global::usable_command_line_length,"\n", + "\n", + "Execution will continue now, ", + "and it will try to read its input\n", + "and run commands; if this is not ", + "what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub embed() { + # Give an embeddable version of GNU Parallel + # Tested with: bash, zsh, ksh, ash, dash, sh + my $randomstring = "cut-here-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + if(not -f $0 or not -r $0) { + ::error("--embed only works if parallel is a readable file"); + exit(255); + } + # Read the source from $0 + my $source = slurp_or_exit($0); + my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER}; + my $env_parallel_source; + my $shell = $Global::shell; + $shell =~ s:.*/::; + for(which("env_parallel.$shell")) { + -r $_ or next; + # Read the source of env_parallel.shellname + $env_parallel_source .= slurp_or_exit($_); + last; + } + print "#!$Global::shell + +# Copyright (C) 2007-2024 $user, Ole Tange, http://ole.tange.dk +# and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +"; + + print q! +# Embedded GNU Parallel created with --embed +parallel() { + # Start GNU Parallel without leaving temporary files + # + # Not all shells support 'perl <(cat ...)' + # This is a complex way of doing: + # perl <(cat <<'cut-here' + # [...] + # ) "$@" + # and also avoiding: + # [1]+ Done cat + + # Make a temporary fifo that perl can read from + _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo); + do { + $f = "/tmp/parallel-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $f); + mkfifo($f,0600); + print $f;'` + # Put source code into temporary file + # so it is easy to copy to the fifo + _file_with_GNU_Parallel_source=`mktemp`; +!, + "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n", + $source, + $randomstring,"\n", + q! + # Copy the source code from the file to the fifo + # and remove the file and fifo ASAP + # 'sh -c' is needed to avoid + # [1]+ Done cat + sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &" + + # Read the source from the fifo + perl $_fifo_with_GNU_Parallel_source "$@" +} +!, + $env_parallel_source, + q! + +# This will call the functions above +parallel -k echo ::: Put your code here +env_parallel --session +env_parallel -k echo ::: Put your code here +parset p,y,c,h -k echo ::: Put your code here +echo $p $y $c $h +echo You can also activate GNU Parallel for interactive use by: +echo . "$0" +!; + ::status("Redirect the output to a file and add your changes at the end:", + " $0 --embed > new_script"); +} + +sub pack_combined_executable { + my ($before_ref,$with_argsep_ref,$argv_ref) = @_; + my @parallelopts; + my $skip_next; + # Remove '--combine-exec file' from options + for(@{$before_ref}[0..(arrayindex($before_ref,$with_argsep_ref))-1]) { + if (/^--combine-?exec(utable)?$/ || $skip_next) { + # Also skip the filename given to --combine-exec + $skip_next = !$skip_next; + next; + } + push @parallelopts, $_; + } + # From ::: and to end + my @argsep = @{$with_argsep_ref}[($#ARGV+1)..$#$with_argsep_ref]; + # The executable is now the first in @ARGV + my $execname = shift @ARGV; + # The rest of @ARGV are options for $execname + my @execopts = @ARGV; + debug("combine", + "Parallel opts: @parallelopts ", + "Executable: $execname ", + "Execopts: @execopts ", + "Argsep: @argsep\n"); + # Read the the executable + my $exec = slurp_or_exit(which($execname)); + # Read the source of GNU Parallel and the executable + my $parallel = slurp_or_exit($0); + # Remove possibly __END__ from GNU Parallel + $parallel =~ s/^__END__.*//s; + if(-t $Global::original_stderr) { + ::status( + "Please be aware that combining GNU Parallel and '$execname'", + "into a combined executable will make the whole executable", + "licensed under GPLv3 (section 5.c).", + "", + "If the license of '$execname' is incompatible with GPLv3,", + "you cannot legally convey copies of the combined executable", + "to others. You can, however, still run them yourself.", + "", + "The combined executable will not have a citation notice,", + "so it is your resposibilty to advice that academic tradition", + "requires the users to cite GNU Parallel.", + "" + ); + my $input; + do { + ::status_no_nl("\nType: 'I agree' and press enter.\n> "); + $input = ; + if(not defined $input) { + exit(255); + } + } until($input =~ /I agree/i); + } + write_or_exit($opt::combineexec, + $parallel, + "\n__END__\n", + (map { "$_\0\n" } @parallelopts), "\0\0\n", + $execname, "\0\0\n", + (map { "$_\0\n" } @execopts), "\0\0\n", + (map { "$_\0\n" } @argsep), "\0\0\n", + $exec); + # Set +x permission + chmod 0700, $opt::combineexec; + exit(0); +} + +sub unpack_combined_executable { + # If the script is a combined executable, + # it will have stuff in (I.e. after __END__) + my $combine_exec = join("",); + if(length $combine_exec) { + # Parse the + # + # __END__ + # Option for GNU Parallel\0\n + # Option for GNU Parallel\0\n + # \0\0\n + # Name of executable\0\0\n + # Option for executable\0\n + # Option for executable\0\n + # \0\0\n + # argsep + args if any\0\n + # argsep + args if any\0\n + # \0\0\n + # <> + # + # parallel --combine --pipe -j10% --recend '' myscript --myopt myval + # __END__ + # --pipe\0\n --pipe + # -j10%\0\n -j10% + # --recend\0\n --recend + # \0\n '' + # \0\0\n end-of-parallel-options + # myscript\0\0\n myscript + # --myopt\0\n --myopt + # myval\0\n myval + # \0\0\n end-of-myscript-options + # \0\0\n no argsep + # <> + # + # parallel --combine -j10% myscript ::: + # __END__ + # -j10%\0\n + # \0\0\n end-of-parallel-options + # myscript\0\0\n + # \0\0\n end-of-myscript-options + # :::\0\n + # \0\0\n + # <> + + my ($opts,$execname,$execopts,$argsep,$exec) = + split /\0\0\n/,$combine_exec,5; + # Make a tmpdir with a file called $execname + local %ENV; + $ENV{TMPDIR} ||= "/tmp"; + my $dir = File::Temp::tempdir($ENV{'TMPDIR'} . "/parXXXXX", CLEANUP => 1); + my $script = $dir."/".$execname; + write_or_exit($script,$exec); + # Set +x permission + chmod 0700, $script; + # Mark it for unlinking later + $Global::unlink{$script}++; + $Global::unlink{$dir}++; + # pass the options for GNU Parallel + my @opts = split /\0\n/, $opts; + my @execopts = split /\0\n/, $execopts; + if(length $argsep) { + # Only add argsep if set + unshift(@ARGV, split(/\0\n/,$argsep)); + } + unshift(@ARGV,@opts,$script,@execopts); + } +} + + +sub __GENERIC_COMMON_FUNCTION__() {} + + +sub mkdir_or_die($) { + # If dir is not executable: die + my $dir = shift; + # The eval is needed to catch exception from mkdir + eval { File::Path::mkpath($dir); }; + if(not -x $dir) { + ::error("Cannot change into non-executable dir $dir: $!"); + ::wait_and_exit(255); + } +} + +sub tmpfile(@) { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + my($filehandle,$filename) = + ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); + if(wantarray) { + return($filehandle,$filename); + } else { + # Separate unlink due to NFS dealing badly with File::Temp + unlink $filename; + return $filehandle; + } +} + +sub tmpname($) { + # Select a name that does not exist + # Do not create the file as it may be used for creating a socket (by tmux) + # Remember the name in $Global::unlink to avoid hitting the same name twice + my $name = shift; + my($tmpname); + if(not -w $ENV{'TMPDIR'}) { + my $qtmp = ::Q($ENV{'TMPDIR'}); + if(not -e $ENV{'TMPDIR'}) { + ::error("Tmpdir $qtmp does not exist.","Try: mkdir -p $qtmp"); + } else { + ::error("Tmpdir $qtmp is not writable.","Try: chmod +w $qtmp"); + } + ::wait_and_exit(255); + } + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $tmpname or $Global::unlink{$tmpname}++); + return $tmpname; +} + +sub tmpfifo() { + # Find an unused name and mkfifo on it + my $tmpfifo = tmpname("fif"); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub rm(@) { + # Remove file and remove it from %Global::unlink + # Uses: + # %Global::unlink + delete @Global::unlink{@_}; + unlink @_; +} + +sub size_of_block_dev() { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + my $fh = open_or_exit("<", $blockdev); + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; +} + +sub qqx(@) { + # Like qx but with clean environment (except for @keep) + # and STDERR ignored + # This is needed if the environment contains functions + # that /bin/sh does not understand + my %env; + # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID + # ssh with Kerberos needs KRB5CCNAME + # sshpass needs SSHPASS + # tmux needs LC_CTYPE + # lsh needs HOME LOGNAME + my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE + HOME LOGNAME SSHPASS); + @env{@keep} = @ENV{@keep}; + local %ENV; + %ENV = %env; + if($Global::debug) { + # && true is to force spawning a shell and not just exec'ing + return qx{ @_ && true }; + } else { + # CygWin does not respect 2>/dev/null + # so we do that by hand + # This trick does not work: + # https://stackoverflow.com/q/13833088/363028 + # local *STDERR; + # open(STDERR, ">", "/dev/null"); + open(local *CHILD_STDIN, '<', '/dev/null') or die $!; + open(local *CHILD_STDERR, '>', '/dev/null') or die $!; + my $out; + # eval is needed if open3 fails (e.g. command line too long) + eval { + my $pid = open3( + '<&CHILD_STDIN', + $out, + '>&CHILD_STDERR', + # && true is to force spawning a shell and not just exec'ing + "@_ && true"); + my @arr = <$out>; + close $out; + # Make sure $? is set + waitpid($pid, 0); + return wantarray ? @arr : join "",@arr; + } or do { + # If eval fails, force $?=false + `false`; + }; + } +} + +sub uniq(@) { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min(@) { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max(@) { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum(@) { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero($) { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty($) { + my $a = shift; + return $a ? $a : ""; +} + +sub undef_if_empty($) { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub multiply_binary_prefix(@) { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my @v = @_; + for(@v) { + defined $_ or next; + s/ki/*1024/gi; + s/mi/*1024*1024/gi; + s/gi/*1024*1024*1024/gi; + s/ti/*1024*1024*1024*1024/gi; + s/pi/*1024*1024*1024*1024*1024/gi; + s/ei/*1024*1024*1024*1024*1024*1024/gi; + s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + s/K/*1024/g; + s/M/*1024*1024/g; + s/G/*1024*1024*1024/g; + s/T/*1024*1024*1024*1024/g; + s/P/*1024*1024*1024*1024*1024/g; + s/E/*1024*1024*1024*1024*1024*1024/g; + s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + s/k/*1000/g; + s/m/*1000*1000/g; + s/g/*1000*1000*1000/g; + s/t/*1000*1000*1000*1000/g; + s/p/*1000*1000*1000*1000*1000/g; + s/e/*1000*1000*1000*1000*1000*1000/g; + s/z/*1000*1000*1000*1000*1000*1000*1000/g; + s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $_ = eval $_; + } + return wantarray ? @v : $v[0]; +} + +sub multiply_time_units($) { + # Evalualte numbers with time units + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = string time units + # Returns: + # $value = int in seconds + my @v = @_; + for(@v) { + defined $_ or next; + if(/[dhms]/i) { + s/s/*1+/gi; + s/m/*60+/gi; + s/h/*3600+/gi; + s/d/*86400+/gi; + # 1m/3 => 1*60+/3 => 1*60/3 + s/\+(\D)/$1/gi; + } + $_ = eval $_."-0"; + } + return wantarray ? @v : $v[0]; +} + +sub seconds_to_time_units() { + # Convert seconds into ??d??h??m??s + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = int in seconds + # Returns: + # $str = string time units + my $s = shift; + my $str; + my $d = int($s/86400); + $s -= $d * 86400; + my $h = int($s/3600); + $s -= $h * 3600; + my $m = int($s/60); + $s -= $m * 60; + if($d) { + $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s); + } elsif($h) { + $str = sprintf("%dh%02dm%02ds",$h,$m,$s); + } elsif($m) { + $str = sprintf("%dm%02ds",$m,$s); + } else { + $str = sprintf("%ds",$s); + } + return $str; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full() { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "b"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete.", + "Cannot append to buffer file in $ENV{'TMPDIR'}.", + "Is the disk full?", + "Change \$TMPDIR with --tmpdir or use --compress."); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree($$) { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? + # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if(1 == $spaces) { + $s =~ s/\s+/ /mg; + } elsif(2 == $spaces) { + # Keep newlines + $s =~ s/\n\n+/\n/sg; + $s =~ s/[ \t]+/ /mg; + } elsif(3 == $spaces) { + # Keep perl code required space + $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg; + $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname() { + local $/ = "\n"; + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which(@) { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + if($prg =~ m:/:) { + # Test if program with full path exists + push(@which, grep { not -d $_ and -x $_ } $prg); + } + } + ::debug("which", "$which[0] in $ENV{'PATH'}\n"); + return wantarray ? @which : $which[0]; +} + +{ + my ($regexp,$shell,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + ::debug("init","Parent of $pid\n"); + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ion ksh ksh93 mksh pdksh + # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh + + my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ion ksh + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh -bash), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh + $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|\S*busybox |\S*rosetta )'. + '(-?)('. $shell. '))( *$| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh + # but sh also disguises itself as -sh + # (TODO When does that happen?) + "-sh" => ["sh"], + "-csh" => ["tcsh", "csh"], + # ash disguises itself as -ash + "-ash" => ["ash", "dash", "sh"], + # dash disguises itself as -dash + "-dash" => ["dash", "ash", "sh"], + # bash disguises itself as -bash + "-bash" => ["bash", "sh"], + # ksh disguises itself as -ksh + "-ksh" => ["ksh", "sh"], + # zsh disguises itself as -zsh + "-zsh" => ["zsh", "sh"], + ); + } + if($^O eq "linux") { + # Optimized for GNU/Linux + my $testpid = $pid; + my $shellpath; + my $shellline; + while($testpid) { + if(open(my $fd, "<", "/proc/$testpid/cmdline")) { + local $/="\0"; + chomp($shellline = <$fd>); + if($shellline =~ /busybox$|rosetta$/) { + # Possibly: busybox \0 sh or .../rosetta \0 /bin/bash + # Skip busybox/rosetta + chomp($shellline = <$fd>); + } + if($shellline =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", + "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname, + @{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + return $shellpath; + } + } + # Get parent pid + if(open(my $fd, "<", "/proc/$testpid/stat")) { + my $line = <$fd>; + close $fd; + # Parent pid is field 4 + $testpid = (split /\s+/, $line)[3]; + } else { + # Something is wrong: fall back to old method + last; + } + } + } + # if -sh or -csh try readlink /proc/$$/exe + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + if($name_of_ref->{$testpid} =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname,@{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table() { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Minix uses cols 2,3 and can have newlines in the command + # so lines not having numbers in cols 2,3 must be ignored + my $minix = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'android' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'minix' => $minix, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or + ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <> + # must match: 24436 21224 <> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now() { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep($) { + # Sleep this many milliseconds. + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug("timing",int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub make_regexp_ungreedy { + my $regexp = shift; + my $class_state = 0; + my $escape_state = 0; + my $found = 0; + my $ungreedy = ""; + my $c; + + for $c (split (//, $regexp)) { + if ($found) { + if($c ne "?") { $ungreedy .= "?"; } + $found = 0; + } + $ungreedy .= $c; + + if ($escape_state) { $escape_state = 0; next; } + if ($c eq "\\") { $escape_state = 1; next; } + if ($c eq '[') { $class_state = 1; next; } + if ($class_state) { + if($c eq ']') { $class_state = 0; } + next; + } + # Quantifiers: + * {...} + if ($c =~ /[*}+]/) { $found = 1; } + } + if($found) { $ungreedy .= '?'; } + return $ungreedy; +} + + +sub __KILLER_REAPER__() {} + +sub reap_usleep() { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reapers()) { + if(not $Global::total_completed % 100) { + if($opt::timeout) { + # Force cleaning the timeout queue for every 100 jobs + # Fixes potential memleak + $Global::timeoutq->process_timeouts(); + } + } + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem($opt::memfree*0.5); + } + if($opt::memsuspend) { + suspend_young_if_not_enough_mem($opt::memsuspend); + } + if($opt::limit) { + kill_youngest_if_over_limit(); + } + exit_if_disk_full(); + if($Global::linebuffer) { + my $something_printed = 0; + if($opt::keeporder and not $opt::latestline) { + for my $job (values %Global::running) { + $something_printed += $job->print_earlier_jobs(); + } + } else { + for my $job (values %Global::running) { + $something_printed += $job->print(); + } + } + if($something_printed) { $ms = $ms/2+0.001; } + } + if($ms > 0.002) { + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + if($opt::delay and not $Global::linebuffer) { + # The 0.004s is approximately the time it takes for one round + my $next_earliest_start = + $Global::newest_starttime + $opt::delay - 0.004; + my $remaining_ms = 1000 * ($next_earliest_start - ::now()); + # The next job can only start at $next_earliest_start + # so sleep until then (but sleep at least $ms) + usleep(::max($ms,$remaining_ms)); + } else { + usleep($ms); + } + # --compress needs $SIG{CHLD} unset + $SIG{CHLD} = 'DEFAULT'; + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngest_if_over_limit() { + # Check each $sshlogin we are over limit + # If over limit: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->limit() == 2) { + $job->kill(); + last; + } + } + } +} + +sub suspend_young_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than $limit free mem: suspend some of the young children + # Else: Resume all jobs + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + my $free = $sshlogin->memfree(); + if($free < 2*$limit) { + # Suspend all jobs (resume some of them later) + map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}}; + my @jobs = (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}); + # how many should be running? + # limit*1 => 1; + # limit*1.5 => 2; + # limit*1.75 => 4; + # free < limit*(2-1/2^n); + # => + # 1/(2-free/limit) < 2^n; + my $run = int(1/(2-$free/$limit)); + $run = ::min($run,$#jobs); + # Resume the oldest running + for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) { + ::debug("mem","\nResume ",$run+1, " jobs. Seq ", + $job->seq(), " resumed ", + $sshlogin->memfree()," < ",2*$limit); + $job->resume(); + } + } else { + for my $job (@{$jobs_of{$sshlogin}}) { + if($job->suspended()) { + $job->resume(); + ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1, + " jobs. Seq ", $job->seq(), " resumed ", + $sshlogin->memfree()," > ",2*$limit); + last; + } + } + } + } +} + +sub kill_youngster_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $limit) { + ::debug("mem","\n",map { $_->seq()." " } + (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$limit); + $job->kill(); + $job->set_killreason("mem"); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK? ", + $sshlogin->memfree()," > ",$limit); + } +} + + +sub __DEBUGGING__() {} + + +sub debug(@) { + # Uses: + # $Global::debug + # %Global::fh + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fh{2}) { + # Original stderr was saved + my $stderr = $Global::fh{2}; + print $stderr @_[1..$#_]; + } else { + print STDERR @_[1..$#_]; + } + } +} + +sub my_memory_usage() { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + local $/ = "\n"; + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size() { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump(@) { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak(@) { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp() { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + + +sub __OBJECT_ORIENTED_PARTS__() {} + + +package SSHLogin; + +sub new($$) { + my $class = shift; + my $s = shift; + my $origs = $s; + my %hostgroups; + my $ncpus; + my $sshcommand; + my $user; + my $password; + my $host; + my $port; + my $local; + my $string; + # SSHLogins can have these formats: + # @grp1+grp2/ => use only servers in @grp1+grp2 + # [@grp+grp/][ncpu/][ssh command ][[user][:password]@][host[:port]] + # + # Here most combinations are generated: + # grp=grp1+grp2 + # ncpu=4 + # ssh=/usr/bin/ssh + # user=user + # pass=pass + # host=host + # port=port + # parallel -k echo {1}{2}{3}{4}{5}{=1'$_ = ($arg[4]||$arg[5]) ? "\@" : ""' =}$host{6} ::: '' @$grp/ ::: '' $ncpu/ ::: '' $ssh' ' ::: '' $user ::: '' :$pass ::: '' :$port + # host + # host:port + # :pass@host + # :pass@host:port + # user@host + # user@host:port + # user:pass@host + # user:pass@host:port + # /usr/bin/ssh host + # /usr/bin/ssh host:port + # /usr/bin/ssh :pass@host + # /usr/bin/ssh :pass@host:port + # /usr/bin/ssh user@host + # /usr/bin/ssh user@host:port + # /usr/bin/ssh user:pass@host + # /usr/bin/ssh user:pass@host:port + # ncpu/host + # ncpu/host:port + # ncpu/:pass@host + # ncpu/:pass@host:port + # ncpu/user@host + # ncpu/user@host:port + # ncpu/user:pass@host + # ncpu/user:pass@host:port + # ncpu//usr/bin/ssh host + # ncpu//usr/bin/ssh host:port + # ncpu//usr/bin/ssh :pass@host + # ncpu//usr/bin/ssh :pass@host:port + # ncpu//usr/bin/ssh user@host + # ncpu//usr/bin/ssh user@host:port + # ncpu//usr/bin/ssh user:pass@host + # ncpu//usr/bin/ssh user:pass@host:port + # @grp1+grp2/host + # @grp1+grp2/host:port + # @grp1+grp2/:pass@host + # @grp1+grp2/:pass@host:port + # @grp1+grp2/user@host + # @grp1+grp2/user@host:port + # @grp1+grp2/user:pass@host + # @grp1+grp2/user:pass@host:port + # @grp1+grp2//usr/bin/ssh host + # @grp1+grp2//usr/bin/ssh host:port + # @grp1+grp2//usr/bin/ssh :pass@host + # @grp1+grp2//usr/bin/ssh :pass@host:port + # @grp1+grp2//usr/bin/ssh user@host + # @grp1+grp2//usr/bin/ssh user@host:port + # @grp1+grp2//usr/bin/ssh user:pass@host + # @grp1+grp2//usr/bin/ssh user:pass@host:port + # @grp1+grp2/ncpu/host + # @grp1+grp2/ncpu/host:port + # @grp1+grp2/ncpu/:pass@host + # @grp1+grp2/ncpu/:pass@host:port + # @grp1+grp2/ncpu/user@host + # @grp1+grp2/ncpu/user@host:port + # @grp1+grp2/ncpu/user:pass@host + # @grp1+grp2/ncpu/user:pass@host:port + # @grp1+grp2/ncpu//usr/bin/ssh host + # @grp1+grp2/ncpu//usr/bin/ssh host:port + # @grp1+grp2/ncpu//usr/bin/ssh :pass@host + # @grp1+grp2/ncpu//usr/bin/ssh :pass@host:port + # @grp1+grp2/ncpu//usr/bin/ssh user@host + # @grp1+grp2/ncpu//usr/bin/ssh user@host:port + # @grp1+grp2/ncpu//usr/bin/ssh user:pass@host + # @grp1+grp2/ncpu//usr/bin/ssh user:pass@host:port + + # [@grp+grp/][ncpu/][ssh command ][[user][:password]@][host[:port]] + if($s =~ s:^\@([^/]+)/::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+|,/, $1); + } + + # [ncpu/][ssh command ][[user][:password]@][host[:port]] + if ($s =~ s:^(\d+)/::) { $ncpus = $1; } + + # Why disallow space in password? + # Example: + # C:/bin/ssh user:C:/bin/ssh@host + # Should this parse as: + # user 'C' with password '/bin/ssh user:C:/bin/ssh' + # or + # cmd 'C:/bin/ssh' user 'user' with password 'C:/bin/ssh' + # This is impossible to determine. + # With space forbidden in password it uniquely parses as the 2nd. + # [ssh command ][[user][:password]@][host[:port]] + if($s =~ s/^(.*) //) { $sshcommand = $1; } + + # [[user][:password]@][host[:port]] + # An SSHLogin is always in the hostgroup of its "user:pass@host:port" + $hostgroups{$s} = 1; + if($s =~ s/^(.*)@//) { + my $userpw = $1; + # user[:pass] + if($userpw =~ s/:(.*)//) { + $password = $1; + if($password eq "") { $password = $ENV{'SSHPASS'} } + if(not ::which("sshpass")) { + ::error("--sshlogin with password requires sshpass installed"); + ::wait_and_exit(255); + } + } + # This might be empty - that is OK + $user = $userpw; + } + # [host[:port]] + # host can have these formats: + # hostname (: as local is dealt with later) + # IPv4 i.p.n.o + # with port + # * :22 + # * :ssh + # IPv6: + # * \[b11010000011101] + # * \[o64072/14] + # * \[xd074/14] + # * \[208.116.0.0/14] + # with port + # * .22 + # * p22 + # * #22 + if(not $s =~ /:.*:/ + and + $s =~ s/^([-a-z0-9._]+)//i) { + # Not IPv6 (IPv6 has 2 or more ':') + $host = $1; + } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) { + # RFC2673 allows for: + # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14] + $host = $1; + } elsif($s =~ s/^\[([0-9a-f:]+)\]//i + or + $s =~ s/^([0-9a-f:]+)//i) { + # RFC5952 + # [2001:db8::1]:80 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + # 2001:db8::1:80 - not supported + # 2001:db8::1 port 80 - not supported + $host = $1; + } + # [:port] + if($s =~ s/^:(\w+)//i) { + $port = $1; + } elsif($s =~ s/^[p\.\#](\w+)//i) { + # RFC5952 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + $port = $1; + } + + if($s and $s ne ':') { + ::die_bug("SSHLogin parser failed on '$origs' => '$s'"); + } + + $string = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($user && $user."@"). + ($host && $host). + ($port && ":$port"); + my $userpassword = ($user && $user).($password && ":".$password); + my $pwstring = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($userpassword && $userpassword."@"). + ($host && $host). + ($port && ":$port"); + if($host eq ':') { + $local = 1; + $string = ":"; + } else { + $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh"; + } + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + # Used for file names for loadavg + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'pwstring' => $pwstring, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'sshcommand' => $sshcommand, + 'user' => $user, + 'password' => $password, + 'host' => $host, + 'port' => $port, + 'hostgroups' => \%hostgroups, + 'local' => $local, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/loadavg", + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/swap_activity", + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY($) { + my $self = shift; + # Remove temporary files if they are created. + ::rm($self->{'loadavg_file'}); + ::rm($self->{'swap_activity_file'}); +} + +sub string($) { + my $self = shift; + return $self->{'string'}; +} + +sub pwstring($) { + my $self = shift; + return $self->{'pwstring'}; +} + +sub host($) { + my $self = shift; + return $self->{'host'}; +} + +sub sshcmd($) { + # Give the ssh command without hostname + # Returns: + # "sshpass -e ssh -p port -l user" + my $self = shift; + my @local; + # [sshpass -e] ssh -p port -l user + if($self->{'password'}) { push @local, "sshpass -e"; } + # [ssh] -p port -l user + # TODO sshpass + space + push @local, $self->{'sshcommand'}; + # [-p port] -l user + if($self->{'port'}) { push @local, '-p',$self->{'port'}; } + # [-l user] + if($self->{'user'}) { push @local, '-l',$self->{'user'}; } + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + push @local, "-S", $control_path; + $SIG{'TERM'} = undef; + # Run a sleep that outputs data, so it will discover + # if the ssh connection closes. + my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}'); + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # STDERR >/dev/null to ignore + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + exec(@local, "-MT", $self->{'host'}, "--", + "perl", "-e", $sleep); + } + } + push @local, "-S", ::Q($control_path); + } + return "@local"; +} + +sub wrap($@) { + # Input: + # @cmd = shell command to run on remote + # Returns: + # $sshwrapped = ssh remote @cmd + my $self = shift; + my @remote = @_; + return(join " ", + $self->sshcmd(), $self->{'host'}, "--", "exec", @remote); +} + +sub hexwrap($@) { + # Input: + # @cmd = perl expresion to eval + # Returns: + # $hexencoded = perl command that decodes hex and evals @cmd + my $self = shift; + my $cmd = join("",@_); + + # "#" is needed because Perl on MacOS X adds NULs + # when running pack q/H10000000/ + my $hex = unpack "H*", $cmd."#"; + # csh does not deal well with > 1000 chars in one word + # Insert space every 1000 char + $hex =~ s/\G.{1000}\K/ /sg; + # Explanation: + # Write this without special chars: eval pack 'H*', join '',@ARGV + # GNU_Parallel_worker = String so people can see this is from GNU Parallel + # eval+ = way to write 'eval ' without space (gives warning) + # pack+ = way to write 'pack ' without space + # q/H10000000/, = almost the same as "H*" but does not use * + # join+q//, = join '', + return('perl -X -e '. + 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '. + $hex); +} + +sub jobs_running($) { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength($$) { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength($) { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed() { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups() { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups() { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed($) { + my $self = shift; + $self->{'jobs_completed'}++; + $Global::total_completed++; +} + +sub set_max_jobs_running($$) { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree() { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + # Return 1 if not defined. + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute() { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + # Run the script twice if it gives 0 (typically intermittent error) + $self->{'memfree'} = ::qqx($script) || ::qqx($script); + if(not $self->{'memfree'}) { + ::die_bug("Less than 1 byte memory free"); + } + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript() { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # /proc/meminfo + # MemFree: 7012 kB + # Buffers: 19876 kB + # Cached: 431192 kB + # SwapCached: 0 kB + "linux" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # Android uses same code as GNU/Linux + "android" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => ( + q{ + print (((reverse `vmstat 1 1`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The second free value is correct + "solaris" => ( + q{ + print (((reverse `vmstat 1 2`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # hw.pagesize: 4096 + # vm.stats.vm.v_cache_count: 0 + # vm.stats.vm.v_inactive_count: 79574 + # vm.stats.vm.v_free_count: 4507 + "freebsd" => ( + q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }), + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # Pages free: 198061. + # Pages active: 159701. + # Pages inactive: 47378. + # Pages speculative: 29707. + # Pages wired down: 89231. + # "Translation faults": 928901425. + # Pages copy-on-write: 156988239. + # Pages zero filled: 271267894. + # Pages reactivated: 48895. + # Pageins: 1798068. + # Pageouts: 257. + # Object cache: 6603 hits of 1713223 lookups (0% hit rate) + 'darwin' => ( + q{ + $vm = `vm_stat`; + print (($vm =~ /page size of (\d+)/)[0] * + (($vm =~ /Pages free:\s+(\d+)/)[0] + + ($vm =~ /Pages inactive:\s+(\d+)/)[0])); + }), + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $script = "perl -e " . ::Q(::spacefree(1,$perlscript)); + } + return $script; + } +} + +sub limit($) { + # Returns: + # 0 = Below limit. Start another job. + # 1 = Over limit. Start no jobs. + # 2 = Kill youngest job + my $self = shift; + + if(not defined $self->{'limitscript'}) { + my %limitscripts = + ("io" => q! + io() { + limit=$1; + io_file=$2; + # Do the measurement in the background + ((tmp=$(mktemp || tempfile); + LANG=C iostat -x 1 2 > $tmp; + mv $tmp $io_file) /dev/null & ); + perl -e '-e $ARGV[0] or exit(1); + for(reverse <>) { + /Device/ and last; + /(\S+)$/ and $max = $max > $1 ? $max : $1; } + exit ('$limit' < $max)' $io_file; + }; + io %s %s + !, + "mem" => q! + mem() { + limit=$1; + awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2} + END { + if (sum*1024 < '$limit'/2) { exit 2; } + else { exit (sum*1024 < '$limit') } + }' /proc/meminfo; + }; + mem %s; + !, + "load" => q! + load() { + limit=$1; + ps ax -o state,command | + grep -E '^[DOR].[^[]' | + wc -l | + perl -ne 'exit ('$limit' < $_)'; + }; + load %s + !, + ); + my ($cmd,@args) = split /\s+/,$opt::limit; + if($limitscripts{$cmd}) { + my $tmpfile = ::tmpname("parlmt"); + ++$Global::unlink{$tmpfile}; + $self->{'limitscript'} = + ::spacefree(1, sprintf($limitscripts{$cmd}, + ::multiply_binary_prefix(@args),$tmpfile)); + } else { + $self->{'limitscript'} = $opt::limit; + } + } + + my %env = %ENV; + local %ENV = %env; + $ENV{'SSHLOGIN'} = $self->string(); + system($Global::shell,"-c",$self->{'limitscript'}); + ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n"); + return $?>>8; +} + + +sub swapping($) { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity($) { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + # Test with (on 64 core machine): + # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true' + if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) { + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + my $swap_activity; + $swap_activity = swapactivityscript(); + if(not $self->local()) { + $swap_activity = $self->wrap($swap_activity); + } + # Run swap_activity measuring. + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + my $qtmp = ::Q($tmpfile); + my $qfile = ::Q($file); + ::qqx("($swap_activity > $qtmp && mv $qtmp $qfile || rm $qtmp &)"); + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript() { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $script = "perl -e " . ::Q($perlscript); + } + return $script; + } +} + +sub too_fast_remote_login($) { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? $too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at($) { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at($$) { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high($) { + my $self = shift; + my $loadavg = $self->loadavg(); + if(defined $loadavg) { + ::debug("load", "Load $loadavg > ",$self->max_loadavg()); + return $loadavg >= $self->max_loadavg(); + } else { + # Unknown load: Assume load is too high + return 1; + } +} + + + +sub loadavg($) { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting + # for disk or CPU right now. So it is the server load this instant + # and not averaged over several minutes. This is needed so GNU + # Parallel will at most start one job that will push the load over + # the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + + my $self = shift; + sub loadavg_cmd() { + if(not $Global::loadavg_cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = ::spacefree(1,q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + # Treat threads as processes + $bsd2="ps axH -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + # TODO Find better code for AIX/Android + 'aix' => "uptime", + 'android' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd2, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd2, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $cygwin, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + 'MSWin32' => $sysv, + ); + print `$ps{$^O}`; + }); + # The command is too long for csh, so base64_wrap the command + $Global::loadavg_cmd = $self->hexwrap($ps); + } + return $Global::loadavg_cmd; + } + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/; # $/ = undef => slurp whole file + my $load_out = <$load_fh>; + close $load_fh; + if($load_out =~ /\S/) { + # Content can be empty if ~/ is on NFS + # due to reading being non-atomic. + # + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . "\n$load_out"); + } + } + $update_loadavg_file = 1; + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + -w $dir or ::die_bug("Cannot write to $dir"); + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->wrap(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Update load\n"); + my $file = ::Q($self->{'loadavg_file'}); + # tmpfile on same filesystem as $file + my $tmpfile = $file.$$; + $ENV{'SSHPASS'} = $self->{'password'}; + ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )"); + } + return $self->{'loadavg'}; +} + +sub max_loadavg($) { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg($$) { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg($) { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. --load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + $load = $self->compute_max_loadavg( + ::slurp_or_exit($Global::max_load_file) + ); + } else { + ::error("Parsing of --load failed."); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login($) { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login($$) { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running($) { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running($) { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes($) { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles($) { + # Reserves filehandle + my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process() { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{'TERM'} = $Global::original_sig{'TERM'}; + if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") { + # The exec does not work on Cygwin and QNX + sleep 10101010; + } else { + # 'exec sleep' takes less RAM than sleeping in perl + exec 'sleep', 10101; + } + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs() { + # Get an arg or a job (depending on mode) + if($Global::semaphore or ($opt::pipe and not $opt::tee)) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + # Replacement must happen here due to seq() + $job and $job->replaced(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup() { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args); + @args = (); + $Global::JobQueue->unget(@jobs); + @jobs = (); + } + + sub processes_available_by_system_limit($) { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawning_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + if($wanted_processes < $Global::infinity) { + $Global::dummy_jobs = 1; + } + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + if(!$Global::dummy_jobs) { + get_args_or_jobs() or last; + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: ". + $wait_time_for_getting_args, " ", $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning_once( + "Starting $system_limit processes took > $forktime sec.", + "Consider adjusting -j. Press CTRL-C to stop."); + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ". + $system_limit. " jobs in parallel.", + "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ". + $system_limit. " jobs in parallel.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit($) { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->string(); + ::warning("ssh to $serverlogin only allows ". + "for $ssh_limit simultaneous logins.", + "You may raise this by changing", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", + "You can also try --sshdelay 0.1", + "Using only ".($ssh_limit-1)." connections ". + "to avoid race conditions."); + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + } + return $ssh_limit; +} + +sub simultaneous_sshlogin($) { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simul-login & ssh host echo simul-login & ...) | + # grep simul|wc -l + # Input: + # $wanted_processes = Try for this many logins in parallel + # Returns: + # $ssh_limit = Number of succesful parallel logins + local $/ = "\n"; + my $self = shift; + my $wanted_processes = shift; + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin"). + "&1 &")x$wanted_processes; + ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus($$) { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes($) { + # Parse the number of processes that the user asked for using -j + # Input: + # $opt_P = string formatted as for -P + # Returns: + # $processes = the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if (-f $opt_P and not $opt_P =~ /^[-+]?\d+%?$/) { + # This is a file. Ignore files called +10% 4 -3 + $Global::max_procs_file = $opt_P; + my $opt_P_file = ::slurp_or_exit($Global::max_procs_file); + if($opt_P_file !~ /\S/) { + ::warning_once("$Global::max_procs_file is empty. ". + "Treated as 100%"); + $opt_P_file = "100%"; + } + $processes = $self->user_requested_processes($opt_P_file); + } else { + if($opt_P eq "0") { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } else { + # -P +3 and -P -1 + $opt_P =~ s/^([-+])/\$self->ncpus()$1/; + # -P 40% + $opt_P =~ s:%$:*\$self->ncpus()/100:; + $processes = eval $opt_P; + if($processes <= 0) { + # Do not go below 1 + $processes = 1; + } + } + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus($) { + # Number of CPU threads + # --use_sockets_instead_of_threads = count socket instead + # --use_cores_instead_of_threads = count physical cores instead + # Returns: + # $ncpus = number of cpu (threads) on this sshlogin + local $/ = "\n"; + my $self = shift; + if(not defined $self->{'ncpus'}) { + if($self->local()) { + if($opt::use_sockets_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'sockets'}; + } elsif($opt::use_cores_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'cores'}; + } else { + $self->{'ncpus'} = socket_core_thread()->{'threads'}; + } + } else { + my $ncpu; + $ENV{'SSHPASS'} = $self->{'password'}; + ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets"))); + if($opt::use_sockets_instead_of_threads + or + $opt::use_cpus_instead_of_cores) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets")); + } elsif($opt::use_cores_instead_of_threads) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores")); + } else { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads")); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ". + "number of cpus on ".$self->string." ($ncpu). Using 1."); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + + +sub nproc() { + # Returns: + # Number of threads using `nproc` + my $no_of_threads = ::qqx("nproc"); + chomp $no_of_threads; + return $no_of_threads; +} + +sub no_of_sockets() { + return socket_core_thread()->{'sockets'}; +} + +sub no_of_cores() { + return socket_core_thread()->{'cores'}; +} + +sub no_of_threads() { + return socket_core_thread()->{'threads'}; +} + +sub socket_core_thread() { + # Returns: + # { + # 'sockets' => #sockets = number of socket with CPU present + # 'cores' => #cores = number of physical cores + # 'threads' => #threads = number of compute cores (hyperthreading) + # 'active' => #taskset_threads = number of taskset limited cores + # } + my $cpu; + if ($^O eq 'linux') { + $cpu = sct_gnu_linux($cpu); + } elsif ($^O eq 'android') { + $cpu = sct_android($cpu); + } elsif ($^O eq 'freebsd') { + $cpu = sct_freebsd($cpu); + } elsif ($^O eq 'netbsd') { + $cpu = sct_netbsd($cpu); + } elsif ($^O eq 'openbsd') { + $cpu = sct_openbsd($cpu); + } elsif ($^O eq 'gnu') { + $cpu = sct_hurd($cpu); + } elsif ($^O eq 'darwin') { + $cpu = sct_darwin($cpu); + } elsif ($^O eq 'solaris') { + $cpu = sct_solaris($cpu); + } elsif ($^O eq 'aix') { + $cpu = sct_aix($cpu); + } elsif ($^O eq 'hpux') { + $cpu = sct_hpux($cpu); + } elsif ($^O eq 'nto') { + $cpu = sct_qnx($cpu); + } elsif ($^O eq 'svr5') { + $cpu = sct_openserver($cpu); + } elsif ($^O eq 'irix') { + $cpu = sct_irix($cpu); + } elsif ($^O eq 'dec_osf') { + $cpu = sct_tru64($cpu); + } else { + # Try all methods until we find something that works + $cpu = (sct_gnu_linux($cpu) + || sct_android($cpu) + || sct_freebsd($cpu) + || sct_netbsd($cpu) + || sct_openbsd($cpu) + || sct_hurd($cpu) + || sct_darwin($cpu) + || sct_solaris($cpu) + || sct_aix($cpu) + || sct_hpux($cpu) + || sct_qnx($cpu) + || sct_openserver($cpu) + || sct_irix($cpu) + || sct_tru64($cpu) + ); + } + if(not $cpu) { + # Fall back: Set all to nproc + my $nproc = nproc(); + if($nproc) { + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + $nproc; + } + } + if(not $cpu) { + ::warning("Cannot figure out number of cpus. Using 1."); + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + 1 + } + $cpu->{'sockets'} ||= 1; + $cpu->{'threads'} ||= $cpu->{'cores'}; + $cpu->{'active'} ||= $cpu->{'threads'}; + chomp($cpu->{'sockets'}, + $cpu->{'cores'}, + $cpu->{'threads'}, + $cpu->{'active'}); + # Choose minimum of active and actual + my $mincpu; + $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'}); + $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'}); + $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'}); + return $mincpu; +} + +sub sct_gnu_linux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + my $cpu = shift; + + sub read_topology($) { + my $prefix = shift; + my %sibiling; + my %socket; + my $thread; + for($thread = 0; + -r "$prefix/cpu$thread/topology/physical_package_id"; + $thread++) { + $socket{::slurp_or_exit( + "$prefix/cpu$thread/topology/physical_package_id")}++; + } + for($thread = 0; + -r "$prefix/cpu$thread/topology/thread_siblings"; + $thread++) { + $sibiling{::slurp_or_exit( + "$prefix/cpu$thread/topology/thread_siblings")}++; + } + $cpu->{'sockets'} = keys %socket; + $cpu->{'cores'} = keys %sibiling; + $cpu->{'threads'} = $thread; + } + + sub read_cpuinfo(@) { + my @cpuinfo = @_; + $cpu->{'sockets'} = 0; + $cpu->{'cores'} = 0; + $cpu->{'threads'} = 0; + my %seen; + my %phy_seen; + my $physicalid; + for(@cpuinfo) { + # physical id : 0 + if(/^physical id.*[:](.*)/) { + $physicalid = $1; + if(not $phy_seen{$1}++) { + $cpu->{'sockets'}++; + } + } + # core id : 3 + if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) { + $cpu->{'cores'}++; + } + # processor : 2 + /^processor.*[:]\s*\d/i and $cpu->{'threads'}++; + } + $cpu->{'cores'} ||= $cpu->{'threads'}; + $cpu->{'cpus'} ||= $cpu->{'threads'}; + $cpu->{'sockets'} ||= 1; + } + + sub read_lscpu(@) { + my @lscpu = @_; + my $threads_per_core; + my $cores_per_socket; + for(@lscpu) { + # lscpu + /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1; + /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1; + /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1; + /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2; + # lscpu --all --extended + # 3 0 0 1 1:1:1:0 yes 2900.0000 800.0000 1995.6210 + # 1 0 0 1 1:1:1 yes 1300.0000 800.0000 800.0000 + /^\s+(\d+)\s+\d+\s+(\d+)\s+(\d+)\s+\d+:\d+:\S+\s/ and do { + # Really only the last line matters + $cpu->{'threads'} = $1+1; + $cpu->{'sockets'} = $2+1; + $cpu->{'cores'} = $3+1; + }; + } + if($cores_per_socket and $cpu->{'sockets'}) { + $cpu->{'cores'} = $cores_per_socket * $cpu->{'sockets'}; + } + if($threads_per_core and $cpu->{'cores'}) { + $cpu->{'threads'} = $threads_per_core * $cpu->{'cores'}; + } + if($threads_per_core and $cpu->{'threads'}) { + $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core; + } + $cpu->{'cpus'} ||= $cpu->{'threads'}; + } + + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my @cpuinfo; + my @lscpu; + if($ENV{'PARALLEL_CPUINFO'}) { + # Use CPUINFO from environment - used for testing only + read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'}); + } elsif($ENV{'PARALLEL_LSCPU'}) { + # Use LSCPU from environment - used for testing only + read_lscpu(split/\n/,$ENV{'PARALLEL_LSCPU'}); + } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") { + # Use CPUPREFIX from environment - used for testing only + read_topology($ENV{'PARALLEL_CPUPREFIX'}); + } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) { + # Skip /proc/cpuinfo - already set + } else { + # Not debugging: Look at this computer + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "-|", "lscpu --all --extended")) { + # Parse output from lscpu + read_lscpu(<$in_fh>); + close $in_fh; + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") { + read_topology("/sys/devices/system/cpu"); + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "<", "/proc/cpuinfo")) { + # Read /proc/cpuinfo + read_cpuinfo(<$in_fh>); + close $in_fh; + } + } + if(-e "/proc/self/status" + and not $ENV{'PARALLEL_CPUINFO'} + and not $ENV{'PARALLEL_LSCPU'}) { + # if 'taskset' is used to limit number of threads + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return $cpu; +} + +sub sct_android($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + # Use GNU/Linux + return sct_gnu_linux($_[0]); +} + +sub sct_freebsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) + or + ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) + or + ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })); + return $cpu; +} + +sub sct_netbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu"); + return $cpu; +} + +sub sct_openbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu'); + return $cpu; +} + +sub sct_hurd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("nproc"); + return $cpu; +} + +sub sct_darwin($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx('sysctl -n hw.physicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx('sysctl -n hw.logicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); + return $cpu; +} + +sub sct_solaris($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/bin/kstat") { + my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id"); + if($#chip_id >= 0) { + $cpu->{'sockets'} ||= $#chip_id +1; + } + my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq"); + if($#core_id >= 0) { + $cpu->{'cores'} ||= $#core_id +1; + } + } + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo -p"); + if($#psrinfo >= 0) { + $cpu->{'sockets'} ||= $psrinfo[0]; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + $cpu->{'cores'} ||= $#prtconf +1; + } + } + } + return $cpu; +} + +sub sct_aix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/lscfg") { + if(open(my $in_fh, "-|", + "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) { + $cpu->{'cores'} = <$in_fh>; + close $in_fh; + } + } + } + if(not $cpu->{'threads'}) { + if(-x "/usr/bin/vmstat") { + if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) { + while(<$in_fh>) { + /lcpu=([0-9]*) / and $cpu->{'threads'} = $1; + } + close $in_fh; + } + } + } + return $cpu; +} + +sub sct_hpux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); + $cpu->{'threads'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'}); + return $cpu; +} + +sub sct_qnx($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + # BUG: It is not known how to calculate this. + + return $cpu; +} + +sub sct_openserver($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + $cpu->{'cores'} = $#psrinfo +1; + } + } + } + $cpu->{'sockets'} ||= $cpu->{'cores'}; + return $cpu; +} + +sub sct_irix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $cpu; +} + +sub sct_tru64($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sizer -pr"); + $cpu->{'sockets'} ||= $cpu->{'cores'}; + $cpu->{'threads'} ||= $cpu->{'cores'}; + + return $cpu; +} + +sub sshcommand($) { + # Returns: + # $sshcommand = the command (incl options) to run when using ssh + my $self = shift; + if (not defined $self->{'sshcommand'}) { + ::die_bug("sshcommand not set"); + } + return $self->{'sshcommand'}; +} + +sub local($) { + my $self = shift; + return $self->{'local'}; +} + +sub control_path_dir($) { + # Returns: + # $control_path_dir = dir of control path (for -M) + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + $self->{'control_path_dir'} = + # Use $ENV{'TMPDIR'} as that is typically not + # NFS mounted. + # The file system must support UNIX domain sockets + File::Temp::tempdir($ENV{'TMPDIR'} + . "/ctrlpath-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd($) { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file. " is not readable and will not be transferred."); + return "true"; + } + my $rsync_destdir; + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? + if($relpath) { + $rsync_destdir = ::shell_quote_file($workdir); + } else { + # rsync /foo/bar / + $rsync_destdir = "/"; + } + $file = ::shell_quote_file($file); + # Make dir if it does not exist + return($self->wrap("mkdir -p $rsync_destdir") . " && " . + $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir"); +} + +{ + my $rsync_fix; + my $rsync_version; + + sub rsync($) { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + # rsync 3.2.4 introduces a quoting bug: Add --old-args for that + # Returns: + # $rsync = "rsync" or "rsync --protocol 30 --old-args" + sub rsync_version { + if(not $rsync_version) { + my @out = `rsync --version`; + if(not @out) { + if(::which("rsync")) { + ::die_bug("'rsync --version' gave no output."); + } else { + ::error("'rsync' is not in \$PATH."); + ::wait_and_exit(255); + } + } + for (@out) { + # rsync version 3.1.3 protocol version 31 + # rsync version v3.2.3 protocol version 31 + if(/version v?(\d+)\.(\d+)(\.(\d+))?/) { + # 3.2.27 => 03.0227 + $rsync_version = sprintf "%02d.%02d%02d",$1,$2,$4; + } + } + $rsync_version or + ::die_bug("Cannot figure out version of rsync: @out"); + } + } + + sub rsync_fixup { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + # Returns: + # $rsync = "rsync" or "rsync --protocol 30" + if(not $rsync_fix) { + rsync_version(); + if($rsync_version >= 3.01) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync_fix .= " --protocol 30"; + } + if($rsync_version >= 3.0204) { + # Version 3.2.4 .. 3.2.8: --old-args + $rsync_fix .= " --old-args"; + } + } + return $rsync_fix; + } + my $self = shift; + + return "rsync".rsync_fixup()." ".$ENV{'PARALLEL_RSYNC_OPTS'}. + " -e".::Q($self->sshcmd()); + } +} + +sub cleanup_cmd($$$) { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + my $rmf = "sh -c ". + ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir); + return $self->wrap(::Q($rmf)); +} + +package JobQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $transfer_files, $return_files, $template_names, $template_contents); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'this_job_no' => 0, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + + $self->{'this_job_no'}++; + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + # {%} may have changed, so flush computed values + $job && $job->flush_cache(); + return $job; + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + return Job->new($commandline); + } else { + $self->{'this_job_no'}--; + return undef; + } + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; + $self->{'this_job_no'} -= @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs($) { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + if($opt::pipe and not $opt::tee) { + ::error("--pipe is incompatible with --eta/--bar/--shuf"); + ::wait_and_exit(255); + } + if($opt::totaljobs) { + $self->{'total_jobs'} = $opt::totaljobs; + } elsif($opt::sqlworker) { + $self->{'total_jobs'} = $Global::sql->total_jobs(); + } else { + my $record; + my @arg_records; + my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; + my $start = time; + while($record = $record_queue->get()) { + push @arg_records, $record; + if(time - $start > 10) { + ::warning("Reading ".scalar(@arg_records). + " arguments took longer than 10 seconds."); + $opt::eta && ::warning("Consider removing --eta."); + $opt::bar && ::warning("Consider removing --bar."); + $opt::shuf && ::warning("Consider removing --shuf."); + last; + } + } + while($record = $record_queue->get()) { + push @arg_records, $record; + } + if($opt::shuf and @arg_records) { + my $i = @arg_records; + while (--$i) { + my $j = int rand($i+1); + @arg_records[$i,$j] = @arg_records[$j,$i]; + } + } + $record_queue->unget(@arg_records); + # $#arg_records = number of args - 1 + # We have read one @arg_record for this job (so add 1 more) + my $num_args = $#arg_records + 2; + # This jobs is not started so -1 + my $started_jobs = $self->{'this_job_no'} - 1; + my $max_args = ::max($Global::max_number_of_args,1); + $self->{'total_jobs'} = ::ceil($num_args / $max_args) + + $started_jobs; + ::debug("init","Total jobs: ".$self->{'total_jobs'}. + " ($num_args/$max_args + $started_jobs)\n"); + } + } + return $self->{'total_jobs'}; +} + +sub flush_total_jobs($) { + # Unset total_jobs to force recomputing + my $self = shift; + ::debug("init","flush Total jobs: "); + $self->{'total_jobs'} = undef; +} + +sub next_seq($) { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args($) { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new($) { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # tmpfiles to cleanup when job is done + 'unlink' => [], + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + # Output used for SQL and CSV-output + 'output' => { 1 => [], 2 => [] }, + 'halfline' => { 1 => [], 2 => [] }, + }, ref($class) || $class; +} + +sub flush_cache($) { + my $self = shift; + $self->{'commandline'}->flush_cache(); +} + +sub replaced($) { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +{ + my $next_available_row; + + sub row($) { + my $self = shift; + if(not defined $self->{'row'}) { + if($opt::keeporder) { + $self->{'row'} = $self->seq(); + } else { + $self->{'row'} = ++$next_available_row; + } + } + return $self->{'row'}; + } +} + +sub seq($) { + my $self = shift; + return $self->{'commandline'}->seq(); +} + +sub set_seq($$) { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot($) { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +sub free_slot($) { + my $self = shift; + push @Global::slots, $self->slot(); +} + +{ + my($cattail); + + sub cattail() { + # Returns: + # $cattail = perl program for: + # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink] + # decomp-prg = decompress program + # wpid = pid of writer program + # file_stdin = file_to_decompress + # file_to_unlink = unlink this file + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,131072); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be sent more to the decompressor + close OUT; + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles($) { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + + if($opt::latestline) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$outfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$outfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($Global::linebuffer and not + ($opt::keeporder or $Global::files or $opt::results or + $opt::compress or $opt::compress_program or + $opt::decompress_program)) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + pipe($errfhr, $errfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$errfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) { + # If --results, but not --results *.csv/*.tsv + my $out = $self->{'commandline'}->results_out(); + my $seqname; + if($out eq $opt::results or $out =~ m:/$:) { + # $opt::results = simple string or ending in / + # => $out is a dir/ + # prefix/name1/val1/name2/val2/seq + $seqname = $out."seq"; + # prefix/name1/val1/name2/val2/stdout + $outname = $out."stdout"; + # prefix/name1/val1/name2/val2/stderr + $errname = $out."stderr"; + } else { + # $opt::results = replacement string not ending in / + # => $out is a file + $outname = $out; + $errname = "$out.err"; + $seqname = "$out.seq"; + } + ::write_or_exit($seqname, $self->seq()); + $outfhw = ::open_or_exit("+>", $outname); + $errfhw = ::open_or_exit("+>", $errname); + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + if($opt::sqlworker) { + # Save the filenames in SQL table + $Global::sql->update("SET Stdout = ?, Stderr = ? ". + "WHERE Seq = ". $self->seq(), + $outname, $errname); + } + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if($Global::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fh{1}) || die; + open($errfhw,">&",$Global::fh{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($Global::linebuffer) { + # Make it possible to read non-blocking from + # the buffer files + # Used for --linebuffer with -k, --files, --res, --compress* + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + } +} + +sub print_verbose_dryrun($) { + # If -v set: print command to stdout (possibly buffered) + # This must be done before starting the command + my $self = shift; + if($Global::verbose or $opt::dryrun) { + my $fh = $self->fh(1,"w"); + if($Global::verbose <= 1) { + print $fh $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $fh $self->wrapped(),"\n"; + } + } + if($opt::sqlworker) { + $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(), + $self->replaced()); + } +} + +sub add_rm($) { + # Files to remove when job is done + my $self = shift; + push @{$self->{'unlink'}}, @_; +} + +sub get_rm($) { + # Files to remove when job is done + my $self = shift; + return @{$self->{'unlink'}}; +} + +sub cleanup($) { + # Remove files when job is done + my $self = shift; + unlink $self->get_rm(); + delete @Global::unlink{$self->get_rm()}; +} + +sub grouped($) { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + my $fdr = ::open_or_exit("<", $self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if not debugging + $Global::debug or ::rm($self->fh($fdno,"unlink")); + } +} + +sub empty_input_wrapper($) { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + # The optimal block size differs + # It has been measured on: + # AMD 6376: 59000 + # /dev/null'; + my $script = + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", @ARGV) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 59000)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n"); + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + return 'perl -e '.::Q($script)." ". + base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } else { + return 'perl -e '.::Q($script)." ". + $Global::shell." -c ".::Q($command); + } +} + +sub filter_through_compress($) { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. + my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, + # it is known that output file is opened, + # thus output file can then be removed by the decompressor. + # empty_input_wrapper is needed for plzip + my $qcom = ::Q($comfile); + my $wpid = open(my $fdw,"|-", "(echo > $qcom; ". + empty_input_wrapper($opt::compress_program).") >". + ::Q($self->fh($fdno,'name'))) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; + # decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) + || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub set_fh($$$$) { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh($) { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write_block($) { + my $self = shift; + my $stdin_fh = $self->fh(0,"w"); + if(fork()) { + # Close in parent + close $stdin_fh; + } else { + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + + for my $part ( + grep { defined $_ } + $self->{'header'},$self->{'block'}) { + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$part)) { + substr($$part,0,$written) = ""; + } + } + close $stdin_fh; + exit(0); + } +} + +sub write($) { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + while($written = syswrite($stdin_fh,$$remaining_ref)){ + substr($$remaining_ref,0,$written) = ""; + } +} + +sub set_block($$$$$$) { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref if virgin (i.e. not --roundrobin) + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $buffer_ref = ref to $buffer containing the block + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; + $self->{'header'} = $header_ref; + if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) { + my $a = ""; + if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) { + $a .= $$header_ref; + } + # Job is no longer virgin + $self->set_virgin(0); + # Make a full copy because $buffer will change + $a .= substr($$buffer_ref,0,$endpos); + $self->{'block'} = \$a; + if($opt::remove_rec_sep) { + remove_rec_sep($self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length ${$self->{'block'}}; + } else { + $self->set_virgin(0); + for(substr($$buffer_ref,0,$endpos)) { + $self->{'block'} = \$_; + } + $self->{'block_length'} = $endpos + length ${$self->{'header'}}; + } + $self->{'block_pos'} = 0; + $self->add_transfersize($self->{'block_length'}); +} + +sub block_ref($) { + my $self = shift; + return $self->{'block'}; +} + +sub block_length($) { + my $self = shift; + return $self->{'block_length'}; +} + +sub remove_rec_sep($) { + # Remove --recstart and --recend from $block + # Input: + # $block_ref = reference to $block to be modified + # $recstart = --recstart + # $recend = --recend + # Uses: + # $opt::regexp = Are --recstart/--recend regexp? + # Returns: + # N/A + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + if($opt::regexp) { + $$block_ref =~ s/$recend$recstart//gom; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; + } else { + $$block_ref =~ s/\Q$recend$recstart\E//gom; + $$block_ref =~ s/^\Q$recstart\E//os; + $$block_ref =~ s/\Q$recend\E$//os; + } +} + +sub non_blocking_write($) { + my $self = shift; + my $something_written = 0; + + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, + substr(${$self->{'block'}},$self->{'block_pos'})); + if (!defined($rv) && $! == ::EAGAIN()) { + # would block - but would have written + $something_written = 0; + # avoid triggering auto expanding block size + $Global::no_autoexpand_block ||= 1; + } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { + # incomplete write + # Remove the written part + $self->{'block_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + # Empty block to free memory + my $a = ""; + $self->set_block(\$a,\$a,0,"",""); + $something_written = $rv; + } + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin($) { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin($$) { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid($) { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid($$) { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime($) { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime($@) { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; + $opt::sqlworker and + $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(), + $starttime); +} + +sub runtime($) { + # Returns: + # Run time in seconds with 3 decimals + my $self = shift; + return sprintf("%.3f", + int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime($) { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime($$) { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; + $opt::sqlworker and + $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), + $self->runtime()); +} + +sub is_timedout($) { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill($) { + my $self = shift; + $self->set_exitstatus(-1); + ::kill_sleep_seq($self->pid()); +} + +sub killreason($) { + my $self = shift; + return $self->{'killreason'}; +} + +sub set_killreason($) { + my $self = shift; + $self->{'killreason'} = shift; +} + +sub suspend($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "STOP", @pgrps; + $self->set_suspended(1); +} + +sub set_suspended($$) { + my $self = shift; + $self->{'suspended'} = shift; +} + +sub suspended($) { + my $self = shift; + return $self->{'suspended'}; +} + +sub resume($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "CONT", @pgrps; + $self->set_suspended(0); +} + +sub failed($) { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here($) { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here($) { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed($) { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed($) { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/(\d+)h/$1/) { + exit $bash; + } + exit $csh; + }). + # `echo \$?h` is needed to make fish not complain + "' ".'"`echo \\\\\\\\\$?h`" "$status" '; + } + return $script + } +} + +{ + my $script; + + sub fifo_wrap() { + # Script to create a fifo, run a command on the fifo + # while copying STDIN to the fifo, and finally + # remove the fifo and return the exit code of the command. + if(not $script) { + # {} == $PARALLEL_TMP for --fifo + # To make it csh compatible a wrapper needs to: + # * mkfifo + # * spawn $command & + # * cat > fifo + # * waitpid to get the exit code from $command + # * be less than 1000 chars long + + # The optimal block size differs + # It has been measured on: + # AMD 6376: 4095 + # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null'; + $script = "perl -e '". + (::spacefree + (0, q{ + ($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,4095)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256; + }))."'"; + } + return $script; + } +} + +sub wrapped($) { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_prepends) + # * --tee (@Global::cat_prepends) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # @opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_prepends + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + # Bug in Bash and Ksh when running multiline aliases + # This will force them to run correctly, but will fail in + # tcsh so we do not do it. + # $command .= "\n\n"; + if(@opt::shellquote) { + # Quote one time for each --shellquote + my $c = $command; + for(@opt::shellquote) { + $c = ::Q($c); + } + # Prepend "echo" (it is written in perl because + # quoting '-e' causes problem in some versions and + # csh's version does something wrong) + $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c); + } + if($Global::parallel_env) { + # If $PARALLEL_ENV set, put that in front of the command + # Used for env_parallel.* + if($Global::shell =~ /zsh/) { + # The extra 'eval' will make aliases work, too + $command = $Global::parallel_env."\n". + "eval ".::Q($command); + } else { + $command = $Global::parallel_env."\n".$command; + } + } + if($opt::cat) { + # In '--cat' and '--fifo' {} == $PARALLEL_TMP. + # This is to make it possible to compute $PARALLEL_TMP on + # the fly when running remotely. + # $ENV{PARALLEL_TMP} is set in the remote wrapper before + # the command is run. + # + # Prepend 'cat > $PARALLEL_TMP;' + # Append 'unlink $PARALLEL_TMP without affecting $?' + $command = + 'cat > "$PARALLEL_TMP";'. + $command.";". postpone_exit_and_cleanup(). + '"$PARALLEL_TMP"'; + } elsif($opt::fifo) { + # Prepend fifo-wrapper. In essence: + # mkfifo {} + # ( $command ) & + # # $command must read {}, otherwise this 'cat' will block + # cat > {}; + # wait; rm {} + # without affecting $? + $command = fifo_wrap(). " ". + $Global::shell. " ". ::Q($command). ' "$PARALLEL_TMP"'. ';'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_prepends) { + # --pipepart: prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + # + # --pipepart --tee: prepend: + # < dash-a-file + # + # --pipe --tee: wrap: + # (rm fifo; ... ) < fifo + # + # --pipe --shard X: + # (rm fifo; ... ) < fifo + $command = (shift @Global::cat_prepends). "($command)". + (shift @Global::cat_appends); + } elsif($opt::pipe and not $opt::roundrobin) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + $command = base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin($$) { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; + + if($opt::sqlworker) { + # Identify worker as --sqlworker often runs on different machines + # If local: Use hostname + my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host(); + $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host); + } +} + +sub sshlogin($) { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_base64($) { + # Base64 encode strings into 1000 byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); + return @base64; +} + +sub string_zip_base64($) { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. + # 1000 bytes is the largest word size csh supports + # Zipping will make exporting big environments work, too + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval() { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64"; + eval"@GNU_Parallel"; + $chld = $SIG{CHLD}; + $SIG{CHLD} = "IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + # Forget my() to save chars for csh + # my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + $SIG{CHLD} = $chld; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub base64_wrap($) { + # base64 encode Perl code + # Split it into chunks of < 1000 bytes + # Prepend it with a decoder that eval's it + # Input: + # $eval_string = Perl code to run + # Returns: + # $shell_command = shell command that runs $eval_string + my $eval_string = shift; + return + "perl -e ". + ::Q(base64_zip_eval())." ". + join" ",::shell_quote(string_zip_base64($eval_string)); +} + +sub base64_eval($) { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * evals the result + # Reverse of string_base64 + eval + # Will be wrapped in ' so single quote is forbidden. + # Spaces are stripped so spaces cannot be significant. + # The funny 'use IPC::Open3'-syntax is to avoid spaces and + # to make it clear that this is a GNU Parallel command + # when looking at the process table. + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + my $eval = decode_base64(join"",@ARGV); + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap($) { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree + (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = $ENV{"TMPDIR"} || "'. + ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'. + '$nice = '.$opt::nice.';'. + '$termseq = "'.$opt::termseq.'";'. + # } + q{ + # Check that $tmpdir is writable + -w $tmpdir || + die("$tmpdir\040is\040not\040writable.". + "\040Set\040PARALLEL_REMOTE_TMPDIR"); + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + # Set $script to a non-existent file name in $TMPDIR + do { + $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $script); + # Create a script from the hex code + # that removes itself and runs the commands + open($fh,">",$script) || die; + # \040 = space - but we remove spaces in the script + # ' needed due to rc-shell + print($fh("rm\040\'$script\'\n",$bashfunc.$cmd)); + close $fh; + my $parent = getppid; + my $done = 0; + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + eval { setpgrp }; + # Set nice value + eval { setpriority(0,0,$nice) }; + # Run the script + exec($shell,$script); + die("exec\040failed: $!"); + } + while((not $done) and (getppid == $parent)) { + # Parent pid is not changed, so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } + if(not $done) { + # sshd is dead: User pressed Ctrl-C + # Kill as per --termseq + my @term_seq = split/,/,$termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq && kill(0,-$pid)) { + kill(shift @term_seq, -$pid); + select(undef, undef, undef, (shift @term_seq)/1000); + } + } + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + $Global::envdef = ::slurp_or_exit($_); + } + } + if(grep { /^_$/ } @vars) { + local $/ = "\n"; + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' ". + "in a clean environment first."); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% + + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", + "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS", + "PARALLEL_JOBSLOT", $opt::process_slot_var, + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' + my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = (grep { !/PARALLEL_ENV/ } + grep { substr($ENV{$_},0,4) ne "() {" } @vars); + + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. + ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; + s/BASH_FUNC_(.*)(\(\)|%%)/$1/; + "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell."); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{ + if($shell=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{'parallel_bash_environment'}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}{$command}) { + my $sshlogin = $self->sshlogin(); + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $self->slot()); + $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string(); + $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host(); + if ($opt::hostgroups) { + $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups(); + $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups(); + } + $ENV{'PARALLEL_PID'} = $$; + if($sshlogin->local()) { + if($opt::workdir) { + # Create workdir if needed. Then cd to it. + my $wd = $self->workdir(); + if($opt::workdir eq "." or $opt::workdir eq "...") { + # If $wd does not start with '/': Prepend $HOME + $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; + } + ::mkdir_or_die($wd); + my $post = ""; + if($opt::workdir eq "...") { + $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";"); + + } + $command = "cd ".::Q($wd)." || exit 255; " . + $command . $post;; + } + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $perl_code = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $perl_code > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code); + } else { + $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code); + } + } else { + $self->{'sshlogin_wrap'}{$command} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = ::pQ($self->workdir()); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}. + qq{exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $cmd = $command; + # q// does not quote \, so we must do that + $cmd =~ s/\\/\\\\/g; + + my $remote_command = $sshlogin->hexwrap + ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;". + monitor_parent_sshd_script()); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = exitstatuswrapper($post); + } + $self->{'sshlogin_wrap'}{$command} = + ($pre + . $sshlogin->wrap($remote_command) + . ";" + . $post); + } + } + return $self->{'sshlogin_wrap'}{$command}; +} + +sub fill_templates($) { + # Replace replacement strings in template(s) + # Returns: + # @templates - File names of replaced templates + my $self = shift; + + if(%opt::template) { + my @template_name = + map { $self->{'commandline'}->replace_placeholders([$_],0,0) } + @{$self->{'commandline'}{'template_names'}}; + ::debug("tmpl","Names: @template_name\n"); + for(my $i = 0; $i <= $#template_name; $i++) { + ::write_or_exit + ($template_name[$i], + $self->{'commandline'}-> + replace_placeholders([$self->{'commandline'} + {'template_contents'}[$i]],0,0)); + } + if($opt::cleanup) { + $self->add_rm(@template_name); + } + } +} + +sub filter($) { + # Replace replacement strings in filter(s) and evaluate them + # Returns: + # $run - 1=yes, undef=no + my $self = shift; + my $run = 1; + if(@opt::filter) { + for my $eval ($self->{'commandline'}-> + replace_placeholders(\@opt::filter,0,0)) { + $run &&= eval $eval; + } + $self->{'commandline'}{'skip'} ||= not $run; + } + return $run; +} + +sub transfer($) { + # Files to transfer + # Non-quoted and with {...} substituted + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + + my $transfersize = 0; + my @transfer = $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); + for(@transfer) { + # filesize + if(-e $_) { + $transfersize += (stat($_))[7]; + } + } + $self->add_transfersize($transfersize); + return @transfer; +} + +sub transfersize($) { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize($) { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; + $opt::sqlworker and + $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(), + $self->{'transfersize'}); +} + +sub sshtransfer($) { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return($) { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize($) { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize($) { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; + $opt::sqlworker and + $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), + $self->{'returnsize'}); +} + +sub sshreturn($) { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:) || + ($file =~ m:/\./:); # Is the path relative or /./? + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync"); + my $basename = ::Q(::shell_quote_file(::basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR + # --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd" . " && " . + $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'. + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup($) { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->remote_cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';'); + } + return $cleancmd; +} + +sub remote_cleanup($) { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub exitstatuswrapper(@) { + # Input: + # @shellcode = shell code to execute + # Returns: + # shell script that returns current status after executing @shellcode + if($Global::cshell) { + return ('set _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } elsif($Global::fish) { + return ('export _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } else { + return ('_EXIT_status=$?; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } +} + +sub workdir($) { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $self->{'commandline'}-> + replace_placeholders([$opt::workdir],0,0); + #$workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:(.)/+$:$1:; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = $workdir; + } + return $self->{'workdir'}; +} + +sub parentdirs_of($) { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start($) { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + + sub open3_setpgrp_internal { + # Run open3+setpgrp followed by the command + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + my $pid; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + # The eval is needed to catch exception from open3 + eval { + if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { + # Each child gets its own process group to make it safe to killall + eval{ setpgrp(0,0) }; + eval{ setpriority(0,0,$opt::nice) }; + exec($Global::shell,"-c",$command) + || ::die_bug("open3-$stdin_fh ".substr($command,0,200)); + } + }; + return $pid; + } + + sub open3_setpgrp_external { + # Run open3 on $command wrapped with a perl script doing setpgrp + # Works on systems that do not support open3(,,,"-") + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + + my $pid; + my @setpgrp_wrap = + ('perl','-e', + "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) + || ::die_bug("open3-$stdin_fh"); + 1; + }; + return $pid; + } + + sub redefine_open3_setpgrp { + my $setgprp_cache = shift; + # Select and run open3_setpgrp_internal/open3_setpgrp_external + no warnings 'redefine'; + my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst"); + # Test to see if open3(x,x,x,"-") is fully supported + # Can an exported bash function be called via open3? + my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '. + 'else { exec("bash","-c","testfun && true"); }'; + my $bash = + ::shell_quote_scalar_default( + "testfun() { rm $name; }; export -f testfun; ". + "perl -MIPC::Open3 -e ". + ::Q(::Q($script)) + ); + my $redefine_eval; + # Redirect STDERR temporarily, + # so errors on MacOS X are ignored. + open my $saveerr, ">&STDERR"; + open STDERR, '>', "/dev/null"; + # Run the test + ::debug("init",qq{bash -c $bash 2>/dev/null}); + qx{ bash -c $bash 2>/dev/null }; + open STDERR, ">&", $saveerr; + + if(-e $name) { + # Does not support open3(x,x,x,"-") + # or does not have bash: + # Use (slow) external version + unlink($name); + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external'; + ::debug("init","open3_setpgrp_external chosen\n"); + } else { + # Supports open3(x,x,x,"-") + # This is 0.5 ms faster to run + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal'; + ::debug("init","open3_setpgrp_internal chosen\n"); + } + if(open(my $fh, ">", $setgprp_cache)) { + print $fh $redefine_eval; + close $fh; + } else { + ::debug("init","Cannot write to $setgprp_cache"); + } + eval $redefine_eval; + } + + sub open3_setpgrp { + my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" . + ::hostname() . "/setpgrp_func"; + sub read_cache() { + -e $setgprp_cache || return 0; + local $/ = undef; + open(my $fh, "<", $setgprp_cache) || return 0; + eval <$fh> || return 0; + close $fh; + return 1; + } + if(not read_cache()) { + redefine_open3_setpgrp($setgprp_cache); + } + # The sub is now redefined. Call it + return open3_setpgrp(@_); + } + + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + my $pid; + + if($Global::interactive or $Global::stderr_verbose) { + $job->interactive_start(); + } + $job->openoutputfiles(); + # Must be run after $job->interactive_start(): + # $job->interactive_start() may call $job->skip() + if($job->{'commandline'}{'skip'} + or + not $job->filter()) { + # $job->skip() was called or job filtered + $command = "true"; + } else { + $job->print_verbose_dryrun(); + } + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + if($opt::dryrun or $opt::sqlmaster) { $command = "true"; } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $job->slot()); + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + $job->add_rm($ENV{'PARALLEL_TMP'}); + $job->fill_templates(); + $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'}; + ::debug("run", $Global::total_running, " processes . Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh) = ::gensym(); + $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); + if($opt::roundrobin and not $opt::keeporder) { + # --keep-order will make sure the order will be reproducible + ::set_fh_non_blocking($stdin_fh); + } + $job->set_fh(0,"w",$stdin_fh); + if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); } + } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + # The eval is needed to catch exception from open3 + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + my @wrap = ('perl','-e', + "eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command) + || ::die_bug("open3-/dev/tty"); + 1; + }; + close $devtty_fh; + $job->set_virgin(0); + } elsif($Global::semaphore) { + # Allow sem to read from stdin + $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } else { + $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start($) { + my $self = shift; + my $command = $self->wrapped(); + if($Global::interactive) { + my $answer; + ::status_no_nl("$command ?..."); + do{ + my $tty_fh = ::open_or_exit("<","/dev/tty"); + $answer = <$tty_fh>; + close $tty_fh; + # Sometime we get an empty string (not even \n) + # Do not know why, so let us just ignore it and try again + } while(length $answer < 1); + if (not ($answer =~ /^\s*y/i)) { + $self->{'commandline'}->skip(); + } + } else { + print $Global::original_stderr "$command\n"; + } +} + +{ + my $tmuxsocket; + my $qsocket; + + sub tmux_wrap($) { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo = ::tmpname("tmx"); + $self->add_rm($tmpfifo); + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100."); + ::wait_and_exit(255); + } + if($opt::tmuxpane) { + # Move the command into a pane in window 0 + $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '. + $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '. + $actual_command; + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. tmux is confused by NUL."); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]/ /s; + $title = ::Q($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + my $quoted_space75 = ::Q(" ")x75; + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title .= $quoted_space75; + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + $qsocket = ::Q($tmuxsocket); + ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $qsocket attach"); + if($opt::fg) { + if(not fork) { + # Run tmux in the foreground + # Wait for the socket to appear + while (not -e $tmuxsocket) { } + `$ENV{'PARALLEL_TMUX'} -S $qsocket attach`; + exit; + } + } + ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $qsocket attach"); + } + $tmux = "sh -c ".::Q( + $ENV{'PARALLEL_TMUX'}. + " -S $qsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1").";" . + $ENV{'PARALLEL_TMUX'}. + " -S $qsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + return "mkfifo ".::Q($tmpfifo)." && $tmux ". + # Run in tmux + ::Q + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ]. + ::Q($tmpfifo)."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }. + q{/(\d+)h/ and exit($1);exit$c' }.::Q($tmpfifo); + } +} + +sub is_already_in_results($) { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + if($Global::csvsep) { + if($opt::joblog) { + # OK: You can look for job run in joblog + return 0 + } else { + ::warning_once( + "--resume --results .csv/.tsv/.json is not supported yet\n"); + # TODO read and parse the file + return 0 + } + } + my $out = $job->{'commandline'}->results_out(); + ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n"); + return(-e $out."stdout" or -f $out); +} + +sub is_already_in_joblog($) { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog($) { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub retry() { + # This command should be retried + my $self = shift; + + $self->set_endtime(undef); + $self->reset_exitstatus(); + $self->set_killreason(undef); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; +} + +sub should_be_retried($) { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if($opt::memfree and $self->killreason() eq "mem") { + # Job was killed due to memfree => retry + return $self->retry(); + } + if (not defined $opt::retries) { return 0; } + if(not $self->exitstatus() and not $self->exitsignal()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0; + } else { + # The job failed. Should it be retried? + $self->add_failed_here(); + my $retries = $self->{'commandline'}-> + replace_placeholders([$opt::retries],0,0); + # 0 = Inf + if($retries == 0) { $retries = 2**31; } + # Ignore files already unlinked to avoid memory leak + $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ]; + map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink; + if($self->total_failed() == $retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + return $self->retry(); + } + } +} + +{ + my (%print_later,$job_seq_to_print); + + sub print_earlier_jobs($) { + # Print jobs whose output is postponed due to --keep-order + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_seq_to_print ||= 1; + my $returnsize = 0; + ::debug("run", "Looking for: $job_seq_to_print ", + "This: ", $job->seq(), "\n"); + for(;vec($Global::job_already_run,$job_seq_to_print,1); + $job_seq_to_print++) {} + while(my $j = $print_later{$job_seq_to_print}) { + $returnsize += $j->print(); + if($j->endtime()) { + # Job finished - look at the next + delete $print_later{$job_seq_to_print}; + $job_seq_to_print++; + next; + } else { + # Job not finished yet - look at it again next round + last; + } + } + return $returnsize; + } +} + +sub print($) { + # Print the output of the jobs + # Returns: N/A + my $self = shift; + + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + ::rm($self->fh(1,"name")); + } + if($opt::pipe and $self->virgin() and not $opt::tee) { + # Skip --joblog, --dryrun, --verbose + } else { + if($opt::ungroup) { + # NULL returnsize = 0 returnsize + $self->returnsize() or $self->add_returnsize(0); + if($Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. + $opt::ungroup and return; + } + } + # Check for disk full + ::exit_if_disk_full(); + } + + my $returnsize = $self->returnsize(); + my @fdno; + if($opt::latestline) { + @fdno = (1); + } else { + @fdno = (sort { $a <=> $b } keys %Global::fh); + } + for my $fdno (@fdno) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fh = $Global::fh{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($Global::linebuffer) { + # Line buffered print out + $self->print_linebuffer($fdno,$in_fh,$out_fh); + } elsif($Global::files) { + $self->print_files($fdno,$in_fh,$out_fh); + } elsif($opt::results) { + $self->print_results($fdno,$in_fh,$out_fh); + } else { + $self->print_normal($fdno,$in_fh,$out_fh); + } + flush $out_fh; + } + ::debug("print", "<{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + if($Global::joblog and not $opt::sqlworker) { + # Add to joblog when finished + $self->print_joblog(); + } + if($opt::sqlworker and not $opt::results) { + $Global::sql->output($self); + } + if($Global::csvsep) { + # Add output to CSV when finished + $self->print_csv(); + } + if($Global::jsonout) { + $self->print_json(); + } + } + return $returnsize - $self->returnsize(); +} + +{ + my %jsonmap; + + sub print_json($) { + my $self = shift; + sub jsonquote($) { + my $a = shift; + if(not $jsonmap{"\001"}) { + map { $jsonmap{sprintf("%c",$_)} = + sprintf '\u%04x', $_ } 0..31; + } + $a =~ s/\\/\\\\/g; + $a =~ s/\"/\\"/g; + $a =~ s/([\000-\037])/$jsonmap{$1}/g; + return $a; + } + + my $cmd; + if($Global::verbose <= 1) { + $cmd = jsonquote($self->replaced()); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = jsonquote(join " ", @{$self->{'commandline'}}); + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + # { + # "Seq": 12, + # "Host": "/usr/bin/ssh foo@lo", + # "Starttime": 1608344711.743, + # "JobRuntime": 0.01, + # "Send": 0, + # "Receive": 10, + # "Exitval": 0, + # "Signal": 0, + # "Command": "echo 1", + # "V": [ + # "1" + # ], + # "Stdout": "1\n", + # "Stderr": "" + # } + # + printf($Global::csv_fh + q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ). + q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ). + q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }). + "\n", + $self->seq(), + jsonquote($self->sshlogin()->string()), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd, + (join ",", + map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref], + ), + jsonquote($self->{'output'}{1}), + jsonquote($self->{'output'}{2}) + ); + } +} + +{ + my $header_printed; + + sub print_csv($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = join " ", @{$self->{'commandline'}}; + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + if(not $header_printed) { + # Variable headers + # Normal => V1..Vn + # --header : => first value from column + my @V; + if($opt::header) { + my $i = 1; + @V = (map { $Global::input_source_header{$i++} } + @$record_ref[1..$#$record_ref]); + } else { + my $V = "V1"; + @V = (map { $V++ } @$record_ref[1..$#$record_ref]); + } + print $Global::csv_fh + (map { $$_ } + combine_ref("Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command", + @V, + "Stdout","Stderr" + )),"\n"; + $header_printed++; + } + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + print $Global::csv_fh + (map { $$_ } + combine_ref + ($self->seq(), + $self->sshlogin()->string(), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), \$cmd, + \@$record_ref[1..$#$record_ref], + \$self->{'output'}{1}, + \$self->{'output'}{2})),"\n"; + } +} + +sub combine_ref($) { + # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu) + my @part = @_; + my $sep = $Global::csvsep; + my $quot = '"'; + my @out = (); + + my $must_be_quoted; + for my $column (@part) { + # Memory optimization: Content transferred as reference + if(ref $column ne "SCALAR") { + # Convert all columns to scalar references + my $v = $column; + $column = \$v; + } + if(not defined $$column) { + $$column = ''; + next; + } + + $must_be_quoted = 0; + + if($$column =~ s/$quot/$quot$quot/go){ + # Replace " => "" + $must_be_quoted ||=1; + } + if($$column =~ /[\s\Q$sep\E]/o){ + # Put quotes around if the column contains , + $must_be_quoted ||=1; + } + + $Global::use{"bytes"} ||= eval "use bytes; 1;"; + if ($$column =~ /\0/) { + # Contains \0 => put quotes around + $must_be_quoted ||=1; + } + if($must_be_quoted){ + push @out, \$sep, \$quot, $column, \$quot; + } else { + push @out, \$sep, $column; + } + } + # Remove the first $sep: ,val,"val" => val,"val" + shift @out; + return @out; +} + +sub print_files($) { + # Print the name of the file containing stdout on stdout + # Uses: + # $opt::pipe + # $opt::group = Print when job is done + # $opt::linebuffer = Print ASAP + # Returns: N/A + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files because --files was set + for my $fdno (1,2) { + ::rm($self->fh($fdno,"name")); + ::rm($self->fh($fdno,"unlink")); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"), $Global::files_sep; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + $self->tag(), $self->fh($fdno,"name"); + } + $self->add_returnsize(-s $self->fh($fdno,"name")); + # Mark as printed - do not print again + $self->set_fh($fdno,"name",undef); + } +} + + +# Different print types +# (--ll | --ll --bar | --lb | --group | --parset | --sql-worker) +# (--files | --results (.json|.csv|.tsv) ) +# --color-failed +# --color +# --keep-order +# --tag +# --bar +{ + my ($up,$eol,$currow,$maxrow); + my ($minvisible,%print_later,%notvisible); + my (%binmodeset,%tab); + + sub latestline_init() { + # cursor_up cuu1 = up one line + $up = `sh -c "tput cuu1 /dev/null`; + chomp($up); + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + $currow = 1; + $maxrow = 1; + $minvisible = 1; + for(0..8) { + $tab{$_} = " "x(8-($_%8)); + } + } + + sub mbtrunc($$) { + # Simple mbtrunc to avoid using Text::WideChar::Util + my $str = shift; + my $len = shift; + if(::mbswidth($str) == length($str)) { + $str = substr($str,0,$len); + } else { + # mb chars (ヌー平行) are wider than 1 char on screen + # We need at most $len chars - they may be wide + $str =~ s/(.{$len}).*/$1/; + my $rlen = int((::mbswidth($str) - $len)/2+0.5); + do { + $str =~ s/.{$rlen}$//; + $rlen = int((::mbswidth($str) - $len)/2+0.5); + } while($rlen >= 1); + } + return $str; + } + + sub print_latest_line($) { + my $self = shift; + my $out_fh = shift; + if(not defined $self->{$out_fh,'latestline'}) { return; } + my $row = $self->row(); + # Is row visible? + if(not ($minvisible <= $row + and + $row < $minvisible + ::terminal_rows() - 1)) { + return; + } + if(not $binmodeset{$out_fh}++) { + # Enable utf8 if possible + eval q{ binmode $out_fh, "encoding(utf8)"; }; + } + my ($color,$reset_color) = $self->color(); + my $termcol = ::terminal_columns(); + my $untabify_tag = ::decode_utf8($self->untabtag()); + my $untabify_str = + ::untabify(::decode_utf8($self->{$out_fh,'latestline'})); + # -1 to make space for $truncated_str + my $maxtaglen = $termcol - 1; + $untabify_tag = mbtrunc($untabify_tag,$maxtaglen); + my $taglen = ::mbswidth($untabify_tag); + my $maxstrlen = $termcol - $taglen - 1; + $untabify_str = mbtrunc($untabify_str,$maxstrlen); + my $strlen = ::mbswidth($untabify_str); + my $truncated_tag = ""; + my $truncated_str = ""; + if($termcol - $taglen < 2) { + $truncated_tag = ">"; + } else { + if($termcol - $taglen - $strlen <= 2) { + $truncated_str = ">"; + } + } + $maxrow = ($row > $maxrow) ? $row : $maxrow; + printf($out_fh + ("%s%s%s%s". # up down \r eol + "%s%s". # tag trunc_tag + "%s%s%s%s". # color line trunc reset_color + "%s" # down + ), + "$up"x($currow - $row), "\n"x($row - $currow), "\r", $eol, + $untabify_tag,$truncated_tag, + $color, $untabify_str, $truncated_str, $reset_color, + "\n"x($maxrow - $row + 1)); + $currow = $maxrow + 1; + } + + sub print_linebuffer($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($opt::compress) { + if($?) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + # Blocked reading in final round + for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); } + } + if($opt::latestline) { $print_later{$self->row()} = $self; } + } + if(not $self->virgin()) { + if($Global::files or ($opt::results and not $Global::csvsep)) { + # Print filename + if($fdno == 1 and not $self->fh($fdno,"printed")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, $self->tag(), + $self->fh($fdno,"name")); + } + $self->set_fh($fdno,"printed",1); + } + # No need for reading $in_fh, as it is from "cat >/dev/null" + } else { + # Read halflines and print full lines + my $outputlength = 0; + my $halfline_ref = $self->{'halfline'}{$fdno}; + my ($buf,$i,$rv); + # 1310720 gives 1.2 GB/s + # 131072 gives 0.9 GB/s + # The optimal block size differs + # It has been measured on: + # AMD 6376: 60800 (>70k is also reasonable) + # Intel i7-3632QM: 52-59k, 170-175k + # seq 64 | ppar --_test $1 --lb \ + # 'yes {} `seq 1000`|head -c 10000000' >/dev/null + while($rv = sysread($in_fh, $buf, 60800)) { + $outputlength += $rv; + # TODO --recend + # Treat both \n and \r as line end + # Only test for \r if there is no \n + # Test: + # perl -e '$a="x"x1000000; + # $b="$a\r$a\n$a\r$a\n"; + # map { print $b,$_ } 1..10' + $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1)); + if($i) { + if($opt::latestline) { + # Keep the latest full line + my $l = join('', @$halfline_ref, + substr($buf,0,$i-1)); + # "ab\rb\n" = "bb", but we cannot process that correctly. + # Line may be: + # foo \r bar \n + # foo \r bar \r baz \r + # If so: Remove 'foo \r' + $l =~ s/.*\r//g; + my $j = ((rindex($l,"\n")+1) || + (rindex($l,"\r")+1)); + $self->{$out_fh,'latestline'} = substr($l,$j); + # Remove the processed part + # by keeping the unprocessed part + @$halfline_ref = (substr($buf,$i)); + } else { + # One or more complete lines were found + if($Global::color) { + my $print = join("",@$halfline_ref, + substr($buf,0,$i)); + chomp($print); + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # \n => reset \n color tag + $print =~ s{([\n\r])(?=.|$)} + {$reset_color$1$colortag}gs; + print($out_fh $colortag, $print, + $reset_color, "\n"); + } elsif($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + if($Global::cache_replacement_eval) { + # Replace with the same value for tag + my $tag = $self->tag(); + unshift @$halfline_ref, $tag; + # TODO --recend that can be partially in + # @$halfline_ref + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1$tag/gs; + } else { + # Replace with freshly computed tag-value + unshift @$halfline_ref, $self->tag(); + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1.$self->tag()/gse; + } + # The length changed, + # so find the new ending pos + $i = ::max((rindex($buf,"\n")+1), + (rindex($buf,"\r")+1)); + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } else { + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, + @$halfline_ref, substr($buf,0,$i)); + } + # Remove the printed part by keeping the unprinted + @$halfline_ref = (substr($buf,$i)); + } + } else { + # No newline, so append to the halfline + push @$halfline_ref, $buf; + } + } + $self->add_returnsize($outputlength); + if($opt::latestline) { $self->print_latest_line($out_fh); } + } + if(defined $self->{'exitstatus'}) { + if($Global::files or ($opt::results and not $Global::csvsep)) { + $self->add_returnsize(-s $self->fh($fdno,"name")); + } else { + if($opt::latestline) { + # Force re-computing color if --colorfailed + if($opt::colorfailed) { delete $self->{'color'}; } + if($self->{$out_fh,'latestline'} ne "") { + $self->print_latest_line($out_fh); + } + if(@{$self->{'halfline'}{$fdno}}) { + my $l = join('', @{$self->{'halfline'}{$fdno}}); + if($l ne "") { + $self->{$out_fh,'latestline'} = $l; + } + } else { + $self->{$out_fh,'latestline'} = undef; + } + # Print latest line from jobs that are already done + while($print_later{$minvisible}) { + $print_later{$minvisible}->print_latest_line($out_fh); + delete $print_later{$minvisible}; + $minvisible++; + } + # Print latest line from jobs that are on screen now + for(my $row = $minvisible; + $row < $minvisible -1 + ::terminal_rows(); + $row++) { + $print_later{$row} and + $print_later{$row}->print_latest_line($out_fh); + } + } else { + # If the job is dead: print the remaining partial line + # read remaining (already done for $opt::latestline) + my $halfline_ref = $self->{'halfline'}{$fdno}; + if(grep /./, @$halfline_ref) { + my $returnsize = 0; + for(@{$self->{'halfline'}{$fdno}}) { + $returnsize += length $_; + } + $self->add_returnsize($returnsize); + if($opt::tag or defined $opt::tagstring) { + # Prepend $tag the the remaining half line + unshift @$halfline_ref, $self->tag(); + } + # Print the partial line (halfline) + print $out_fh @{$self->{'halfline'}{$fdno}}; + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, @$halfline_ref); + } + @$halfline_ref = (); + } + } + } + if($self->fh($fdno,"rpid") and + CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } + } + } + } +} + +sub free_ressources() { + my $self = shift; + if(not $opt::ungroup) { + my $fh; + for my $fdno (sort { $a <=> $b } keys %Global::fh) { + $fh = $self->fh($fdno,"w"); + $fh and close $fh; + $fh = $self->fh($fdno,"r"); + $fh and close $fh; + } + } +} + +sub print_parset($) { + # Wrap output with shell script code to set as variables + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $outputlength = 0; + + ::debug("parset","print $Global::parset"); + if($Global::parset eq "assoc") { + # Start: (done in parse_parset()) + # eval "`echo 'declare -A myassoc; myassoc=( + # Each: (done here) + # [$'a\tb']=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + print '[',::Q($self->{'commandline'}-> + replace_placeholders(["\177<\177>"],0,0)),']='; + } elsif($Global::parset eq "array") { + # Start: (done in parse_parset()) + # eval "`echo 'myassoc=( + # Each: (done here) + # $'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + } elsif($Global::parset eq "var") { + # Start: (done in parse_parset()) + # + # Each: (done here) + # var=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # + if(not @Global::parset_vars) { + ::error("Too few named destination variables"); + ::wait_and_exit(255); + } + print shift @Global::parset_vars,"="; + } + local $/ = "\n"; + my $tag = $self->tag(); + my @out; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @out, $tag,$_; + } + # Remove last newline + # This often makes it easier to use the output in shell + @out and ${out[$#out]} =~ s/\n$//s; + print ::Q(join("",@out)),"\n"; + return $outputlength; +} + +sub print_normal($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::parset and $fdno == 1) { + $outputlength += $self->print_parset($fdno,$in_fh,$out_fh); + } elsif(defined $opt::tag or defined $opt::tagstring + or $Global::color or $opt::colorfailed) { + if($Global::color or $opt::colorfailed) { + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # Read line by line + local $/ = "\n"; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + chomp; + s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs; + print $out_fh $colortag,$_,$reset_color,"\n"; + } + } else { + my $tag = $self->tag(); + my $pretag = 1; + my $s; + while(sysread($in_fh,$buf,32767)) { + $outputlength += length $buf; + $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs; + print $out_fh ($pretag ? $tag : ""),$buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + ($pretag ? $tag : ""),$buf; + } + # Should next print start with a tag? + $s = substr($buf, -1); + # This is faster than ($s eq "\n") || ($s eq "\r") + $pretag = ($s eq "\n") ? 1 : ($s eq "\r"); + } + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + # Intel i7-3632QM: 25k- + while(sysread($in_fh,$buf,32767)) { + print $out_fh $buf; + $outputlength += length $buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_results($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::membuffer) { + # Read data into membuffer + if($opt::tag or $opt::tagstring) { + # Read line by line + local $/ = "\n"; + my $tag = $self->tag(); + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @{$self->{'output'}{$fdno}}, $tag, $_; + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + while(sysread($in_fh,$buf,60000)) { + $outputlength += length $buf; + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } else { + # Not membuffer: No need to read the file + if($opt::compress) { + $outputlength = -1; + } else { + # Determine $outputlength = file length + seek($in_fh, 0, 2) || ::die_bug("cannot seek result"); + $outputlength = tell($in_fh); + } + } + if($fdno == 1) { $self->add_returnsize($outputlength); } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_joblog($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = $self->wrapped(); + } + # Newlines make it hard to parse the joblog + $cmd =~ s/\n/\0/g; + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag($) { + my $self = shift; + if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) { + if(defined $opt::tag or defined $opt::tagstring) { + $self->{'tag'} = + ($self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)). + "\t"; + } else { + # No tag + $self->{'tag'} = ""; + } + } + return $self->{'tag'}; +} + +sub untabtag($) { + # tag with \t replaced with spaces + my $self = shift; + my $tag = $self->tag(); + if(not defined $self->{'untab'}{$tag}) { + $self->{'untab'}{$tag} = ::untabify($tag); + } + return $self->{'untab'}{$tag}; +} + +{ + my (@color,$eol,$reset_color,$init); + + sub init_color() { + if(not $init) { + $init = 1; + # color combinations that are readable: black/white text + # on colored background, but not white on yellow + my @color_combinations = + # Force each color code to have the same length in chars + # This will make \t work as expected + ((map { [sprintf("%03d",$_),"000"] } + 6..7,9..11,13..15,40..51,75..87,113..123,147..159, + 171..182,185..231,249..254), + (map { [sprintf("%03d",$_),231] } + 1..9,12..13,16..45,52..81,88..114,124..149, + 160..178,180,182..184,196..214,232..250)); + # reorder list so adjacent colors are dissimilar + # %23 and %7 were found experimentally + my @order = reverse sort { + (($a%23) <=> ($b%23)) + or + (($b%7) <=> ($a%7)); + } 0..$#color_combinations; + @order = @order[54 .. $#color_combinations, 0 .. 53]; + @color = map { + # TODO Can this be done with `tput` codes? + "\033[48;5;".$_->[0].";38;5;".$_->[1]."m" + } @color_combinations[ @order ]; + + # clr_eol el = clear to end of line + $eol = `sh -c "tput el /dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + # exit_attribute_mode sgr0 = turn off all attributes + $reset_color = `sh -c "tput sgr0 /dev/null`; + chomp($reset_color); + if($reset_color eq "") { $reset_color = "\033[m"; } + } + } + + sub color($) { + my $self = shift; + if(not defined $self->{'color'}) { + if($Global::color) { + # Choose a value based on the seq + $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol; + $self->{'reset_color'} = $reset_color; + } else { + $self->{'color'} = ""; + $self->{'reset_color'} = ""; + } + if($opt::colorfailed) { + if($self->exitstatus()) { + # White on Red + # Can this be done more generally? + $self->{'color'} = + "\033[48;5;"."196".";38;5;"."231"."m".$eol; + $self->{'reset_color'} = $reset_color; + } + } + } + return ($self->{'color'},$self->{'reset_color'}); + } +} + +sub hostgroups($) { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = + $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus($) { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus($$) { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } + $opt::sqlworker and + $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(), + $exitstatus); +} + +sub reset_exitstatus($) { + my $self = shift; + undef $self->{'exitstatus'}; +} + +sub exitsignal($) { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal($$) { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; + $opt::sqlworker and + $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), + $exitsignal); +} + +{ + my $total_jobs; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + my $limit; + if($Global::semaphore) { + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + if($job->exitstatus() or $job->exitsignal()) { + # Job failed + $Global::exitstatus++; + $Global::total_failed++; + if($Global::halt_fail) { + ::status("$Global::progname: This job failed:", + $job->replaced()); + $limit = $Global::total_failed; + } + } elsif($Global::halt_success) { + ::status("$Global::progname: This job succeeded:", + $job->replaced()); + $limit = $Global::total_completed - $Global::total_failed; + } + if($Global::halt_done) { + ::status("$Global::progname: This job finished:", + $job->replaced()); + $limit = $Global::total_completed; + } + if(not defined $limit) { + return "" + } + # --halt # => 1..100 (number of jobs failed, 101 means > 100) + # --halt % => 1..100 (pct of jobs failed) + if($Global::halt_pct and not $Global::halt_count) { + $total_jobs ||= $Global::JobQueue->total_jobs(); + # From the pct compute the number of jobs that must fail/succeed + $Global::halt_count = $total_jobs * $Global::halt_pct; + } + if($limit >= $Global::halt_count) { + # At least N jobs have failed/succeded/completed + # or at least N% have failed/succeded/completed + # So we should prepare for exit + if($Global::halt_fail or $Global::halt_done) { + # Set exit status + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + # --halt now,fail=X% or soon,fail=X% + # --halt now,done=X% or soon,done=X% + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $total_jobs * 100); + } elsif($Global::halt_count) { + # --halt now,fail=X or soon,fail=X + # --halt now,done=X or soon,done=X + $Global::halt_exitstatus = + ::min($Global::total_failed,101); + } + if($Global::halt_count and $Global::halt_count == 1) { + # --halt now,fail=1 or soon,fail=1 + # --halt now,done=1 or soon,done=1 + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + } + ::debug("halt","Pct: ",$Global::halt_pct, + " count: ",$Global::halt_count, + " status: ",$Global::halt_exitstatus,"\n"); + } elsif($Global::halt_success) { + $Global::halt_exitstatus = 0; + } + if($Global::halt_when eq "soon") { + $Global::start_no_new_jobs ||= 1; + if(scalar(keys %Global::running) > 0) { + # Only warn if there are more jobs running + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). + " jobs to finish."); + } + } + return($Global::halt_when); + } + return ""; + } +} + + +package CommandLine; + +sub new($) { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_list_flat' => [], + 'arg_list_flat_orig' => [undef], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub flush_cache() { + my $self = shift; + for my $arglist (@{$self->{'arg_list'}}) { + for my $arg (@$arglist) { + $arg->flush_cache(); + } + } + $self->{'arg_queue'}->flush_cache(); + $self->{'replaced'} = undef; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub slot($) { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; +} + +{ + my $already_spread; + my $darwin_max_len; + + sub populate($) { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::usable_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::usable_command_line_length || die; + if($^O eq "darwin") { + # Darwin's limit is affected by: + # * number of environment names (variables+functions) + # * size of environment + # * the length of arguments: + # a one-char argument lowers the limit by 5 + # To be safe assume all arguments are one-char + # The max_len is cached between runs, but if the size of + # the environment is different we need to recompute the + # usable max length for this run of GNU Parallel + # See https://unix.stackexchange.com/a/604943/2972 + if(not $darwin_max_len) { + my $envc = (keys %ENV); + my $envn = length join"",(keys %ENV); + my $envv = length join"",(values %ENV); + $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10; + ::debug("init", + "length: $darwin_max_len ". + "3+($max_len - $envn - $envv)/5 - $envc*2"); + } + $max_len = $darwin_max_len; + } + if($opt::cat or $opt::fifo) { + # Get the empty arg added by --pipepart (if any) + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('"$PARALLEL_TMP"')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (". + $self->len(). " >= ". + $max_len. + ") at input ". + $self->{'arg_queue'}->arg_number(). + ": ". + ((length $args > 50) ? + (substr($args,0,50))."..." : + $args)); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + $Global::JobQueue->flush_total_jobs(); + } + + if($opt::sqlmaster) { + # Insert the V1..Vn for this $seq in SQL table + # instead of generating one + $Global::sql->insert_records($self->seq(), $self->{'command'}, + $self->{'arg_list_flat_orig'}); + } + } +} + +sub push($) { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; + push @{$self->{'arg_list_flat'}}, @$record; + push @{$self->{'arg_list'}}, $record; + # Make @arg available for {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + my $col; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) { + # Positional replacement string + # Deal with negative positional replacement string + $col = ($1 < 0) ? $1 : $1-1; + if(defined($record->[$col])) { + $self->{'len'}{$perlexpr} += + length $record->[$col]->replace($perlexpr,$quote_arg,$self); + } + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} += + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } +} + +sub pop($) { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + # pop off arguments from @$record + splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; + splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} -= + length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } + return $record; +} + +sub pop_all($) { + # Remove all arguments and zeros the length of replacement perlexpr + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} = 0; + } + $self->{'arg_list'} = []; + $self->{'arg_list_flat_orig'} = [undef]; + $self->{'arg_list_flat'} = []; + return @popped; +} + +sub number_of_args($) { + # The number of records + # Returns: + # number of records + my $self = shift; + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs($) { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string($) { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub results_out($) { + sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + # Uses: + # $Global::max_file_length is set + # Returns: + # $Global::max_file_length + my $testdir = shift; + + my $upper = 100_000_000; + # Dir length of 8 chars is supported everywhere + my $len = 8; + my $dir = "d"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "d"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "d"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; + } + + my $self = shift; + my $out = $self->replace_placeholders([$opt::results],0,0); + if($out eq $opt::results) { + # $opt::results simple string: Append args_as_dirname + my $args_as_dirname = $self->args_as_dirname(0); + # Output in: prefix/name1/val1/name2/val2/stdout + $out = $opt::results."/".$args_as_dirname; + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + # mkpath failed: Argument too long or not quoted + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + # Quote dirnames with + + $args_as_dirname = $self->args_as_dirname(1); + # prefix/name1/val1/name2/val2/ + $out = $opt::results."/".$args_as_dirname; + File::Path::mkpath($out); + } + $out .="/"; + } else { + if($out =~ m:/$:s) { + # / = dir + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + ::error("Cannot make dir '$out'."); + ::wait_and_exit(255); + } + } else { + $out =~ m:(.*)/:s; + File::Path::mkpath($1); + } + } + return $out; +} + +{ + my %map; + my %stringmap; + my $sep; + + # test: '' . .. a. a.. + ++ 0..255 on fat12 ext4 + sub args_as_dirname($) { + # Returns: + # all arguments joined with '/' (similar to {}) + # Chars that are not safe on all file systems are quoted. + sub init() { + # ext4: / \t \n \0 \\ \r + # fat: 0..31 " * / : < > ? \ | Maybe also: # [ ] ; = , + # exfat: 128..255 + # Other FS: , [ ] { } ( ) ! ; " ' * ? < > | + # + # Quote these as: + # + = ++ + # \0 = +0 + # \t = +t + # \\ = +b (backslash) + # \n = +n + # \r = +r + # / = +z (zlash) + # ? = +y (whY?) + # " = +d (double quote) + # ' = +q (quote) + # * = +a (asterisk) + # < = +l (less than) + # > = +g (greater than) + # : = +k (kolon) + # ! = +x (eXclamation) + # | = +p (pipe) + # # = +h (hash) + # ; = +s (semicolon) + # = = +e (equal) + # , = +c (comma) + # 1..32 128..255 = +XX (hex value) + # [ ] = +e +f + # ( ) = +i +j + # { } = +v +w + # Quote '' as +m (eMpty) + # Quote . as +_ + # Quote .. as +__ + # (Unused: ou) + %map = qw( + + ++ + \0 +0 + \t +t + \\ +b + \n +n + \r +r + / +z + ? +y + " +d + ' +q + * +a + < +l + > +g + : +k + ! +x + | +p + # +h + ; +s + = +e + , +c + [ +e + ( +i + { +v + ] +f + ) +j + } +w + ); + # 1..32 128..255 = +XX (hex value) + map { $map{sprintf "%c",$_} = sprintf "+%02x",$_ } 1..32, 128..255; + # Default value = itself + map { $map{sprintf "%c",$_} ||= sprintf "%c",$_ } 0..255; + # Quote '' as +m (eMpty) + $stringmap{""} = "+m"; + # Quote . as +_ + $stringmap{"."} = "+_"; + # Quote .. as +__ + $stringmap{".."} = "+__"; + # Set dir separator + eval 'use File::Spec; $sep = File::Spec->catfile("", "");'; + $sep ||= '/'; + } + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my $quote = shift; + my @res = (); + if(not $sep) { init(); } + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + $quote ? + ( + grep { $_ ne "\0noarg" } map { + my $s = $_; + # Quote + as ++ + $s =~ s/(.)/$map{$1}/gs; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig() + ) : + ( + grep { $_ ne "\0noarg" } map { + my $s = $_; + # Quote / as +z and + as ++ + $s =~ s/($sep|\+)/$map{$1}/gos; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig() + ) + ); + } + } + return join $sep, map { $stringmap{$_} || $_ } @res; + } +} + +sub header_indexes_sorted($) { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. $max_col; + return @header_indexes_sorted; +} + +sub len($) { + # Uses: + # @opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + # MacOS has an overhead of 8 bytes per argument + my $darwin = ($^O eq "darwin") ? 8 : 0; + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + if($darwin) { + $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin; + } + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between recargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + if($darwin) { + $len += ($recargs * $self->{'replacecount'}{$replstring} + * $darwin); + } + } + } + if(defined $Global::parallel_env) { + # If we are using --env, add the prefix for that, too. + $len += length $Global::parallel_env; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: ' => "'" + " => '"' + # TODO can we count the number of expanding chars? + # and count them in arguments, too? + $len *= 3; + } + if(@opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: ' => "'" + for(@opt::shellquote) { + $len *= 3; + } + $len *= 5; + } + if(@opt::sshlogin) { + # Pessimistic length if remote + # Worst case is BASE64 encoding 3 bytes -> 4 bytes + $len = int($len*4/3); + } + return $len; +} + +sub replaced($) { + # Uses: + # $Global::quote_replace + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +sub replace_placeholders($$$$) { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? + # Uses: + # @Arg::arg = arguments as strings to be use in {= =} + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + + # Token description: + # \0spc = unquoted space + # \0end = last token element + # \0ign = dummy token to be ignored + # \177<...\177> = replacement expression + # " " = quoted space, that splits -X group + # text = normal text - possibly part of -X group + my $spacer = 0; + my @tokens = grep { length $_ > 0 } map { + if(/^\177<|^ $/) { + # \177<...\177> or space + $_ + } else { + # Split each space/tab into a token + split /(?=\s)|(?<=\s)/ + } + } + # Split \177< ... \177> into own token + map { split /(?=\177<)|(?<=\177>)/ } + # Insert "\0spc" between every element + # This space should never be quoted + map { $spacer++ ? ("\0spc",$_) : $_ } + map { $_ eq "" ? "\0empty" : $_ } + @$targetref; + + if(not @tokens) { + # @tokens is empty: Return empty array + return @tokens; + } + ::debug("replace", "Tokens ".join":",@tokens,"\n"); + # Make it possible to use $arg[2] in {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + # Flat list: + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] + if(not @{$self->{'arg_list_flat'}}) { + @{$self->{'arg_list_flat'}} = Arg->new(""); + } + my $argref = $self->{'arg_list_flat'}; + # Number of arguments - used for positional arguments + my $n = $#$argref+1; + + # $self is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + # @replaced = tokens with \177< \177> replaced + my @replaced; + if($self->{'context_replace'}) { + my @ctxgroup; + for my $t (@tokens,"\0end") { + # \0end = last token was end of tokens. + if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") { + # Context group complete: Replace in it + if(grep { /^\177} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe) { + # Token is \177<..\177> + } else { + if($Global::escape_string_present) { + # Command line contains \177: + # Unescape it \177\256 => \177 + $a =~ s/\177\176/\177/g; + } + } + $a + } @ctxgroup; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # Context group has no a replacement string: Copy it once + CORE::push @replaced, map { + if($Global::escape_string_present) { + # Command line contains \177: + # Unescape it \177\176 => \177 + $a =~ s/\177\176/\177/g; + } + $_; + } @ctxgroup; + } + # New context group + @ctxgroup=(); + } + if($t eq "\0spc" or $t eq " ") { + CORE::push @replaced,$t; + } else { + CORE::push @ctxgroup,$t; + } + } + } else { + # @group = @token + # Replace in group + # Push output + # repquote = no if {} first on line, no if $quote, yes otherwise + for my $t (@tokens) { + if($t =~ /^\177} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? + # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe; + CORE::push @replaced, $space, $a; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # No replacement + CORE::push @replaced, map { + $Global::escape_string_present and s/\177\176/\177/g; $_; + } $t; + } + } + } + *Arg::arg = []; + ::debug("replace","Replaced: ".join":",@replaced,"\n"); + + # Put tokens into groups that may be quoted. + my @quotegroup; + my @quoted; + for (map { $_ eq "\0empty" ? "" : $_ } + grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" } + @replaced, "\0end") { + if($_ eq "\0spc" or $_ eq "\0end") { + # \0spc splits quotable groups + if($quote) { + if(@quotegroup) { + CORE::push @quoted, ::Q(join"",@quotegroup);; + } + } else { + CORE::push @quoted, join"",@quotegroup; + } + @quotegroup = (); + } else { + CORE::push @quotegroup, $_; + } + } + ::debug("replace","Quoted: ".join":",@quoted,"\n"); + return wantarray ? @quoted : "@quoted"; +} + +sub skip($) { + # Skip this job + my $self = shift; + $self->{'skip'} = 1; +} + + +package CommandLineQueue; + +sub new($) { + sub merge_rpl_parts(@) { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/os) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/os) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; + } + + sub escape_177($) { + # Escape \177 => \177\176 + my $s = shift; + $Global::escape_string_present += $s =~ s/\177/\177\176/g; + return $s; + } + + sub replace_parens($) { + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} and {={==}=} + # Replace {= -> \177< and =} -> \177> + # + # Complex way to do: + # s/{=(.*)=}/\177<$1\177>/g + # which would not work + my $s = shift; + $s =~ s[\Q$Global::parensleft\E # Match {= + # Match . unless the next string is {= or =} + # needed to force matching the shortest {= =} + ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?) + \Q$Global::parensright\E ] # Match =} + {\177<$1\177>}gxs; + # Now {= perlexpr =} => \177< perlexpr \177> + return $s; + } + + sub replace_rpl_def($) { + my $s = shift; + # Replace rpl-definitions with the corresponding perl code + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # + # Replace the shorthand string (--rpl) + # with the {= perl expr =} + # + # Avoid searching for shorthand strings inside existing {= perl expr =} + # + # Replace $$1 in {= perl expr =} with groupings in shorthand string + # + # parallel --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2_REPLACE/g;' \ + # echo {/.gz/.lz} ::: UU.tar.gz + # + # {/.gz/.lz} => + # \177< + # $_pAr_gRp1 = ".gz"; + # $_pAr_gRp2 = ".lz"; + # s/${_pAr_gRp1}/${_pAr_gRp2}_REPLACE/g; + # \177> + # {/.gz/.lz:%8.2s} => + # \177< + # $_pAr_gRp1 = ".gz"; + # $_pAr_gRp2 = ".lz"; + # s/${_pAr_gRp1}/${_pAr_gRp2}_REPLACE/g;; + # $_ = sprintf("%8.2f",$_); + # \177> + # + sub replacer_rpl { + my $rpl = shift; + my $rv = $Global::rpl{$rpl}; + return replacer($rv,@_); + } + sub replacer { + my $rv = shift; + my $unchanged = shift; + my $position = shift; + my $grp_regexp = shift; + my $grp_string = shift; + my $formatstring = shift; + $grp_string =~ /^${grp_regexp}$/ or + ::die_bug("Match failed: '$grp_regexp' on $grp_string"); + # Dummy entry to start $grp[n] at 1. + my @grp = (1); + if($] >= 5.010) { + @grp = (1, @{^CAPTURE}); + } else { + for(my $i = 1; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + } + # replace $$1 with ${_pAr_gRp1}, $$2 with ${_pAr_gRp2} + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$\{_pAr_gRp$1\}/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . "\";"; + } + # :%8.2f => %8.2f + $formatstring =~ s/^://; + my $formatcode = ""; + if(length $formatstring > 0) { + $formatcode = ";\$_ = sprintf('$formatstring',\$_);"; + } + if($position =~ s/\.(\d+)//) { + # {2.3} => There must be a corresponding --match + my $field = $1; + my $re = $opt::match[$position-1]; + if($re=~/^\+(\d+)/) { + # multiple --match: Reuse --match $1 + $re = $opt::match[$1-1]; + } + if(not defined $re) { + ::error("{$position.$field} requires ". + "corresponding --match"); + ::wait_and_exit(255); + } + if($re =~ /\001/) { + ::error("\\001 is not supported in --match"); + ::wait_and_exit(255); + } + $rv = "m\001$re\001 or ". + "::warning(\"'\$_' did not match '$re'\"); ". + "\$_ = \$$field; $rv"; + } + ::debug("rpl","match: $rv ¤ $unchanged ¤ $position ¤ ". + "$set_args ¤ $rv ¤ $grp_regexp ¤ $formatstring\n"); + return($unchanged . "\177<" . $position . $set_args . + $rv . $formatcode. "\177>"); + } + if($rpl =~ /^\{/) { + my ($prefix,$grp_regexp,$postfix) = + # Ignore { and } + $rpl =~ /^ \{ # { + ( [^(]* ) # Prefix (no '{' ) - e.g. %% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* ) # Postfix (no '}' ) - e.g. end + \} $ # } + /xs; + my $format_regexp = ":%.*?"; + q{ + # Regexp using named captures - kept for documentation + # It is easier to understand than the backward compatible version + # Look for: { position prefix group format postfix } + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (? -?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + (?$grp_regexp) + \Q$postfix\E + (? $format_regexp) + \} + } + { + replacer_rpl($rpl, $+{unchanged}, + $+{position}, $grp_regexp, + $+{grp}, $+{format}); + }gsex){}; + # Look for: { position prefix group postfix } + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (? -?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + (?$grp_regexp) + \Q$postfix\E + \} + } + { + replacer_rpl($rpl, $+{unchanged}, + $+{position}, $grp_regexp, + $+{grp}); + }gsex){} + }; + { + # This a rewrite of the above to perl 5.8 + # (does not use $+{...} which was introduced in 5.010 + # Look for: { position prefix group format postfix } + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (-?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + ($grp_regexp) + \Q$postfix\E + ($format_regexp) + \} + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3, $+); + }gsex){} + # Look for: { position prefix group postfix } + # (Same above - except "format") + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + \{ + (-?\d+(?:\.\d+)? \s*)? + \Q$prefix\E + ($grp_regexp) + \Q$postfix\E + \} + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3); + }gsex){} + } + } else { + my ($prefix,$grp_regexp,$postfix) = + $rpl =~ /^( [^(]* ) # Prefix - e.g. {%% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* )$ # Postfix - e.g } + /xs; + q { + # Regexp using named captures - kept for documentation + # Look for: prefix group postfix + while($s =~ + s{(? (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + (?) + \Q$prefix\E \s* + (?$grp_regexp) + \Q$postfix\E + } + { + replacer_rpl($rpl, $+{unchanged}, $+{position}, + $grp_regexp, $+{grp}); + }gsex){}; + }; + { + # This a rewrite of the above to perl 5.8 + # (does not use $+{...} which was introduced in 5.010 + # Look for: prefix group postfix + while($s =~ + s{( (?: ^|\177> ) (?: [^\177]*|[\177][^<>] )*?) + () + \Q$prefix\E \s* + ($grp_regexp) + \Q$postfix\E + } + { + replacer_rpl($rpl, $1, $2, $grp_regexp, $3); + }gsex){}; + } + } + } + # Perl positional replacement fields {=1.2 perlexpr =} + while($s =~ + s{ \177< # {= + (-?\d+(?:\.\d+)\s*) # position + (([^\177]+|\177\177)*) # perl expr + \177> # =} + } + { replacer($2, '', $1,''); }gsex){}; + return $s; + } + + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift || 0; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @unget = (); + my $posrpl; + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $seq = 1; + + # Replace replacement strings with {= perl expr =} + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \177< and \177> + # in options that can contain replacement strings: + # @command, --transferfile, --return, + # --tagstring, --workdir, --results + for(@command, @$transfer_files, @$return_files, + @$template_names, @$template_contents, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries, + @opt::filter) { + # Skip if undefined + defined($_) or next; + # Escape \177 => \177\176 + $_ = escape_177($_); + # {= perl expr =} => \177< perl expr \177> + $_ = replace_parens($_); + # Replace rpl-definitions with the corresponding perl code + $_ = replace_rpl_def($_); + } + + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($transfer_files, $return_files, + $template_names, $template_contents, + @command); + if("@command" =~ /^[^ \t\n=]*\177append()) { + $seq = $Global::sql->max_seq() + 1; + } + + return bless { + ('unget' => \@unget, + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => $len_ref, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'seq' => $seq, + ) + }, ref($class) || $class; +} + + + +sub replacement_counts_and_lengths($$@) { + # Count the number of different replacement strings. + # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@transfer_files = array of filenames to transfer + # \@return_files = array of filenames to return + # \@template_names = array of names to copy to + # \@template_contents = array of contents to write + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \177<( (?: [^\177]*|[\177][^<>] )*?)\177> /\000/xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \177< + while($c =~ s/ (\S*\000\S*) //xs) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$transfer_files, @$return_files, + @$template_names, @$template_contents, + @opt::filter, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) { + # Options that can contain replacement strings + defined($_) or next; + my $t = $_; + while($t =~ s/ \177<( (?: [^\177]*|[\177][^<>] )* )\177> //xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\177<\177>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\177<\177>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + if($opt::sqlworker) { + # Get the sequence number from the SQL table + $self->set_seq($SQL::next_seq); + # Get the command from the SQL table + $self->{'command'} = $SQL::command_ref; + my @command; + # Recompute replace counts based on the read command + ($self->{'replacecount'}, + $self->{'len'}, @command) = + replacement_counts_and_lengths($self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_name'}, + $self->{'template_contents'}, + @$SQL::command_ref); + if("@command" =~ /^[^ \t\n=]*\177new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_names'}, + $self->{'template_contents'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("run","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe/--pipepart must have a command to pipe into ". + "(e.g. 'cat')."); + ::wait_and_exit(255); + } + } elsif($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? + return undef; + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args($) { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length($) { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() . + "/linelen"; + my $cached_limit; + local $/ = undef; + if(open(my $fh, "<", $len_cache)) { + $cached_limit = <$fh>; + $cached_limit || ::warning("Invalid content in $len_cache"); + close $fh; + } + if(not $cached_limit) { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + my $dir = ::dirname($len_cache); + -d $dir or eval { File::Path::mkpath($dir); }; + open(my $fh, ">", $len_cache.$$); + print $fh $cached_limit; + close $fh; + rename $len_cache.$$, $len_cache || ::die_bug("rename cache file"); + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + } + return int($Limits::Command::line_max_len); +} + +sub real_max_length() { + # Find the max_length of a command line + # Returns: + # The maximal command line length with 1 byte arguments + # return find_max(" c"); + return find_max("c"); +} + +sub find_max($) { + my $string = shift; + # This is slow on Cygwin, so give Cygwin users a warning + if($^O eq "cygwin" or $^O eq "msys") { + ::warning("Finding the maximal command line length. ". + "This may take up to 1 minute.") + } + # Use an upper bound of 100 MB if the shell allows for infinite + # long lengths + my $upper = 100_000_000; + my $lower; + # 1000 is supported everywhere, so the search can start anywhere 1..999 + # 324 makes the search much faster on Cygwin, so let us use that + my $len = 324; + do { + if($len > $upper) { return $len }; + $lower = $len; + $len *= 4; + ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): "); + } while (is_acceptable_command_line_length($len,$string)); + # Then search for the actual max length between + # last successful length ($len/16) and upper bound + return binary_find_max(int($len/16),$len,$string); +} + + +# Prototype forwarding +sub binary_find_max($$$); +sub binary_find_max($$$) { + # Given a lower and upper bound find the max (length or args) of a + # command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper, $string) = (@_); + if($lower == $upper + or $lower == $upper-1 + or $lower/$upper > 0.99) { + # $lower is +- 1 or within 1%: Don't search more + return $lower; + } + # Unevenly split binary search which is faster for Microsoft Windows. + # Guessing too high is cheap. Guessing too low is expensive. + my $split = ($^O eq "cygwin" or $^O eq "msys") ? 0.93 : 0.5; + my $middle = int (($upper-$lower)*$split + $lower); + ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): "); + if (is_acceptable_command_line_length($middle,$string)) { + return binary_find_max($middle,$upper,$string); + } else { + return binary_find_max($lower,$middle,$string); + } +} + +{ + my $prg; + + sub is_acceptable_command_line_length($$) { + # Test if a command line of this length can run + # in the current environment + # If the string is " x" it tests how many args are allowed + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + my $string = shift; + if($Global::parallel_env) { + $len += length $Global::parallel_env; + } + # Force using non-built-in command + $prg ||= ::which("echo"); + my $l = length ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string)); + if($l < $len/2) { + # The command returned OK, but did not output $len chars + # => this failed (Centos3 does this craziness) + return 0 + } + ::debug("init", "$len=$?\n"); + return not $?; + } +} + +sub tmux_length($) { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # tmux 2.0 has a 16kB limit + # tmux 2.1 has a 16kB limit + # tmux 2.2 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + local $/ = "\n"; + my $len = shift; + if($opt::tmux) { + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not ::which($ENV{'PARALLEL_TMUX'})) { + ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH."); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 30000, $len) { + my $tmpfile = ::tmpname("tms"); + my $qtmp = ::Q($tmpfile); + my $tmuxcmd = $ENV{'PARALLEL_TMUX'}. + " -S $qtmp new-session -d -n echo $l". + ("t"x$l). " && echo $l; rm -f $qtmp"; + push @out, ::qqx($tmuxcmd); + ::rm($tmpfile); + } + ::debug("tmux","tmux-out ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the tmux wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = ::max(@out); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($opt::sqlworker) { + # Open SQL table + $arg_sub_queue = SQLRecordQueue->new(); + } elsif(defined $colsep) { + # Open one file with colsep or CSV + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + my $ret = shift @{$self->{'unget'}}; + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if($ret) { + if(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" + # to mean no-string + ::warning("A NUL character in the input was replaced with \\0.", + "NUL cannot be passed through in the argument list.", + "Did you mean to use the --null option?"); + for(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Replace \0 with \\0 + my $a = $_->orig(); + $a =~ s/\0/\\0/g; + $_->set_orig($a); + } + } + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + # \0noarg => nothing (not the empty string) + map { $_->set_orig("\0noarg"); } @$ret; + } + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + map { $_->flush_cache() } @$ret; + } + return $ret; +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordQueue-unget\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } + $self->{'arg_sub_queue'}->flush_cache(); +} + +sub arg_number($) { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + if($opt::csv) { + # Parse CSV and put it into a record + chomp $line; + if(not $Global::csv->parse($line)) { + die "CSV has unexpected format: ^$line^"; + } + for($Global::csv->fields()) { + push @out_record, Arg->new($_); + } + } else { + # Split --colsep into record + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $arg (@{$self->{'unget'}}) { + $arg->flush_cache(); + } + $self->{'arg_sub_queue'}->flush_cache(); +} + + +package SQLRecordQueue; + +sub new($) { + my $class = shift; + my @unget = (); + return bless { + 'unget' => \@unget, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + return $Global::sql->get_record(); +} + +sub unget($) { + my $self = shift; + ::debug("run", "SQLRecordQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + if(@{$self->{'unget'}}) { return 0; } + my $get = $self->get(); + if(defined $get) { + $self->unget($get); + } + my $empty = not $get; + ::debug("run", "SQLRecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new($$) { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh and -t ($Global::status_fd || *STDERR)) { + ::warning( + "Input is read from the terminal. You are either an expert", + "(in which case: YOU ARE AWESOME!) or maybe you forgot", + "::: or :::: or -a or to pipe data into parallel. If so", + "consider going through the tutorial: man parallel_tutorial", + "Press CTRL-D to exit."); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + if($opt::link) { + return $self->link_get(); + } else { + return $self->nest_get(); + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @Global::unget_argv) && + not @{$self->{'unget'}}; + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + +sub link_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + for my $i (0..$#{$self->{'fhs'}}) { + my $fh = $self->{'fhs'}[$i]; + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}[$i]}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}[$i]}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}], + # Is input source --link'ed to the next? + $opt::linkinputsource[$fhn+1]); + } + # Find only combinations with this new entry + $combarg[2*$fhno] = [$len,$len]; + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. $no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + if(@mapped) { + return shift @{$self->{'unget'}}; + } + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +{ + my $cr_count = 0; + my $nl_count = 0; + my $dos_crnl_determined; + sub read_arg_from_fh($) { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend; + my $arg; + my $half_record = 0; + do {{ + # This makes 10% faster + if(not defined ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(not $dos_crnl_determined and not defined $opt::d) { + # Warn if input has CR-NL and -d is not set + if($arg =~ /\r$/) { + $cr_count++; + } else { + $nl_count++; + } + if($cr_count == 3 or $nl_count == 3) { + $dos_crnl_determined = 1; + if($nl_count == 0 and $cr_count == 3) { + ::warning('The first three values end in CR-NL. '. + 'Consider using -d "\r\n"'); + } + } + } + if($opt::csv) { + # We need to read a full CSV line. + if(($arg =~ y/"/"/) % 2 ) { + # The number of " on the line is uneven: + # If we were in a half_record => we have a full record now + # If we were outside a half_record => + # we are in a half record now + $half_record = not $half_record; + } + if($half_record) { + # CSV half-record with quoting: + # col1,"col2 2""x3"" board newline <-this one + # cont",col3 + $prepend .= $arg; + redo; + } else { + # Now we have a full CSV record + } + } + # Remove delimiter + chomp $arg; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + undef $prepend; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } + } +} + +# Prototype forwarding +sub expand_combinations(@); +sub expand_combinations(@) { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $link = shift; # This is linked to the next input source + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + my @rest = expand_combinations(@_); + if($link) { + # Linked to next col with --link/:::+/::::+ + # TODO BUG does not wrap values if not same number of vals + push(@p, map { [$$_[0], @$_] } + grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest); + } else { + # If there are more columns: Compute those recursively + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new($) { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+|,/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + # This hostgroup is not defined using -S + # Add it + ::warning("Adding hostgroups: @hostgroups"); + # Add sshlogin + for(grep { not defined $Global::hostgroups{$_} } @hostgroups) { + my $sshlogin = SSHLogin->new($_); + my $sshlogin_string = $sshlogin->string(); + $Global::host{$sshlogin_string} = $sshlogin; + $Global::hostgroups{$sshlogin_string} = 1; + } + } + } else { + # No hostgroup on the arg => any hostgroup + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = ::Q($_[0]); + no warnings 'redefine'; + *Q = \&::Q; + return $ret; +} + +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = ::pQ($_[0]); + no warnings 'redefine'; + *pQ = \&::pQ; + return $ret; +} + +sub hash($) { + $Global::use{"DBI"} ||= eval "use B; 1;"; + B::hash(@_); +} + +sub total_jobs() { + return $Global::JobQueue->total_jobs(); +} + +{ + my %perleval; + my $job; + sub skip() { + # shorthand for $job->skip(); + $job->skip(); + } + sub slot() { + # shorthand for $job->slot(); + $job->slot(); + } + sub seq() { + # shorthand for $job->seq(); + $job->seq(); + } + sub uq() { + # Do not quote this arg + $Global::unquote_arg = 1; + } + sub yyyy_mm_dd_hh_mm_ss(@) { + # ISO8601 2038-01-19T03:14:08 + ::strftime("%Y-%m-%dT%H:%M:%S", localtime(shift || time())); + } + sub yyyy_mm_dd_hh_mm(@) { + # ISO8601 2038-01-19T03:14 + ::strftime("%Y-%m-%dT%H:%M", localtime(shift || time())); + } + sub yyyy_mm_dd(@) { + # ISO8601 2038-01-19 + ::strftime("%Y-%m-%d", localtime(shift || time())); + } + sub hh_mm_ss(@) { + # ISO8601 03:14:08 + ::strftime("%H:%M:%S", localtime(shift || time())); + } + sub hh_mm(@) { + # ISO8601 03:14 + ::strftime("%H:%M", localtime(shift || time())); + } + sub yyyymmddhhmmss(@) { + # ISO8601 20380119 + ISO8601 031408 + ::strftime("%Y%m%d%H%M%S", localtime(shift || time())); + } + sub yyyymmddhhmm(@) { + # ISO8601 20380119 + ISO8601 0314 + ::strftime("%Y%m%d%H%M", localtime(shift || time())); + } + sub yyyymmdd(@) { + # ISO8601 20380119 + ::strftime("%Y%m%d", localtime(shift || time())); + } + sub hhmmss(@) { + # ISO8601 031408 + ::strftime("%H%M%S", localtime(shift || time())); + } + sub hhmm(@) { + # ISO8601 0314 + ::strftime("%H%M", localtime(shift || time())); + } + + sub replace($$$$) { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = shift; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + $job = shift; + # Positional replace treated as normal replace + $perlexpr =~ s/^(-?\d+)? *//; + if(not $Global::cache_replacement_eval + or + not $self->{'cache'}{$perlexpr}) { + # Only compute the value once + # Use $_ as the variable to change + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + # Trim the input + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@"); + ::wait_and_exit(255); + } + } + # Execute the function + $perleval{$perlexpr}->($job); + $self->{'cache'}{$perlexpr} = $_; + if($Global::unquote_arg) { + # uq() was called in perlexpr + $self->{'cache'}{'unquote'}{$perlexpr} = 1; + # Reset for next perlexpr + $Global::unquote_arg = 0; + } + } + # Return the value quoted if needed + if($self->{'cache'}{'unquote'}{$perlexpr}) { + return($self->{'cache'}{$perlexpr}); + } else { + return($quote ? Q($self->{'cache'}{$perlexpr}) + : $self->{'cache'}{$perlexpr}); + } + } +} + +sub flush_cache($) { + # Flush cache of computed values + my $self = shift; + $self->{'cache'} = undef; +} + +sub orig($) { + my $self = shift; + return $self->{'orig'}; +} + +sub set_orig($$) { + my $self = shift; + $self->{'orig'} = shift; +} + +sub trim_of($) { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr."); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new($) { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + $delta_time = ::multiply_time_units($delta_time); + + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time($) { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time($$) { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian($) { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian($$) { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime($) { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts($) { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->is_timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + ::warning("This job was killed because it timed out:", + $job->replaced()); + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert($) { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package SQL; + +sub new($) { + my $class = shift; + my $dburl = shift; + $Global::use{"DBI"} ||= eval "use DBI; 1;"; + # +DBURL = append to this DBURL + my $append = $dburl=~s/^\+//; + my %options = parse_dburl(get_alias($dburl)); + my %driveralias = ("sqlite" => "SQLite", + "sqlite3" => "SQLite", + "pg" => "Pg", + "postgres" => "Pg", + "postgresql" => "Pg", + "csv" => "CSV", + "oracle" => "Oracle", + "ora" => "Oracle"); + my $driver = $driveralias{$options{'databasedriver'}} || + $options{'databasedriver'}; + my $database = $options{'database'}; + my $host = $options{'host'} ? ";host=".$options{'host'} : ""; + my $port = $options{'port'} ? ";port=".$options{'port'} : ""; + my $dsn = "DBI:$driver:dbname=$database$host$port"; + my $userid = $options{'user'}; + my $password = $options{'password'};; + if(not grep /$driver/, DBI->available_drivers) { + ::error("$driver not supported. Are you missing a perl DBD::$driver module?"); + ::wait_and_exit(255); + } + my $dbh; + if($driver eq "CSV") { + # CSV does not use normal dsn + if(-d $database) { + $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", }) + or die $DBI::errstr; + } else { + ::error("$database is not a directory."); + ::wait_and_exit(255); + } + } else { + $dbh = DBI->connect($dsn, $userid, $password, + { RaiseError => 1, AutoInactiveDestroy => 1 }) + or die $DBI::errstr; + } + $dbh->{'PrintWarn'} = $Global::debug || 0; + $dbh->{'PrintError'} = $Global::debug || 0; + $dbh->{'RaiseError'} = 1; + $dbh->{'ShowErrorStatement'} = 1; + $dbh->{'HandleError'} = sub {}; + if(not defined $options{'table'}) { + ::error("The DBURL ($dburl) must contain a table."); + ::wait_and_exit(255); + } + if($options{'table'} =~ m:/:) { + ::error("The table name ($options{'table'}) cannot contain /"); + ::wait_and_exit(255); + } + + return bless { + 'dbh' => $dbh, + 'driver' => $driver, + 'max_number_of_args' => undef, + 'table' => $options{'table'}, + 'append' => $append, + }, ref($class) || $class; +} + +# Prototype forwarding +sub get_alias($); +sub get_alias($) { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + ::warning("$_ is deprecated. ". + "Use .sql/aliases instead (read man sql)."); + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + # local $/ needed if -0 set + local $/ = "\n"; + if(-r $alias_file) { + my $in = ::open_or_exit("<",$alias_file); + push @urlalias, <$in>; + close $in; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + ::error("$alias_part is a cyclic alias."); + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + ::error("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions($) { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be owned by $username: ". + "chown $username $file"); + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be only be readable by $username: ". + "chmod 600 $file"); + } + } +} + +sub parse_dburl($) { + sub undef_if_empty { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; + } + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]] + + if($url=~m!^(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql|influx|influxdb)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? + ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^?]*)? # Database ($6) + )? + (?: + / + ([^?/]*)? # Table ($7) + ) + (?: + \? + (.*)? # Query ($8) + )? + $!ix) { + $options{databasedriver} = undef_if_empty(lc(uri_unescape($1))); + $options{user} = undef_if_empty(uri_unescape($2)); + $options{password} = undef_if_empty(uri_unescape($3)); + $options{host} = undef_if_empty(uri_unescape($4)); + $options{port} = undef_if_empty(uri_unescape($5)); + $options{database} = undef_if_empty(uri_unescape($6)) + || $options{user} || $ENV{'USER'}; + $options{table} = undef_if_empty(uri_unescape($7)) + || $options{user} || $ENV{'USER'}; + $options{query} = undef_if_empty(uri_unescape($8)); + ::debug("sql", "dburl $url\n"); + ::debug("sql", "databasedriver ", $options{databasedriver}, + " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " table ", $options{table}, " query ", $options{query}, "\n"); + } else { + ::error("$url is not a valid DBURL"); + exit 255; + } + return %options; +} + +sub uri_unescape($) { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub run($) { + my $self = shift; + my $stmt = shift; + if($self->{'driver'} eq "CSV") { + $stmt=~ s/;$//; + if($stmt eq "BEGIN" or + $stmt eq "COMMIT") { + return undef; + } + } + my @retval; + my $dbh = $self->{'dbh'}; + ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n"); + # Execute with the rest of the args - if any + my $rv; + my $sth; + my $lockretry = 0; + while($lockretry < 10) { + $sth = $dbh->prepare($stmt); + if($sth + and + eval { $rv = $sth->execute(@_) }) { + last; + } else { + if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/ + or + $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) { + # This is fine: + # It is just a worker that reported back too late - + # another worker had finished the job first + # and the table was then dropped + $rv = $sth = 0; + last; + } + if($DBI::errstr =~ /locked/) { + ::debug("sql", "Lock retry: $lockretry"); + $lockretry++; + ::usleep(rand()*300); + } elsif(not $sth) { + # Try again + $lockretry++; + } else { + ::error($DBI::errstr); + ::wait_and_exit(255); + } + } + } + if($lockretry >= 10) { + ::die_bug("retry > 10: $DBI::errstr"); + } + if($rv < 0 and $DBI::errstr){ + ::error($DBI::errstr); + ::wait_and_exit(255); + } + return $sth; +} + +sub get($) { + my $self = shift; + my $sth = $self->run(@_); + my @retval; + # If $sth = 0 it means the table was dropped by another process + while($sth) { + my @row = $sth->fetchrow_array(); + @row or last; + push @retval, \@row; + } + return \@retval; +} + +sub table($) { + my $self = shift; + return $self->{'table'}; +} + +sub append($) { + my $self = shift; + return $self->{'append'}; +} + +sub update($) { + my $self = shift; + my $stmt = shift; + my $table = $self->table(); + $self->run("UPDATE $table $stmt",@_); +} + +sub output($) { + my $self = shift; + my $commandline = shift; + + $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ". + $commandline->seq(), + join("",@{$commandline->{'output'}{1}}), + join("",@{$commandline->{'output'}{2}})); +} + +sub max_number_of_args($) { + # Maximal number of args for this table + my $self = shift; + if(not $self->{'max_number_of_args'}) { + # Read the number of args from the SQL table + my $table = $self->table(); + my $v = $self->get("SELECT * FROM $table LIMIT 1;"); + my @reserved_columns = qw(Seq Host Starttime JobRuntime Send + Receive Exitval _Signal Command Stdout Stderr); + if(not $v) { + ::error("$table contains no records"); + } + # Count the number of Vx columns + $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; + } + return $self->{'max_number_of_args'}; +} + +sub set_max_number_of_args($$) { + my $self = shift; + $self->{'max_number_of_args'} = shift; +} + +sub create_table($) { + my $self = shift; + if($self->append()) { return; } + my $max_number_of_args = shift; + $self->set_max_number_of_args($max_number_of_args); + my $table = $self->table(); + $self->run(qq(DROP TABLE IF EXISTS $table;)); + # BIGINT and TEXT are not supported in these databases or are too small + my %vartype = ( + "Oracle" => { "BIGINT" => "NUMBER(19,0)", + "TEXT" => "CLOB", }, + "mysql" => { "TEXT" => "BLOB", }, + "CSV" => { "BIGINT" => "INT", + "FLOAT" => "REAL", }, + ); + my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; + my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; + my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)"; + my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); + $self->run(qq{CREATE TABLE $table + (Seq $BIGINT, + Host $TEXT, + Starttime $FLOAT, + JobRuntime $FLOAT, + Send $BIGINT, + Receive $BIGINT, + Exitval $BIGINT, + _Signal $BIGINT, + Command $TEXT,}. + $v_def. + qq{Stdout $TEXT, + Stderr $TEXT);}); +} + +sub insert_records($) { + my $self = shift; + my $seq = shift; + my $command_ref = shift; + my $record_ref = shift; + my $table = $self->table(); + # For SQL encode the command with \177 space as split points + my $command = join("\177 ",@$command_ref); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + # Two extra value due to $seq, Exitval, Send + my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4); + $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ". + "VALUES ($v_vals);", $seq, $command, -1000, + 0, @$record_ref[1..$#$record_ref]); +} + + +sub get_record($) { + my $self = shift; + my @retval; + my $table = $self->table(); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + my $rand = "Reserved-".$$.rand(); + my $v; + my $more_pending; + + do { + if($self->{'driver'} eq "CSV") { + # Sub SELECT is not supported in CSV + # So to minimize the race condition below select a job at random + my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Exitval = -1000 LIMIT 100;"); + $v = [ sort { rand() > 0.5 } @$r ]; + } else { + # Avoid race condition where multiple workers get the same job + # by setting Stdout to a unique string + # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL + $self->update("SET Stdout = ?,Exitval = ? ". + "WHERE Seq = (". + " SELECT * FROM (". + " SELECT min(Seq) FROM $table WHERE Exitval = -1000". + " ) AS dummy". + ") AND Exitval = -1000;", $rand, -1210); + # If a parallel worker overwrote the unique string this will get nothing + $v = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Stdout = ?;", $rand); + } + if($v->[0]) { + my $val_ref = $v->[0]; + # Mark record as taken + my $seq = shift @$val_ref; + # Save the sequence number to use when running the job + $SQL::next_seq = $seq; + $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220); + # Command is encoded with '\177 space' as delimiter + my @command; + my $encoded_cmd = shift @$val_ref; + if($encoded_cmd =~ /\177 /) { + @command = split /\177 /, $encoded_cmd; + } else { + # (\257 space for backward compatability) + @command = split /\257 /, $encoded_cmd; + # Recode \257 => \177 + for (@command,@$val_ref) { + s/\257/\177>/g; + s/\257\256/\257/g; + } + } + $SQL::command_ref = \@command; + for (@$val_ref) { + push @retval, Arg->new($_); + } + } else { + # If the record was updated by another job in parallel, + # then we may not be done, so see if there are more jobs pending + $more_pending = + $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210); + } + } while (not $v->[0] and $more_pending->[0]); + + if(@retval) { + return \@retval; + } else { + return undef; + } +} + +sub total_jobs($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT count(*) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::total_jobs"); + } +} + +sub max_seq($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT max(Seq) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::max_seq"); + } +} + +sub finished($) { + # Check if there are any jobs left in the SQL table that do not + # have a "real" exitval + my $self = shift; + if($opt::wait or $Global::start_sqlworker) { + my $table = $self->table(); + my $rv = $self->get("select Seq,Exitval from $table ". + "where Exitval <= -1000 limit 1"); + return not $rv->[0]; + } else { + return 1; + } +} + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in $PARALLEL_HOME/semaphores/id- Each +# file in $PARALLEL_HOME/semaphores/id-/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new($) { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_locks = $Global::cache_dir . "/semaphores"; + -d $parallel_locks or ::mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if(-d $parallel_locks and -w $parallel_locks + and -r $parallel_locks and -x $parallel_locks) { + # skip + } else { + ::error("Semaphoredir must be writable: '$parallel_locks'"); + ::wait_and_exit(255); + } + + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks($) { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(kill 0, $pid) { + ::debug("sem", "Alive: $pid $d\n"); + } else { + ::debug("sem", "Dead: $d\n"); + ::rm($d); + } + } + } +} + +sub acquire($) { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? + $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore."); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release($) { + my $self = shift; + ::rm($self->{'pidfile'}); + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + ::rm($self->{'idfile'}); + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change($) { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + ::rm($old_pidfile); +} + +sub atomic_link_if_count_less_than($) { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks($) { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock($) { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!", + "Will wait for a random while."); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. ". + "Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock($) { + my $self = shift; + ::rm($self->{'lockfile'}); + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +# Keep perl -w happy + +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg = +$Global::max_slot_number = $opt::session; + +package main; + + +sub main() { + unpack_combined_executable(); + save_stdin_stdout_stderr(); + save_original_signal_handler(); + parse_options(); + ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n"); + my $number_of_args; + if($Global::max_number_of_args) { + $number_of_args = $Global::max_number_of_args; + } elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; + } else { + $number_of_args = 1; + } + + my @command = @ARGV; + my @input_source_fh; + if($opt::pipepart) { + if($opt::tee) { + @input_source_fh = map { open_or_exit("<",$_) } @opt::a; + # Remove the first: It will be the file piped. + shift @input_source_fh; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } else { + # -a is used for data - not for command line args + @input_source_fh = map { open_or_exit("<",$_) } "/dev/null"; + } + } else { + @input_source_fh = map { open_or_exit("<",$_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } + + if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; + } + + set_input_source_header(\@command,\@input_source_fh); + if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); + } + if($opt::sqlmaster and $opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + + $Global::start_time = ::now(); + if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); + } + + $Global::JobQueue = JobQueue->new( + \@command, \@input_source_fh, $Global::ContextReplace, + $number_of_args, \@Global::transfer_files, \@Global::ret_files, + \@Global::template_names, \@Global::template_contents + ); + + if($opt::sqlmaster) { + # Create SQL table to hold joblog + output + # Figure out how many arguments are in a job + # (It is affected by --colsep, -N, $number_source_fh) + my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}; + my $record = $record_queue->get(); + my $no_of_values = $number_of_args * (1+$#{$record}); + $record_queue->unget($record); + $Global::sql->create_table($no_of_values); + if($opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + } + + if($opt::pipepart) { + pipepart_setup(); + } elsif($opt::pipe) { + if($opt::tee) { + pipe_tee_setup(); + } elsif($opt::shard or $opt::bin) { + pipe_shard_setup(); + } elsif($opt::groupby) { + pipe_group_by_setup(); + } + } + + if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { + # Count the number of jobs or shuffle all jobs + # before starting any. + # Must be done after ungetting any --pipepart jobs. + $Global::JobQueue->total_jobs(); + } + # Compute $Global::max_jobs_running + # Must be done after ungetting any --pipepart jobs. + max_jobs_running(); + init_run_jobs(); + my $sem; + if($Global::semaphore) { + $sem = acquire_semaphore(); + } + $SIG{TERM} = $Global::original_sig{TERM}; + $SIG{HUP} = \&start_no_new_jobs; + + if($opt::progress) { + ::status_no_nl(init_progress()); + } + if($opt::tee or $opt::shard or $opt::bin) { + # All jobs must be running in parallel for --tee/--shard/--bin + while(start_more_jobs()) {} + $Global::start_no_new_jobs = 1; + if(not $Global::JobQueue->empty()) { + if($opt::tee) { + ::error("--tee requires --jobs to be higher. Try --jobs 0."); + } elsif($opt::bin) { + ::error("--bin requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } elsif($opt::shard) { + ::error("--shard requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } else { + ::die_bug("--bin/--shard/--tee should not get here"); + } + ::wait_and_exit(255); + } + } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) { + # Fill all jobslots + while(start_more_jobs()) {} + spreadstdin(); + } elsif($opt::fast) { + fast(); + } else { + # Reap the finished jobs and start more + while(reapers() + start_more_jobs()) {} + } + ::debug("init", "Start draining\n"); + drain_job_queue(@command); + ::debug("init", "Done draining\n"); + reapers(); + ::debug("init", "Done reaping\n"); + if($Global::semaphore) { $sem->release(); } + cleanup(); + ::debug("init", "Halt\n"); + halt(); +} + +main(); diff --git a/tools/ci/inria/main b/tools/ci/inria/main index e3d2387d892e..b222c491782b 100755 --- a/tools/ci/inria/main +++ b/tools/ci/inria/main @@ -181,11 +181,13 @@ case "${OCAML_ARCH}" in cleanup=true check_make_alldepend=true export OCAMLTEST_SKIP_TESTS="$memory_model_tests" + init_submodule_flexdll=true ;; cygwin64) cleanup=true check_make_alldepend=true export OCAMLTEST_SKIP_TESTS="$memory_model_tests" + init_submodule_flexdll=true ;; mingw) build='--build=i686-pc-cygwin' diff --git a/tools/objinfo.ml b/tools/objinfo.ml index 5557cd7e4e46..c4c523d173ee 100644 --- a/tools/objinfo.ml +++ b/tools/objinfo.ml @@ -35,6 +35,8 @@ let uid_deps = ref false module Magic_number = Misc.Magic_number +let yesno_of_bool oc b = output_string oc (if b then "YES" else "no") + let dummy_crc = String.make 32 '-' let null_crc = String.make 32 '0' @@ -67,13 +69,19 @@ let print_cmo_infos cu = printf "YES\n"; printf "Primitives declared in this module:\n"; List.iter print_line l); - printf "Force link: %s\n" (if cu.cu_force_link then "YES" else "no") + printf "Force link: %a\n" yesno_of_bool cu.cu_force_link let print_spaced_string s = printf " %s" s +let dllib (suffixed, name) = + if suffixed then + Printf.sprintf "%s--" name + else + name + let print_cma_infos (lib : Cmo_format.library) = - printf "Force custom: %s\n" (if lib.lib_custom then "YES" else "no"); + printf "Force custom: %a\n" yesno_of_bool lib.lib_custom; printf "Extra C object files:"; (* PR#4949: print in linking order *) List.iter print_spaced_string (List.rev lib.lib_ccobjs); @@ -81,7 +89,7 @@ let print_cma_infos (lib : Cmo_format.library) = List.iter print_spaced_string (List.rev lib.lib_ccopts); printf "\n"; print_string "Extra dynamically-loaded libraries:"; - List.iter print_spaced_string (List.rev lib.lib_dllibs); + List.iter print_spaced_string (List.rev_map dllib lib.lib_dllibs); printf "\n"; List.iter print_cmo_infos lib.lib_units @@ -249,11 +257,13 @@ let print_cmx_infos (ui, crc) = printf "Currying functions:%a\n" pr_funs ui.ui_curry_fun; printf "Apply functions:%a\n" pr_funs ui.ui_apply_fun; printf "Send functions:%a\n" pr_funs ui.ui_send_fun; - printf "Force link: %s\n" (if ui.ui_force_link then "YES" else "no"); + printf "Force link: %a\n" yesno_of_bool ui.ui_force_link; printf "For pack: %s\n" (match ui.ui_for_pack with | None -> "no" - | Some pack -> "YES: " ^ pack) + | Some pack -> "YES: " ^ pack); + printf + "Requires caml_standard_library_nat: %a\n" yesno_of_bool ui.ui_need_stdlib let print_cmxa_infos (lib : Cmx_format.library_infos) = printf "Extra C object files:"; @@ -288,9 +298,57 @@ let p_list title print = function p_title title; List.iter print l +let p_runtime_id ({Misc.RuntimeID.dev; release; no_flat_float_array; fp; tsan; + int31; static; no_compression; ansi; reserved} as t) = + let version = + if release > Config.release_number then + "" + else + if release = 0 then + " (Objective Caml 3.12)" + else if release < 16 then + Printf.sprintf " (OCaml 4.%02d)" (release - 1) + else + Printf.sprintf " (OCaml 5.%d)" (release - 16) + in + printf "\t%s = Release %d%s%s\n" + (Misc.RuntimeID.to_string t) + release version (if dev then " - development/altered version" else ""); + if reserved > 0 then + printf "\t - %d reserved header bit%s\n" + reserved (if reserved = 1 then "" else "s"); + if no_flat_float_array then + printf "\t - Flat float array representation disabled\n"; + if fp then + printf "\t - Frame pointers enabled\n"; + if tsan then + printf "\t - TSAN enabled\n"; + if int31 then + printf "\t - Compiled without 64-bit support\n"; + if static then + printf "\t - Compiled without support dynamic loading\n"; + if no_compression then + printf "\t - Compiled without support for compressed marshalling\n"; + if ansi then + printf "\t - Windows Unicode support disabled\n" + +let p_runtime (runtime, id, search) = + let runtime = + let some id = runtime ^ "-" ^ Misc.RuntimeID.to_string id in + Option.fold ~none:runtime ~some id + in + let runtime = + match search with + | Byterntm.Search -> runtime + | Byterntm.Absolute dir -> dir ^ runtime + | Byterntm.Absolute_then_search dir -> Printf.sprintf "[%s]%s" dir runtime + in + printf "Runtime:\n\t%s\n" runtime; + Option.iter p_runtime_id id + let dump_byte ic = let toc = Bytesections.read_toc ic in - let all = Bytesections.all toc in + Option.iter p_runtime (Byterntm.read_runtime toc ic); List.iter (fun {Bytesections.name = section; len; _} -> try @@ -317,10 +375,19 @@ let dump_byte ic = | SYMB -> let symb = Bytesections.read_section_struct toc ic section in print_global_table symb + | ORUN -> + let ocamlrunparam = + Bytesections.read_section_string toc ic section in + printf "Overridden OCAMLRUNPARAM defaults: %s\n" ocamlrunparam + | OSLD -> + let caml_standard_library_default = + Bytesections.read_section_string toc ic section in + printf "caml_standard_library_default: %s\n" + caml_standard_library_default | _ -> () with _ -> () ) - all + (Bytesections.all toc) let find_dyn_offset filename = match Binutils.read filename with diff --git a/tools/ocamlmklib.ml b/tools/ocamlmklib.ml index 80d0c4c9f33b..05bba3ad4681 100644 --- a/tools/ocamlmklib.ml +++ b/tools/ocamlmklib.ml @@ -25,10 +25,8 @@ let mklib out files opts = Printf.sprintf "link -lib -nologo %s-out:%s %s %s" machine out opts files else Printf.sprintf "%s rcs %s %s %s" Config.ar out opts files -(* PR#4783: under Windows, don't use absolute paths because we do - not know where the binary distribution will be installed. *) let compiler_path name = - if Sys.os_type = "Win32" then name else Filename.concat Config.bindir name + Filename.concat Config.bindir name let bytecode_objs = ref [] (* .cmo,.cma,.ml,.mli files to pass to ocamlc *) and native_objs = ref [] (* .cmx,.ml,.mli files to pass to ocamlopt *) @@ -51,6 +49,7 @@ and output_c = ref "" (* Output name for C part of library *) and rpath = ref [] (* rpath options *) and debug = ref false (* -g option *) and verbose = ref false +and suffixed = ref false (* -suffixed option *) let starts_with s pref = String.length s >= String.length pref && @@ -164,6 +163,10 @@ let parse_arguments argv = c_opts := s :: !c_opts else if s = "-framework" then (let a = next_arg s in c_opts := a :: s :: !c_opts) + else if s = "-suffixed" then + suffixed := true + else if s = "-no-suffixed" then + suffixed := false else if starts_with s "-" then raise (Bad_argument("Unknown option " ^ s)) else @@ -210,6 +213,7 @@ Options are: -oc Generated C library is named dll.so or lib.a -rpath Same as -dllpath -R Same as -rpath + -suffixed Append runtime ID to any generated shared libraries -verbose Print commands before executing them -v same as -verbose -version Print version and exit @@ -286,11 +290,17 @@ let flexdll_dirs = let build_libs () = if !c_objs <> [] then begin if !dynlink then begin + let dllname = + if !suffixed then + Misc.RuntimeID.stubslib !output_c + else + !output_c + in let retcode = command (Printf.sprintf "%s %s -o %s %s %s %s %s %s %s" Config.mkdll (if !debug then "-g" else "") - (prepostfix "dll" !output_c Config.ext_dll) + (prepostfix "dll" dllname Config.ext_dll) (String.concat " " !c_objs) (String.concat " " !c_opts) (String.concat " " !ld_opts) @@ -308,7 +318,7 @@ let build_libs () = end; if !bytecode_objs <> [] then scommand - (sprintf "%s -a %s %s %s -o %s.cma %s %s -dllib -l%s -cclib -l%s \ + (sprintf "%s -a %s %s %s -o %s.cma %s %s -dllib%s -l%s -cclib -l%s \ %s %s %s %s" (transl_path !ocamlc) (if !debug then "-g" else "") @@ -317,6 +327,7 @@ let build_libs () = !output (String.concat " " !caml_opts) (String.concat " " !bytecode_objs) + (if !suffixed then "-suffixed" else "") (Filename.basename !output_c) (Filename.basename !output_c) (String.concat " " (prefix_list "-ccopt " !c_opts)) diff --git a/tools/ocamlsize b/tools/ocamlsize index 783068067f78..44793b877091 100755 --- a/tools/ocamlsize +++ b/tools/ocamlsize @@ -19,12 +19,25 @@ foreach $f (@ARGV) { open(FILE, $f) || die("Cannot open $f"); read(FILE, $header, 2); if ($header eq '#!') { - $path = ; - if ($path = '/bin/sh') { - # exec form of the shebang header - $path = ; + chomp($path = ); + if ($path =~ m/\/sh$/) { + # shell-script form of the shebang header + chomp($path = ); + # exec form - used for -runtime-search absolute when the path to the + # runtime isn't valid as a #! line. if ($path =~ s/^exec '(.*)' "\$0" "\$@\"$/$1/ > 0) { $path =~ s/'\\''/'/g; + # Both -runtime-search enable and -runtime-search always define a + # variable r with the name of the runtime (see bytecomp/bytelink.ml) + } elsif ($path =~ s/^r='(.*)'$/$1/ > 0) { + $path =~ s/'\\''/'/g; + chomp($dir = ); + # In -runtime-search enable, there will also be a path to the runtime + # defined the variable c. + if ($dir =~ s/^c='(.*)'"\$r"$/$1/ > 0) { + $dir =~ s/'\\''/'/g; + $path = "[$dir]$path"; + } } else { undef $path; } @@ -45,10 +58,19 @@ foreach $f (@ARGV) { } print $f, ":\n" if ($#ARGV > 0); if (not defined $path) { - $path = - $length{'RNTM'} > 0 ? - substr(&read_section('RNTM'), 0, -1) : - "(custom runtime)"; + if ($length{'RNTM'} > 0) { + $path = &read_section('RNTM'); + # RNTM is "\0ocamlrun" for -runtime-search always + if ($path !~ s/^\0//) { + # RNTM is "/path/to/ocamlrun" for -runtime-search disable and + # "/path/to\0ocamlrun" for -runtime-search enable. Transform the + # embedded "\0" into a directory separator and display the directory + # in square brackets (as above for the sh-case) + $path =~ s/^([^\/\\]*)([\\\/])([^\0]*)\0(.*)$/[$1$2$3$2]$4/ + } + } else { + $path = '(custom runtime)'; + } }; printf ("\tcode: %-7d data: %-7d symbols: %-7d debug: %-7d\n", $length{'CODE'}, $length{'DATA'}, diff --git a/tools/opam/gen_ocaml_config.ml b/tools/opam/gen_ocaml_config.ml new file mode 100644 index 000000000000..bd3358697899 --- /dev/null +++ b/tools/opam/gen_ocaml_config.ml @@ -0,0 +1,161 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Louis Gesbert, OCamlPro *) +(* *) +(* Copyright 2017 OCamlPro SAS *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* For as long it remains not totally impractical to do so, this script is + written in OCaml 3.07. Its purpose is to generate an opam .config file + containing the following variables: + - native: True if ocamlopt is located with ocaml + - native-tools: True if ocamlc.opt is located with ocaml + - native-dynlink: True if dynlink.cmxa exists in -I + or -I +dynlink + - stubsdir: Content of +ld.conf in CAML_LD_LIBRARY_PATH format + - preinstalled: True if this installation is provided by the system, rather + than compiled from sources by opam + - compiler: ["system"], if [preinstalled], otherwise the version of the opam + compiler package which provided the compiler (e.g. "5.4.0"). For + largely historical reasons, custom compiler append additional + configuration information (e.g. "5.4.0+options+flambda"). This + variable should be considered deprecated and the content + unstable. *) + +(* The script must be invoked using the interpreter, for example: + ocaml gen_ocaml_config.ml 5.4.0 ocaml 5.4.0+options false +flambda + where "5.4.0" is the expected value of Sys.ocaml_version (with any additional + information removed), the resulting configuration should be written to + "ocaml.config" and the "compiler" variable should be set to + "5.4.0+options+flambda". *) +let expected_ocaml_version, + package_config_file, + compiler_package_version, + preinstalled, + option_names = + match Array.to_list Sys.argv with + | _ :: + expected_ocaml_version :: + package_config_file :: + compiler_package_version :: + preinstalled :: + options -> + expected_ocaml_version, + package_config_file ^ ".config", + compiler_package_version, + preinstalled, + String.concat "" (List.filter ((<>) "") options) + | _ -> + prerr_endline "Invalid arguments"; + exit 1 + +(* Check that Sys.ocaml_version is as expected *) +let () = + let ocaml_version = + Scanf.sscanf Sys.ocaml_version "%u.%u" (fun major minor -> + if (major, minor) > (3, 7) then + (* Strip off any additional information *) + Scanf.sscanf Sys.ocaml_version "%[^~+]" (fun x -> x) + else + Sys.ocaml_version) + in + if ocaml_version <> expected_ocaml_version then + (Printf.eprintf + "OCaml version mismatch: %s, expected %s" + ocaml_version expected_ocaml_version; + exit 1) + +(* Write the .config file *) +let () = + let binary = + let dir = Filename.dirname Sys.executable_name in + if Filename.check_suffix Sys.executable_name ".exe" then + fun name -> Filename.concat dir (name ^ ".exe") + else + fun name -> Filename.concat dir name + in + let libdir = + let exit_code = + let ocamlc = binary "ocamlc" in + let ocamlc = + if Sys.os_type = "Win32" then + if String.contains ocamlc ' ' then + "\"" ^ ocamlc ^ "\"" + else + ocamlc + else + Filename.quote ocamlc + in + Sys.command (ocamlc ^ " -where > where") in + if exit_code = 0 then + (* Must be opened in text mode for Windows *) + let ic = open_in "where" in + let r = input_line ic in + close_in ic; Sys.remove "where"; r + else begin + Printf.eprintf "Unexpected exit code %d from `ocamlc -where'\n" exit_code; + exit 1 + end + in + let native = Sys.file_exists (binary "ocamlopt") in + let native_tools = Sys.file_exists (binary "ocamlc.opt") in + let native_dynlink = + let check_dir libdir = + Sys.file_exists (Filename.concat libdir "dynlink.cmxa") + in + List.exists check_dir [Filename.concat libdir "dynlink"; libdir] + in + let stubsdir = + let ld_conf = Filename.concat libdir "ld.conf" in + if Sys.file_exists ld_conf then + let separator = if Sys.os_type = "Win32" then ";" else ":" in + let ic = open_in ld_conf in + let rec input_lines acc = + try + let line = input_line ic in + let line = + if line = Filename.current_dir_name then + libdir + else if line = Filename.parent_dir_name then + Filename.concat libdir line + else + Scanf.sscanf line "%[.]%1[/\\]" (fun prefix separator -> + if separator <> "" then + if prefix = Filename.current_dir_name then + let line = String.sub line 2 (String.length line - 2) in + Filename.concat libdir line + else if prefix = Filename.parent_dir_name then + Filename.concat libdir line + else + line + else + line) + in + input_lines (line::acc) + with End_of_file -> + close_in ic; List.rev acc + in + String.concat separator (input_lines []) + else + "" + in + let oc = open_out package_config_file in + (* Quoted strings need OCaml 4.02; "\ " needs OCaml 3.09! *) + Printf.fprintf oc "\ + opam-version: \"2.0\"\n\ + variables {\n \ + native: %b\n \ + native-tools: %b\n \ + native-dynlink: %b\n \ + stubsdir: %S\n \ + preinstalled: %s\n \ + compiler: \"%s%s\"\n\ + }\n" native native_tools native_dynlink stubsdir preinstalled + compiler_package_version option_names; + close_out oc diff --git a/tools/opam/generate.ml b/tools/opam/generate.ml new file mode 100644 index 000000000000..b392feb1a6ac --- /dev/null +++ b/tools/opam/generate.ml @@ -0,0 +1,221 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* David Allsopp, University of Cambridge & Tarides *) +(* *) +(* Copyright 2025 David Allsopp Ltd. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* This script is called from the root of the repository at the end of + `make INSTALL_MODE= install` and is responsible for converting + the various files generated by the installation backend into final output. + Parameters are the following Makefile variables: + $1 = $(INSTALL_MODE) (opam or clone) + $2 = $(OPAM_PACKAGE_NAME) + $3 = $(LN) *) + +let exit_because fmt = Printf.ksprintf (fun s -> prerr_endline s; exit 1) fmt + +let () = + if Array.length Sys.argv <> 4 + || Sys.argv.(1) <> "clone" && Sys.argv.(1) <> "opam" then begin + exit_because "Invalid command line arguments" + end + +let mode = Sys.argv.(1) +let package = Sys.argv.(2) +let ln_command = Sys.argv.(3) + +let output_endline oc = Printf.kfprintf (fun oc -> output_char oc '\n') oc + +(* [generate_file file] processes then erases opam-bin, opam-lib opam-libexec + and opam-man to produce [file] *) +let write_install_lines oc file = + In_channel.with_open_text file @@ + In_channel.fold_lines (fun _ -> output_endline oc " %s") () + +let output_section oc section = + let file = "opam-" ^ section in + if Sys.file_exists file then begin + let section = + if section = "lib" || section = "libexec" then + section ^ "_root" + else + section + in + output_endline oc {|%s: [ +%a]|} section write_install_lines file; + Sys.remove file + end + +let generate_install file = + Out_channel.with_open_text file @@ fun oc -> + List.iter (output_section oc) ["bin"; "lib"; "libexec"; "man"]; + output_endline oc {|share_root: [ + "config.cache" {"ocaml/config.cache"} + "config.status" {"ocaml/config.status"} +]|} + +(* [process_clone oc process] processes clone-* in the current directory, + emitting mkdir commands to [oc] and passing the directory name and a channel + set to the start of each clone file to [process]. The clone files are erased + after processing. *) +let process_clone oc process = + let process_file file = + if String.starts_with ~prefix:"clone-" file then begin + let dir = + String.map (function '@' -> '/' | c -> c) + (String.sub file 6 (String.length file - 6)) + in + output_endline oc {|mkdir -p "$1/%s"|} dir; + In_channel.with_open_text file @@ process oc dir; + Sys.remove file + end + in + Array.iter process_file (Sys.readdir Filename.current_dir_name) + +(* [process_symlinks oc ~mkdir] processes create-symlinks, if it exists, writing + any required mkdir commands to [oc] if [~mkdir = true] and also the + appropriate ln / mklink commands. create-symlinks is erased after + processing. *) +let process_symlinks oc ~mkdir = + let module StringSet = Set.Make(String) in + let file = "create-symlinks" in + if Sys.file_exists file then + let lines = + let parse acc line = + match String.split_on_char ' ' line with + | [dir; target; source] -> + (dir, target, source)::acc + | _ -> + exit_because "Invalid line encountered in create-symlinks" + in + In_channel.with_open_text file @@ fun ic -> + List.rev (In_channel.fold_lines parse [] ic) + in + output_endline oc {|cd "$1"|}; + let _ = + let create_dir seen (dir, _, _) = + if not (StringSet.mem dir seen) && String.contains dir '/' then + output_endline oc {|mkdir -p '%s'|} dir; + StringSet.add dir seen + in + List.fold_left create_dir StringSet.empty (if mkdir then lines else []) + in + if not Sys.win32 then + let ln (dir, target, source) = + let target = Filename.quote target in + let source = Filename.quote (Filename.concat dir source) in + output_endline oc {|%s %s %s|} ln_command target source + in + List.iter ln lines + else begin + let mklink (dir, target, source) = + (* Convert all slashes to _two_ backslashes *) + let to_backslashes oc s = + output_string oc (String.concat {|\\|} (String.split_on_char '/' s)) + in + output_endline oc + {| cmd /c "mklink %a\\%s %s"|} to_backslashes dir source target + and cp (dir, target, source) = + let target = Filename.quote (Filename.concat dir target) in + let source = Filename.quote (Filename.concat dir source) in + output_endline oc {| $CP %s %s|} target source + in + output_endline oc {|cmd /c "mklink __ln_test mklink-test"|}; + output_endline oc {|if test -L "$1/__ln_test"; then|}; + List.iter mklink lines; + output_endline oc {|else|}; + List.iter cp lines; + output_endline oc {|fi|}; + output_endline oc {|rm -f __ln_test|} + end; + Sys.remove file + +let copy_files oc dir = + In_channel.fold_lines (fun _ line -> + match String.split_on_char ' ' line with + | [source; dest] -> + output_endline oc {|cp '%s' "$1/%s/%s"|} source dir dest + | _ -> + exit_because "Invalid line encountered in clone files") () + +let clone_files oc dir ic = + output_endline oc + {|dest="$1/%s" xargs sh "$1/clone-files" <<'EOF'|} dir; + In_channel.fold_lines (fun _ -> output_endline oc "%s") () ic; + output_endline oc {|EOF|} + +let () = + if mode = "opam" then begin + generate_install (package ^ ".install"); + (* The script must be written with Unix line-endings on Windows *) + Out_channel.with_open_bin (package ^ "-fixup.sh") @@ fun oc -> + output_endline oc {|#!/bin/sh +set -e|}; + process_clone oc copy_files; + process_symlinks oc ~mkdir:true + end else begin + (* Don't pass -p to cp on Windows - it's never going to be relevant (no + execute bit which needs preserving) and there are scenarios in which it's + more likely to fail than add anytning useful (especially if copying from + a Cygwin-managed build directory to /cygdrive) *) + let preserve = if Sys.win32 then "" else "p" in + (* The script must be written with Unix line-endings on Windows *) + Out_channel.with_open_bin (package ^ "-clone.sh") @@ fun oc -> + output_endline oc {|#!/bin/sh +set -e +mkdir -p "$1" +rm -f "$1/__cp_test" "$1/__ln_test" +if cp --reflink=always doc/ocaml/LICENSE "$1/__cp_test" 2>/dev/null; then + rm -f "$1/__cp_test" + CP='cp --reflink=always -%sf' + if ! test -e "$1/clone-files"; then + echo "$CP"' "$@" "$dest/"' > "$1/clone-files" + fi +else + CP='cp -%sf' + if ! test -e "$1/clone-files"; then + if ln -f doc/ocaml/LICENSE "$1/__ln_test" 2>/dev/null; then + rm -f "$1/__ln_test" + echo 'ln -f "$@" "$dest/"' > "$1/clone-files" + else + echo "$CP"' "$@" "$dest/"' > "$1/clone-files" + fi + fi +fi|} preserve preserve; + let has_config_cache = Sys.file_exists "config.cache" in + Out_channel.with_open_text "clone-share@ocaml" (fun oc -> + output_endline oc "share/ocaml/clone"; + if has_config_cache then + output_endline oc "share/ocaml/config.cache"); + process_clone oc clone_files; + if not has_config_cache then + output_endline oc {|mkdir -p "$1/share/ocaml"|}; + (* ld.conf is a configuration file, so is always copied. + Makefile.config and config.status will both contain the original + prefix, which must be updated. *) + output_endline oc {|cp lib/ocaml/ld.conf "$1/lib/ocaml/ld.conf" +cat > "$1/prefix.awk" <<"ENDAWK" +{ + rest = $0 + while ((p = index(rest, ENVIRON["O"]))) { + printf "%%s%%s", substr(rest, 1, p-1), ENVIRON["N"] + rest = substr(rest, p + length(ENVIRON["O"])) + } + print rest +} +ENDAWK +prefix="$(sed -ne 's/^prefix *= *//p' lib/ocaml/Makefile.config)" +for file in lib/ocaml/Makefile.config share/ocaml/config.status; do + O="$prefix" N="$1" awk -f "$1/prefix.awk" "$file" > "$1/$file" +done +rm -f "$1/clone-files" "$1/prefix.awk"|}; + process_symlinks oc ~mkdir:false + end diff --git a/tools/opam/process.sh b/tools/opam/process.sh new file mode 100644 index 000000000000..5c10d7fa925a --- /dev/null +++ b/tools/opam/process.sh @@ -0,0 +1,182 @@ +#!/bin/sh +#************************************************************************** +#* * +#* OCaml * +#* * +#* David Allsopp, University of Cambridge & Tarides * +#* * +#* Copyright 2025 David Allsopp Ltd. * +#* * +#* All rights reserved. This file is distributed under the terms of * +#* the GNU Lesser General Public License version 2.1, with the * +#* special exception on linking described in the file LICENSE. * +#* * +#************************************************************************** + +set -e + +# This script is responsible for building and cloning OCaml installations. It is +# invoked by both the build and install sections of an opam package. +# $1 = make command (the `make` variable in opam). This should be the path to +# a binary only and is invoked without word-splitting (i.e. any +# additional arguments should be passed in $2 and the command is invoked +# "$1"). +# $2 = additional arguments passed to "$1". This variable will be used +# unquoted - arguments with spaces cannot be passed. From the build +# section, this allows the -j argument to be specified. For the install +# section, this argument must be "install". +# $3 = opam build-id variable of this package. +# $4 = name of the opam package to be used when generating .install and +# .config files. +# The remaining arguments depend on the value of $2. When it is "install": +# $5 = installation prefix, which may be a native Windows path, rather than a +# Cygwin path. +# When $2 is not "install" (the build opam section): +# $5 = "enabled" if cloning the compiler from an existing switch is permitted +# and "disabled" to force the compiler to be built from sources. +# $6, and any further arguments are additional options to pass to `configure` +# if the compiler is built from sources. + +make="$1" +make_args="$2" +build_id="$3" +package_name="$4" + +if [ x"$make_args" = 'xinstall' ]; then + prefix="$5" + + echo "📦 Installing the compiler to $prefix" + if [ -e 'config.status' ]; then + echo "📜 Using make install" + "$make" install + else + origin="$(tail -n 1 build-id)" + origin_prefix="$(opam var --switch="$origin" prefix | tr -d '\r')" + echo "🪄 Duplicating $origin_prefix" + ( cd "$origin_prefix" && sh ./share/ocaml/clone "$prefix" ) + fi + + exit 0 +fi + +# Build the package + +cloning="$5" +shift 5 +# "$@" now expands to the correctly-quoted arguments to pass to configure + +origin='' +clone_mechanism='' +if [ x"$cloning" = 'xenabled' ]; then + echo "🕵️ Searching for a switch containing build-id $build_id" + + if [ -e "$OPAM_SWITCH_PREFIX/share/ocaml/build-id" ]; then + switch="$(tail -n 1 "$OPAM_SWITCH_PREFIX/share/ocaml/build-id")" + if [ -n "$switch" ]; then + switch_share_dir="$(opam var --switch="$switch" share | tr -d '\r')" + switch_build_id="$switch_share_dir/ocaml/build-id" + if [ -e "$switch_build_id" ]; then + if [ x"$build_id" = x"$(head -n 1 $switch_build_id)" ]; then + echo "🔁 Prefer to re-clone from $switch" + echo "$switch" > opam-switches + origin="$switch" + if ln "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + clone_mechanism='hard-linking' + fi + fi + fi + fi + fi + + echo "🐫 Requesting list of switches from opam" + opam switch list --short | tr -d '\r' | grep -Fxv "$OPAMSWITCH" \ + >> opam-switches 2> /dev/null || true + + while IFS= read -r switch; do + switch_share_dir="$(opam var --switch="$switch" share | tr -d '\r')" + switch_build_id="$switch_share_dir/ocaml/build-id" + if [ -e "$switch_build_id" ]; then + if [ x"$build_id" = x"$(head -n 1 "$switch_build_id")" ]; then + # There are three ways of cloning a switch: + # - Copy-on-Write (cp --reflink=always) + # - Hard linking + # - Copy + # Copy-on-Write is the ideal - virtually no space overhead, but with + # defence against accidental subsequent alterations. Hard linking is + # preferred over copying for the space-saving, and because the + # compiler should not being subsequently altered. + if cp --reflink=always "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + echo "📝 - can reflink from: $switch" + origin="$switch" + clone_mechanism='copy-on-write' + break + elif ln "$switch_build_id" __cp_test 2>/dev/null; then + rm __cp_test + if [ -z "$clone_mechanism" ]; then + echo "🔗 - can hard link from: $switch" + origin="$switch" + clone_mechanism='hard-linking' + fi + elif [ -z "$origin" ]; then + echo "📄 - can copy from: $switch" + origin="$switch" + fi + elif [ -z "$origin" ]; then + echo "⛔ - different compiler: $switch" + fi + fi + done < opam-switches +fi + +{ echo "$build_id"; echo "$origin" ; } > build-id + +if [ -n "$origin" ]; then + + echo "🧬 Will clone the compiler from $origin" + test -n "$clone_mechanism" || clone_mechanism='copying' + + cloned='true' + clone_source="$(sed -e '1d;s/\\/\\\\/g;s/%/%%/g;s/"/\\"/g' build-id)" + case "$origin" in + */*|*\\*) clone_source="local switch $clone_source";; + *) clone_source="global switch $clone_source";; + esac + + cat > "$package_name.install" <<'EOF' +share_root: [ + "build-id" {"ocaml/build-id"} +] +EOF + +else + + echo "🏗️ Will build the compiler from sources" + + cloned='false' + clone_source='' + + ./configure --cache-file=config.cache "$@" + "$make" $make_args + "$make" OPAM_PACKAGE_NAME=ocaml-compiler INSTALL_MODE=clone install + + cat > "$package_name.install" <<'EOF' +share_root: [ + "build-id" {"ocaml/build-id"} + "ocaml-compiler-clone.sh" {"ocaml/clone"} + "config.cache" {"ocaml/config.cache"} + "config.status" {"ocaml/config.status"} +] +EOF +fi + +# Create the .config file +cat > "$package_name.config" < "ocamlprof.dump" + match Sys.getenv_opt "OCAMLPROF_DUMP" with + | None | Some "" -> "ocamlprof.dump" + | Some file -> file in begin try let ic = open_in_bin dumpfile in diff --git a/toplevel/byte/topeval.ml b/toplevel/byte/topeval.ml index 9fbba9e9ced8..6e562773912b 100644 --- a/toplevel/byte/topeval.ml +++ b/toplevel/byte/topeval.ml @@ -69,6 +69,16 @@ include Topcommon.MakeEvalPrinter(EvalBase) (* Load in-core and execute a lambda term *) +module Meta = struct + type closure = unit -> Obj.t + type bytecode + external reify_bytecode : + (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t -> + Instruct.debug_event list array -> string option -> bytecode * closure + = "caml_reify_bytecode" + external release_bytecode : bytecode -> unit = "caml_static_release_bytecode" +end + let may_trace = ref false (* Global lock on tracing *) let load_lambda ppf lam = @@ -240,6 +250,8 @@ let load_compunit ic filename ppf compunit = raise Load_failed end +external supports_shared_libraries : unit -> bool = "%shared_libraries" + let rec load_file recursive ppf name = let filename = try Some (Load_path.find name) with Not_found -> None @@ -283,6 +295,17 @@ and really_load_file recursive ppf name filename ic = let toc_pos = input_binary_int ic in (* Go to table of contents *) seek_in ic toc_pos; let lib = (input_value ic : library) in + if lib.lib_dllibs <> [] && not (supports_shared_libraries ()) then begin + let detail = + match lib.lib_dllibs with + | [_] -> "a shared library" + | _ -> "shared libraries" + in + fprintf ppf + "File %s requires %s to be loaded, which the runtime executing \ + this toplevel does not support.@." name detail; + raise Load_failed + end; List.iter (fun dllib -> let name = Dll.extract_dll_name dllib in diff --git a/toplevel/byte/trace.ml b/toplevel/byte/trace.ml index bddb4b3f7a89..bf50dcd8bfab 100644 --- a/toplevel/byte/trace.ml +++ b/toplevel/byte/trace.ml @@ -53,12 +53,15 @@ let set_code_pointer cls ptr = (* Call a traced function (use old code pointer, but new closure as environment so that recursive calls are also traced). - It is necessary to wrap Meta.invoke_traced_function in an ML function + It is necessary to wrap caml_invoke_traced_function in an ML function so that the RETURN at the end of the ML wrapper takes us to the code of the function. *) +external caml_invoke_traced_function: + Obj.raw_data -> Obj.t -> Obj.t -> Obj.t = "caml_invoke_traced_function" + let invoke_traced_function codeptr env arg = - Meta.invoke_traced_function codeptr env arg + caml_invoke_traced_function codeptr env arg let print_label ppf l = if l <> Asttypes.Nolabel then fprintf ppf "%s:" diff --git a/toplevel/topcommon.ml b/toplevel/topcommon.ml index 19d845767153..65cb5e0970b3 100644 --- a/toplevel/topcommon.ml +++ b/toplevel/topcommon.ml @@ -294,6 +294,7 @@ let set_paths ?(auto_include=Compmisc.auto_include) ?(dir="") () = let update_search_path_from_env () = let extra_paths = let env = Sys.getenv_opt "OCAMLTOP_INCLUDE_PATH" in + (* NB: Misc.split_path_contents "" = [] *) Option.fold ~none:[] ~some:Misc.split_path_contents env in Clflags.include_dirs := List.rev_append extra_paths !Clflags.include_dirs diff --git a/utils/ccomp.ml b/utils/ccomp.ml index defe4d2a4b92..8effc9b289e7 100644 --- a/utils/ccomp.ml +++ b/utils/ccomp.ml @@ -90,7 +90,13 @@ let compile_file ?output ?(opt="") ?stable_name name = ("", "") in let debug_prefix_map = match stable_name with - | Some stable when Config.c_has_debug_prefix_map -> + | Some stable + when Config.c_has_debug_prefix_map + && not (String.starts_with ~prefix:"mingw" Config.system) -> + (* -fdebug-prefix-map exists on mingw-w64 but at present it is not used + for BUILD_PATH_PREFIX_MAP because there isn't yet a good story for how + to deal with Cygwin, where the paths are Cygwin-style paths and MSYS2, + where they are native Windows paths. *) Printf.sprintf " -fdebug-prefix-map=%s=%s" name stable | Some _ | None -> "" in let exit = @@ -147,16 +153,15 @@ let create_archive archive file_list = (quote_files ~response_files:Config.ar_supports_response_files file_list)) -let expand_libname cclibs = - cclibs |> List.map (fun cclib -> - if String.starts_with ~prefix:"-l" cclib then - let libname = - "lib" ^ String.sub cclib 2 (String.length cclib - 2) ^ Config.ext_lib in - try - Load_path.find libname - with Not_found -> - libname - else cclib) +let expand_libname cclib = + if String.starts_with ~prefix:"-l" cclib then + let libname = + "lib" ^ String.sub cclib 2 (String.length cclib - 2) ^ Config.ext_lib in + try + Some (Load_path.find libname) + with Not_found -> + None + else Some cclib type link_mode = | Exe @@ -176,11 +181,16 @@ let call_linker mode output_name files extra = Profile.record_call "c-linker" (fun () -> let cmd = if mode = Partial then - let (l_prefix, files) = + let l_prefix = match Config.ccomp_type with - | "msvc" -> ("/libpath:", expand_libname files) - | _ -> ("-L", files) + | "msvc" -> "/libpath:" + | _ -> "-L" in + (* For partial linking, only include -llib if -llib can be found in the + current search path. For ld -r, this PATH is (usually) limited to the + -L directories. This should cause OCaml libraries to be linked, but + not any system libraries mentioned in .cma/.cmxa files. *) + let files = List.filter_map expand_libname files in Printf.sprintf "%s%s %s %s %s" Config.native_pack_linker (Filename.quote output_name) diff --git a/utils/clflags.ml b/utils/clflags.ml index bf79b30b7e4b..2f9ef0e42a50 100644 --- a/utils/clflags.ml +++ b/utils/clflags.ml @@ -38,9 +38,10 @@ module Float_arg_helper = Arg_helper.Make (struct end end) -let objfiles = ref ([] : string list) (* .cmo and .cma files *) -and ccobjs = ref ([] : string list) (* .o, .a, .so and -cclib -lxxx *) -and dllibs = ref ([] : string list) (* .so and -dllib -lxxx *) +let objfiles = ref ([] : string list) (* .cmo and .cma files *) +and ccobjs = ref ([] : string list) (* .o, .a, .so and -cclib -lxxx *) +and dllibs = ref ([] : (bool * string) list) (* .so, -dllib -lxxx and + '-dllib-suffixed -lxxx *) let cmi_file = ref None @@ -48,6 +49,9 @@ let compile_only = ref false (* -c *) and output_name = ref (None : string option) (* -o *) and include_dirs = ref ([] : string list) (* -I *) and hidden_include_dirs = ref ([] : string list) (* -H *) +and runtime_parameters = (* -set-runtime-default *) + (Hashtbl.create 16 : (string, string) Hashtbl.t) +and standard_library_default = ref None (* -set-runtime-default *) and no_std_include = ref false (* -nostdlib *) and no_cwd = ref false (* -nocwd *) and print_types = ref false (* -i *) @@ -86,6 +90,10 @@ and noinit = ref false (* -noinit *) and open_modules = ref [] (* -open *) and use_prims = ref "" (* -use-prims ... *) and use_runtime = ref "" (* -use-runtime ... *) +and launch_method = (* -launch-method ... *) + ref (Config.launch_method, Config.target_bindir) +and search_method = (* -search-method ... *) + ref Config.search_method and plugin = ref false (* -plugin ... *) and principal = ref false (* -principal *) and real_paths = ref true (* -short-paths *) diff --git a/utils/clflags.mli b/utils/clflags.mli index 3e54d98ad514..d71417ead9cb 100644 --- a/utils/clflags.mli +++ b/utils/clflags.mli @@ -70,12 +70,14 @@ val use_inlining_arguments_set : ?round:int -> inlining_arguments -> unit val objfiles : string list ref val ccobjs : string list ref -val dllibs : string list ref +val dllibs : (bool * string) list ref val cmi_file : string option ref val compile_only : bool ref val output_name : string option ref val include_dirs : string list ref val hidden_include_dirs : string list ref +val runtime_parameters : (string, string) Hashtbl.t +val standard_library_default : string option ref val no_std_include : bool ref val no_cwd : bool ref val print_types : bool ref @@ -113,6 +115,8 @@ val noinit : bool ref val noversion : bool ref val use_prims : string ref val use_runtime : string ref +val launch_method : (Config.launch_method * string) ref +val search_method : Config.search_method ref val plugin : bool ref val principal : bool ref val print_variance : bool ref diff --git a/utils/config.common.ml.in b/utils/config.common.ml.in index 0f956d2fbc5f..ddc7be9f9e74 100644 --- a/utils/config.common.ml.in +++ b/utils/config.common.ml.in @@ -20,14 +20,41 @@ (* The main OCaml version string has moved to ../build-aux/ocaml_version.m4 *) let version = Sys.ocaml_version +(* is_release and release_number are automatically updated autoconf from values + in ../build-aux/ocaml_version.m4 - do not edit these lines directly. *) +let is_release = false +let release_number = 21 + +external standard_library_default : unit -> string = "%standard_library_default" + +let standard_library_default = standard_library_default () + +external stdlib_dirs : string -> string * string option + = "caml_sys_get_stdlib_dirs" + +let standard_library_effective, relative_root_dir = + stdlib_dirs standard_library_default + let standard_library = - try - Sys.getenv "OCAMLLIB" - with Not_found -> - try - Sys.getenv "CAMLLIB" - with Not_found -> - standard_library_default + match Sys.getenv_opt "OCAMLLIB" with + | None | Some "" -> + begin match Sys.getenv_opt "CAMLLIB" with + | None | Some "" -> + standard_library_effective + | Some value -> + value + end + | Some value -> + value + +let standard_library_relative = relative_root_dir <> None + +let bindir = Option.value ~default:bindir relative_root_dir +let target_bindir = + if target_bindir = Filename.current_dir_name then + Filename.dirname Sys.executable_name + else + target_bindir let exec_magic_number = {magic|@EXEC_MAGIC_NUMBER@|magic} (* exec_magic_number is duplicated in runtime/caml/exec.h *) @@ -46,6 +73,21 @@ let safe_string = true let default_safe_string = true let naked_pointers = false +type launch_method = Executable | Shebang of string option +type search_method = Absolute | Absolute_then_search | Search + +let launch_method = + match launch_method with + | "exe" -> Executable + | "sh" -> Shebang None + | _ -> Shebang (Some launch_method) + +let search_method = + match search_method with + | "always" -> Search + | "enable" -> Absolute_then_search + | _ -> Absolute + let interface_suffix = ref ".mli" let max_tag = 243 @@ -57,11 +99,16 @@ let lazy_tag = 246 let max_young_wosize = 256 let stack_threshold = 32 (* see runtime/caml/config.h *) let stack_safety_margin = 6 +let target_unix = (target_os_type = "Unix") +let target_win32 = (target_os_type = "Win32") +let target_cygwin = (target_os_type = "Cygwin") let default_executable_name = - match target_os_type with - "Unix" -> "a.out" - | "Win32" | "Cygwin" -> "camlprog.exe" - | _ -> "camlprog" + if target_unix then + "a.out" + else if target_win32 || target_cygwin then + "camlprog.exe" + else + "camlprog" type configuration_value = | String of string | Int of int @@ -71,9 +118,20 @@ let configuration_variables () = let p x v = (x, String v) in let p_int x v = (x, Int v) in let p_bool x v = (x, Bool v) in + let is_explicit_relative path = + path = Filename.current_dir_name + || path = Filename.parent_dir_name + || Filename.is_relative path && not (Filename.is_implicit path) + in + let standard_library_relative = + if is_explicit_relative standard_library_default then + standard_library_default + else + "" in [ p "version" version; - p "standard_library_default" standard_library_default; + p "standard_library_default" standard_library_effective; + p "standard_library_relative" standard_library_relative; p "standard_library" standard_library; p "ccomp_type" ccomp_type; p "c_compiler" c_compiler; @@ -113,6 +171,8 @@ let configuration_variables () = p_bool "systhread_supported" systhread_supported; p "host" host; p "target" target; + p "bytecode_runtime_id" bytecode_runtime_id; + p "native_runtime_id" native_runtime_id; p_bool "flambda" flambda; p_bool "safe_string" safe_string; p_bool "default_safe_string" default_safe_string; diff --git a/utils/config.fixed.ml b/utils/config.fixed.ml index c8141427f840..e1ec3df79de3 100644 --- a/utils/config.fixed.ml +++ b/utils/config.fixed.ml @@ -21,13 +21,14 @@ let boot_cannot_call s = "/ The boot compiler should not call " ^ s let bindir = "/tmp" -let standard_library_default = "/tmp" +let target_bindir = bindir let ccomp_type = "n/a" let c_compiler = boot_cannot_call "the C compiler" let c_compiler_vendor = "" let c_output_obj = "" let c_has_debug_prefix_map = false let as_has_debug_prefix_map = false +let as_is_cc = false let bytecode_cflags = "" let bytecode_cppflags = "" let native_cflags = "" @@ -58,6 +59,8 @@ let align_double = true let align_int64 = true let function_sections = false let afl_instrument = false +let bytecode_runtime_id = "" +let native_runtime_id = "" let native_compiler = false let tsan = false let architecture = "none" @@ -82,3 +85,6 @@ let systhread_supported = false let flexdll_dirs = [] let ar_supports_response_files = true let shebangscripts = false +let suffixing = false +let launch_method = "sh" +let search_method = "always" diff --git a/utils/config.generated.ml.in b/utils/config.generated.ml.in index dfa7c09e7d74..b6a7613744d3 100644 --- a/utils/config.generated.ml.in +++ b/utils/config.generated.ml.in @@ -19,8 +19,7 @@ than compiled on its own *) let bindir = {@QS@|@ocaml_bindir@|@QS@} - -let standard_library_default = {@QS@|@ocaml_libdir@|@QS@} +let target_bindir = {@QS@|@TARGET_BINDIR@|@QS@} let ccomp_type = {@QS@|@ccomp_type@|@QS@} let c_compiler = {@QS@|@CC@|@QS@} @@ -28,6 +27,7 @@ let c_compiler_vendor = {@QS@|@ocaml_cc_vendor@|@QS@} let c_output_obj = {@QS@|@outputobj@|@QS@} let c_has_debug_prefix_map = @cc_has_debug_prefix_map@ let as_has_debug_prefix_map = @as_has_debug_prefix_map@ +let as_is_cc = @as_is_cc@ let bytecode_cflags = {@QS@|@bytecode_cflags@|@QS@} let bytecode_cppflags = {@QS@|@bytecode_cppflags@|@QS@} let native_cflags = {@QS@|@native_cflags@|@QS@} @@ -71,6 +71,9 @@ let align_int64 = @align_int64@ let function_sections = @function_sections@ let afl_instrument = @afl@ +let bytecode_runtime_id = {@QS@|@bytecode_runtime_id@|@QS@} +let native_runtime_id = {@QS@|@native_runtime_id@|@QS@} + let native_compiler = @native_compiler@ let architecture = {@QS@|@arch@|@QS@} @@ -103,3 +106,9 @@ let ar_supports_response_files = @ar_supports_response_files@ let tsan = @tsan@ let shebangscripts = @shebangscripts@ + +let suffixing = @suffixing@ + +let launch_method = {@QS@|@target_launch_method@|@QS@} + +let search_method = {@QS@|@runtime_search_target@|@QS@} diff --git a/utils/config.mli b/utils/config.mli index cd85aad4daef..273927184113 100644 --- a/utils/config.mli +++ b/utils/config.mli @@ -23,11 +23,48 @@ val version: string (** The current version number of the system *) +val release_number: int +(** The release number for the compiler + + @since 5.5 *) + +val is_release: bool +(** True if the compiler is an unmodified official OCaml release + + @since 5.5 *) + val bindir: string -(** The directory containing the binary programs *) +(** The directory containing the binary programs. If the compiler was configured + with [--with-relative-libdir] then this will be the directory containing the + currently executing runtime. *) + +val standard_library_default: string +(** The configured value for the directory containing the standard libraries. + May be a relative path if the compiler was configured with + [--with-relative-libdir]. + + @since 5.5 *) + +val standard_library_effective: string +(** The standard library directory, computed taking {!standard_library_relative} + and {!standard_library_default} into account, but not taking CAMLLIB or + OCAMLLIB into account. + + @since 5.5 *) + +val standard_library_relative: bool +(** Whether {!standard_library_effective} is computed relative to the runtime + + @since 5.5 *) + +val target_bindir: string +(** The directory containing the runtime binaries on the target system + + @since 5.5 *) val standard_library: string -(** The directory containing the standard libraries *) +(** The effective directory containing the standard libraries, taking CAMLLIB + and OCAMLLIB into account. *) val ccomp_type: string (** The "kind" of the C compiler, assembler and linker used: one of @@ -70,6 +107,12 @@ val c_has_debug_prefix_map : bool val as_has_debug_prefix_map : bool (** Whether the assembler supports --debug-prefix-map *) +val as_is_cc : bool +(** Whether the assembler is actually an assembler, or whether we are really + assembling files via the C compiler + + @since 5.5 *) + val bytecode_cflags : string (** The flags ocamlc should pass to the C compiler *) @@ -202,7 +245,24 @@ val target_os_type: string (** Operating system targeted by the native-code compiler. One of - ["Unix"] (for all Unix versions, including Linux and macOS), - ["Win32"] (for MS-Windows, OCaml compiled with MSVC++ or MinGW-w64), -- ["Cygwin"] (for MS-Windows, OCaml compiled with Cygwin). *) +- ["Cygwin"] (for MS-Windows, OCaml compiled with Cygwin). + + @since 5.4 *) + +val target_unix: bool +(** True if [target_os_type = "Unix"] + + @since 5.5 *) + +val target_win32: bool +(** True if [target_os_type = "Win32"] + + @since 5.5 *) + +val target_cygwin: bool +(** True if [target_os_type = "Cygwin"] + + @since 5.5 *) val asm: string (** The assembler (and flags) to use for assembling @@ -316,11 +376,56 @@ val ar_supports_response_files: bool val tsan : bool (** Whether ThreadSanitizer instrumentation is enabled *) +(** Launch mechanisms for bytecode executables + + @since 5.5 *) +type launch_method = +| Executable (** Use executable launcher stub *) +| Shebang of string option (** Use shebang-style launcher, either directly to + the runtime, or via sh. The parameter if + specified is the full path to sh, otherwise the + linker searches for it. *) + +val launch_method : launch_method +(** Default launch mechanism for bytecode executables + + @since 5.5 *) + +(** Mechanisms used by tendered bytecode executables to locate the interpreter + + @since 5.5 *) +type search_method = +| Absolute (** Check fixed absolute location only *) +| Absolute_then_search (** Check fixed absolute location, but perform a search + if that fails *) +| Search (** Always search for the interpreter *) + +val search_method : search_method +(** Default search mechanism for bytecode executables + + @since 5.5 *) + val shebangscripts : bool (** Whether the target supports shebang scripts @since 5.5 *) +val suffixing : bool +(** Whether the runtime executable and shared library filenames and C stub + library filenames are being mangled with Runtime IDs and the {!target}. + + @since 5.5 *) + +val bytecode_runtime_id : string +(** The Runtime ID for this build of the bytecode runtime system + + @since 5.5 *) + +val native_runtime_id : string +(** The Runtime ID for this build of the native runtime system + + @since 5.5 *) + (** Access to configuration values *) val print_config : out_channel -> unit diff --git a/utils/misc.ml b/utils/misc.ml index 8499cb1ba824..b5b7da657b23 100644 --- a/utils/misc.ml +++ b/utils/misc.ml @@ -579,7 +579,7 @@ let path_separator = let split_path_contents ?(sep = path_separator) = function | "" -> [] - | s -> String.split_on_char sep s + | s -> List.filter ((<>) "") (String.split_on_char sep s) (* Hashtable functions *) @@ -1433,3 +1433,134 @@ module Magic_number = struct | Error err -> Error (Unexpected_error err) | Ok () -> Ok info end + +module RuntimeID = struct + type t = { + dev: bool; + release: int; + reserved: int; + no_flat_float_array: bool; + fp: bool; + tsan: bool; + int31: bool; + static: bool; + no_compression: bool; + ansi: bool; + } + + let make fn ?(dev = not Config.is_release) + ?(release = Config.release_number) + ?(reserved = Config.reserved_header_bits) + ?(no_flat_float_array = not Config.flat_float_array) + ?(fp = Config.with_frame_pointers) + ?(tsan = Config.tsan) + ?(int31 = (Sys.int_size = 31)) + ?(static = not Config.supports_shared_libraries) + ?(no_compression = (Config.compression_c_libraries = "")) + ?(ansi = Config.target_win32 && not Config.windows_unicode) () = + if release < 0 || release > 63 || reserved < 0 || reserved > 31 then + invalid_arg fn + else + {dev; release; reserved; no_flat_float_array; fp; tsan; int31; static; + no_compression; ansi} + + let make_zinc = + make "Misc.RuntimeID.make_zinc" + ~reserved:0 ~fp:false ~tsan:false ~ansi:false + + let make_bytecode = + make "Misc.RuntimeID.make_bytecode" ~fp:false ~tsan:false + + let make_native = make "Misc.RuntimeID.make_native" + + let is_zinc = function + | {dev = _; release = _; reserved = 0; no_flat_float_array = _; fp = false; + tsan = false; int31 = _; static = _; no_compression = _; ansi = false} -> + true + | _ -> + false + + let is_bytecode = function + | {dev = _; release = _; reserved = _; no_flat_float_array = _; fp = false; + tsan = false; int31 = _; static = _; no_compression = _; ansi = _} -> true + | _ -> false + + let is_native _ = true + + let to_string t = + let alpha = "0123456789abcdefghijklmnopqrstuv" in + let bit bit cond = if cond then 1 lsl bit else 0 in + let q0 = + (bit 0 t.dev) lor + ((t.release lsl 1) land 0b11110) (* 4 bits *) + in + let q1 = + t.release lsr 4 lor (* 2 bits *) + ((t.reserved lsl 2) land 0b11100) (* 3 bits *) + in + let q2 = + t.reserved lsr 3 lor (* 2 bits *) + bit 2 t.no_flat_float_array lor + bit 3 t.fp lor + bit 4 t.tsan + in + let q3 = + bit 0 t.int31 lor + bit 1 t.static lor + bit 2 t.no_compression lor + bit 3 t.ansi + (* bit 4 is unused *) + in + Printf.sprintf "%c%c%c%c" alpha.[q0] alpha.[q1] alpha.[q2] alpha.[q3] + + let of_string s = + if String.length s <> 4 then + None + else + let convert c = + match c with + | '0'..'9' -> Char.code c - Char.code '0' + | 'a'..'v' -> Char.code c - Char.code 'a' + 10 + | _ -> min_int + in + let set bit q = (q land (1 lsl bit) <> 0) in + let q0 = convert s.[0] in + let q1 = convert s.[1] in + let q2 = convert s.[2] in + let q3 = convert s.[3] in + if q0 + q1 + q2 + q3 >= 0 then + Some {dev = set 0 q0; release = ((q1 land 0b11) lsl 4) lor (q0 lsr 1); + reserved = ((q2 land 0b11) lsl 2) lor (q1 lsr 2); + no_flat_float_array = set 2 q2; fp = set 3 q2; tsan = set 4 q2; + int31 = set 0 q3; static = set 1 q3; no_compression = set 2 q3; + ansi = set 3 q3; (* bit 4 of q3 is unused *)} + else + None + + let of_zinc_hi ?(dev = not Config.is_release) + ?(release = Config.release_number) s = + Option.map (fun id -> {id with dev; release}) (of_string ("00" ^ s)) + + let ocamlrun variant runtime_id = + if is_zinc runtime_id then + Printf.sprintf "ocamlrun%s-%s" variant (to_string runtime_id) + else + invalid_arg "Misc.RuntimeID.ocamlrun" + + let shared_runtime ?runtime_id ?(host = Config.target) ?(prefix = "-l") + backend_type = + match backend_type with + | Sys.Native -> + let runtime_id = Option.value ~default:(make_native ()) runtime_id in + Printf.sprintf "%sasmrun-%s-%s" prefix host (to_string runtime_id) + | Sys.Bytecode -> + let runtime_id = Option.value ~default:(make_bytecode ()) runtime_id in + Printf.sprintf "%scamlrun-%s-%s" prefix host (to_string runtime_id) + | Sys.Other _ -> + invalid_arg "Misc.RuntimeID.shared_runtime" + + let stubslib ?(runtime_id = make_bytecode ()) + ?(host = Config.target) + name = + Printf.sprintf "%s-%s-%s" name host (to_string runtime_id) +end diff --git a/utils/misc.mli b/utils/misc.mli index 267c09bda1d8..717fb7704294 100644 --- a/utils/misc.mli +++ b/utils/misc.mli @@ -246,7 +246,9 @@ val split_path_contents: ?sep:char -> string -> string list directories. [s] is split using the platform-specific delimiter, or [~sep] if it is passed. - Returns the empty list if [s] is empty. *) + Returns the empty list if [s] is empty. Empty strings are filtered + out - include [Filename.current_dir_name] explicitly if the current + working directory is supposed to be searched. *) val copy_file: in_channel -> out_channel -> unit (** [copy_file ic oc] reads the contents of file [ic] and copies @@ -875,6 +877,110 @@ module Utf8_lexeme: sig are not checked. *) end +module RuntimeID : sig + (** Manipulation of the Runtime ID values used to mangle the filenames of + shared libraries and the bytecode interpreters. + + @since 5.5 *) + + (** Runtime IDs *) + type t = private { + dev: bool; + (** [true] if this not an unaltered official release of OCaml *) + release: int; + (** Release number (OCaml 5.5 is release 21) *) + reserved: int; + (** The number of reserved bits (0-31) in the {v value v} header *) + no_flat_float_array: bool; + (** [true] if float arrays must be boxed (i.e. configured with + {v --disable-flat-float-array v}) *) + fp: bool; + (** [true] if frame pointers are required (i.e. configured with + {v --enable-frame-pointers v} *) + tsan: bool; + (** [true] if ThreadSanitizer (TSAN) is required (i.e. configured with + {v --enable-tsan v}) *) + int31: bool; + (** [true] if the platform has 31-bit [int]s (i.e. 32-bit systems) *) + static: bool; + (** [true] if dynamic loading of libraries is not supported *) + no_compression: bool; + (** [true] if compressed marshalling is not supported *) + ansi: bool; + (** [true] if Unicode support on Windows is disabled *) + } + + val make_zinc: ?dev:bool -> ?release:int + -> ?no_flat_float_array:bool + -> ?int31:bool -> ?static:bool -> ?no_compression:bool + -> unit -> t + (** Returns the Zinc Runtime ID for the given parameters (using default values + from {!Config} and {!Sys} as necessary) *) + + val make_bytecode: ?dev:bool -> ?release:int + -> ?reserved:int -> ?no_flat_float_array:bool + -> ?int31:bool -> ?static:bool -> ?no_compression:bool + -> ?ansi:bool + -> unit -> t + (** Returns the Bytecode Runtime ID for the given parameters (using default + values from {!Config} and {!Sys} as necessary) *) + + val make_native: ?dev:bool -> ?release:int + -> ?reserved:int -> ?no_flat_float_array:bool -> ?fp:bool -> ?tsan:bool + -> ?int31:bool -> ?static:bool -> ?no_compression:bool + -> ?ansi:bool + -> unit -> t + (** Returns the Native Runtime ID for the given parameters (using default + values from {!Config} and {!Sys} as necessary) *) + + val is_zinc: t -> bool + (** [is_zinc t] is true if [t] can be used as a Zinc Runtime ID *) + + val is_bytecode: t -> bool + (** [is_bytecode t] is true if [t] can be used as a Bytecode Runtime ID *) + + val is_native: t -> bool + (** [is_native t] is true if [t] can be used as a Native Runtime ID *) + + val to_string: t -> string + (** Returns the 4-character representation of a {!t} *) + + val of_string: string -> t option + (** Converts the 4-character representation back to a {!t} *) + + val of_zinc_hi: ?dev:bool -> ?release:int -> string -> t option + (** Converts hi 2 characters of the representation back to a {!t} (using the + default version information from {!Config}. *) + + val ocamlrun: string -> t -> string + (** [ocamlrun variant runtime_id] returns the name for the runtime for the + given Zinc Runtime ID. *) + + val shared_runtime: ?runtime_id:t -> ?host:string + -> ?prefix:string -> Sys.backend_type -> string + (** [shared_runtime ?runtime_id ?host ?prefix backend] returns the name of the + shared runtime for the given [backend]. [runtime_id] defaults to + {!make_bytecode} if [backend = Sys.Bytecode] and {!make_native} if + [backend = Sys.Native] and [host] to {!Config.target}. [prefix] defaults + to ["-l"] and the function does not append {!Config.ext_dll}. + + e.g. [shared_runtime ~host:"x86_64-pc-linux-gnu" Native + = "-lasmrun-x86_64-pc-linux-gnu-b100"] for a default OCaml 5.5 + build on a 64-bit system with shared library support and compressed + marshalling. *) + + val stubslib: ?runtime_id:t -> ?host:string -> string -> string + (** [stublibs ?runtime_id ?host dllname] returns the name for the given DLL + basename. [dllname] should not include {!Config.ext_dll} (and the result + does not include it either). [host] and [runtime_id] default to + {!Config.target} and {!make_bytecode} respectively. + + e.g. [stubslib ~host:"x86_64-pc-linux-gnu" "dllunixbyt" + = "dllunixbyt-x86_64-pc-linux-gnu-001b"] for a default OCaml 5.5 + build on a 64-bit system with shared library support and compressed + marshalling. *) +end + (** {1 Miscellaneous type aliases} *) type filepath = string diff --git a/yacc/main.c b/yacc/main.c index 59cf02d5c223..6dead9327764 100644 --- a/yacc/main.c +++ b/yacc/main.c @@ -290,14 +290,18 @@ void create_file_names(void) #ifdef _WIN32 tmpdir = _wgetenv(L"TEMP"); - if (tmpdir == 0) tmpdir = L"."; + /* Ensure tmpdir is neither NULL nor zero-length */ + if (tmpdir == 0 || *tmpdir == 0) tmpdir = L"."; #else tmpdir = getenv("TMPDIR"); - if (tmpdir == 0) tmpdir = "/tmp"; + /* Write to /tmp instead of . if TMPDIR is "Set But Null" (also ensures + tmpdir is neither NULL nor zero-length */ + if (tmpdir == 0 || *tmpdir == 0) tmpdir = "/tmp"; #endif len = strlen_os(tmpdir); i = len + sizeof(temp_form); - if (len && tmpdir[len-1] != dirsep) + /* Technically, tmpdir != NULL && *tmpdir != 0 - i.e. len > 0 */ + if (tmpdir[len-1] != dirsep) ++i; action_file_name = MALLOC(i * sizeof(char_os)); @@ -311,7 +315,7 @@ void create_file_names(void) strcpy_os(entry_file_name, tmpdir); strcpy_os(text_file_name, tmpdir); - if (len && tmpdir[len - 1] != dirsep) + if (tmpdir[len - 1] != dirsep) { action_file_name[len] = dirsep; entry_file_name[len] = dirsep;