diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 740a84b71..966b0db8f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -4,3 +4,5 @@ 2a2e1a95e522e820ba6ef9a0b7af73ddb5ead5b0 0a178daa6c8c335799c0b05db3f212594435216e 3f523a79e10e023308d28c4b030c6081b6dbbaa3 + +# XXX: https://git-scm.com/docs/git-replace diff --git a/env/cpu-arm64.mk b/env/cpu-arm64.mk new file mode 100644 index 000000000..3a98984f4 --- /dev/null +++ b/env/cpu-arm64.mk @@ -0,0 +1,13 @@ +# Cycc/Cympile - Shared Build Scripts for Make +# Copyright (C) 2013-2020 Jay Freeman (saurik) + +# Zero Clause BSD license {{{ +# +# Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# }}} + + +# XXX: consider using =pac-ret+leaf +qflags += -mbranch-protection=standard diff --git a/env/cpu-x86_64.mk b/env/cpu-x86_64.mk new file mode 100644 index 000000000..647c38a49 --- /dev/null +++ b/env/cpu-x86_64.mk @@ -0,0 +1,15 @@ +# Cycc/Cympile - Shared Build Scripts for Make +# Copyright (C) 2013-2020 Jay Freeman (saurik) + +# Zero Clause BSD license {{{ +# +# Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# }}} + + +ifneq ($(target),win) +# XXX: this breaks libgcrypt due to cet.h being ELF-specific +qflags += -fcf-protection=full +endif diff --git a/env/embed.c b/env/embed.c deleted file mode 100644 index c9baf7e0b..000000000 --- a/env/embed.c +++ /dev/null @@ -1,40 +0,0 @@ -// Cycc/Cympile - Shared Build Scripts for Make -// Copyright (C) 2013-2020 Jay Freeman (saurik) - -// Zero Clause BSD license {{{ -// -// Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-// }}} - - -#include - -int main(int argc, const char *argv[]) { - const char *name = argv[1]; - FILE *file = fopen(argv[2], "rb"); - if (file == NULL) return 1; - printf("const unsigned char %s_data[] =\n", name); - size_t size = 0; - for (;;) { - unsigned char data[1024]; - size_t writ = fread(data, 1, sizeof(data), file); - if (writ == 0) break; - size += writ; - char line[writ*4+3]; - line[0] = '"'; - for (size_t i = 0; i != writ; ++i) { - line[i*4+1] = '\\'; - line[i*4+2] = 'x'; - line[i*4+3] = "0123456789abcdef"[data[i]/16]; - line[i*4+4] = "0123456789abcdef"[data[i]%16]; - } - line[sizeof(line)-2] = '"'; - line[sizeof(line)-1] = '\n'; - fwrite(line, 1, sizeof(line), stdout); - } - printf(";\n"); - printf("const unsigned int %s_size = %zu;\n", name, size); - return 0; -} diff --git a/env/kit-android.mk b/env/kit-android.mk index e3816285e..73f003da9 100644 --- a/env/kit-android.mk +++ b/env/kit-android.mk @@ -67,6 +67,8 @@ cxx := $(llvm)/bin/clang++ $(more) define _ ar/$(1) := $(llvm)/bin/llvm-ar ranlib/$(1) := $(llvm)/bin/llvm-ranlib +objdump/$(1) := $(llvm)/bin/llvm-objdump +objcopy/$(1) := $(llvm)/bin/llvm-objcopy endef $(each) diff --git a/env/kit-default.mk b/env/kit-default.mk index ddaa58e60..de8580902 100644 --- a/env/kit-default.mk +++ b/env/kit-default.mk @@ -15,6 +15,8 @@ cxx := clang++$(suffix) $(more) define _ ar/$(1) := ar ranlib/$(1) := ranlib +objdump/$(1) := llvm-objdump$(suffix) +objcopy/$(1) := llvm-objcopy$(suffix) endef $(each) diff --git a/env/output.mk b/env/output.mk index 946250e8f..0d23e09fd 100644 --- a/env/output.mk +++ b/env/output.mk @@ -194,3 +194,10 @@ object := $(filter-out $(1)%.o,$(object)) linked += $(1)_.a endef $(foreach archive,$(archive),$(eval $(call _,$(archive)))) + +define _ +object := $$(patsubst $(1).o,$(1)-.o,$$(object)) +$$(output)/%/$(1)-.o: $$(output)/%/$(1).o + $$(objcopy) $(2) $$< $$@ +endef +$(foreach oflags,$(filter oflags/%,$(.VARIABLES)),$(eval $(call _,$(patsubst oflags/%,%,$(oflags)),$($(oflags))))) 
diff --git a/env/setup-apt.sh b/env/setup-apt.sh index df84179d2..70814c64e 100755 --- a/env/setup-apt.sh +++ b/env/setup-apt.sh @@ -9,7 +9,8 @@ apt-get -y install \ curl git-core rsync wget \ fakeroot libtalloc-dev \ cpio rpm unzip zstd \ - clang clang-tidy lld \ + clang clang-tidy lld llvm \ + binutils-{aarch64,x86-64}-linux-gnu \ libc++-dev libc++abi-dev \ g++-multilib gcc-multilib \ python3-pip python3-setuptools \ diff --git a/env/setup-sys.sh b/env/setup-sys.sh index 1e511d7fd..8c0d5ef09 100755 --- a/env/setup-sys.sh +++ b/env/setup-sys.sh @@ -5,7 +5,7 @@ shift 1 export DEBIAN_FRONTEND=noninteractive apt-get -y update apt-get -y dist-upgrade -apt-get -y install --no-install-recommends libstdc++-"${llvm}"-dev{,-arm{hf,64}-cross} "$@" +apt-get -y install --no-install-recommends libstdc++-"${llvm}"-dev{,-{arm{hf,64},i386}-cross} "$@" for root in /usr/*-gnu*; do ln -s .. "${root}"/usr done diff --git a/env/sys-ubuntu.sh b/env/sys-ubuntu.sh index b4fbb4be3..218951d93 100755 --- a/env/sys-ubuntu.sh +++ b/env/sys-ubuntu.sh @@ -24,11 +24,12 @@ if [[ $(uname -s) = Linux ]]; then debootstrap=${mount}/debootstrap # XXX: proot -0 runs the command but fails on exit; fakeroot works correctly - DEBOOTSTRAP_DIR=${debootstrap} fakeroot "${debootstrap}"/debootstrap \ - --foreign --variant=minbase --arch amd64 "${distro}" . + DEBOOTSTRAP_DIR=${debootstrap} fakeroot "${debootstrap}"/debootstrap --foreign \ + --variant=minbase --arch amd64 --components=main,universe "${distro}" . "${proot}" -0 -r . -w / -b /proc -b /sys /debootstrap/debootstrap --second-stage - echo "deb http://archive.ubuntu.com/ubuntu/ ${distro}-updates main" >>etc/apt/sources.list + # XXX: https://groups.google.com/g/linux.debian.bugs.dist/c/-p06sQmwamA + echo "deb http://archive.ubuntu.com/ubuntu/ ${distro}-updates main universe" >>etc/apt/sources.list HOME= "${proot}" -S . 
-w / -b "${mount}:/mnt" /mnt/setup-sys.sh "$@" else # https://stackoverflow.com/questions/29934204/mount-data-volume-to-docker-with-readwrite-permission diff --git a/env/target-and.mk b/env/target-and.mk index 2fb348645..3c33b14e7 100644 --- a/env/target-and.mk +++ b/env/target-and.mk @@ -67,6 +67,7 @@ else more = --sysroot=$(llvm)/sysroot # https://github.com/android-ndk/ndk/issues/884 +# XXX: wait, but don't I want this? look again! more += -fno-addrsig include $(pwd)/kit-android.mk diff --git a/env/target-any.mk b/env/target-any.mk index fee71c7ce..a79cec19c 100644 --- a/env/target-any.mk +++ b/env/target-any.mk @@ -15,10 +15,7 @@ path = $(1) .PHONY: all: -uname-m := $(shell uname -m) -uname-s := $(shell uname -s) -uname-o := $(shell uname -o 2>/dev/null) --include $(pwd)/uname-$(uname-s).mk +include $(pwd)/uname.mk version := $(shell $(pwd)/version.sh) monotonic := $(word 1,$(version)) @@ -48,9 +45,14 @@ archive := qflags += -gfull -Os cflags += -DNDEBUG -cflags += -D_FORTIFY_SOURCE=2 qflags += -fno-omit-frame-pointer +cflags += -D_FORTIFY_SOURCE=2 +# XXX: -fstack-protector-{strong,all} +# XXX: --param=ssp-buffer-size=4 +# XXX: -fsanitize={shadow-call,safe}-stack +# XXX: -fstack-clash-protection + cflags += -D__STDC_CONSTANT_MACROS cflags += -D__STDC_FORMAT_MACROS @@ -69,6 +71,7 @@ cflags += -Wno-misleading-indentation cflags += -Wno-missing-selector-name cflags += -Wno-overloaded-shift-op-parentheses cflags += -Wno-potentially-evaluated-expression +# XXX: cflags += -Wno-shift-op-parentheses cflags += -Wno-tautological-constant-out-of-range-compare cflags += -Wno-tautological-overlap-compare @@ -103,6 +106,8 @@ ifeq ($(machine),) machine := $(default) endif +-include $(pwd)/cpu-$(machine).mk + cflags += -I@/usr/include define depend diff --git a/env/target-apl.mk b/env/target-apl.mk index e3c7264c3..eeac8ecba 100644 --- a/env/target-apl.mk +++ b/env/target-apl.mk @@ -23,6 +23,7 @@ lflags += -Wl,-no_dead_strip_inits_and_terms # libtool qflags += -DPIC qflags += -fPIC 
+lflags += -fPIC signature := /_CodeSignature/CodeResources diff --git a/env/target-elf.mk b/env/target-elf.mk index bb474ee15..52d26a627 100644 --- a/env/target-elf.mk +++ b/env/target-elf.mk @@ -10,8 +10,30 @@ include $(pwd)/target-lld.mk -#lflags += -Wl,-error-limit=0 + lflags += -Wl,--build-id=none -lflags += -Wl,-z,relro +lflags += -Wl,-z,noexecstack + lflags += -Wl,--no-undefined +lflags += -Wl,-z,defs +lflags += -Wl,--no-copy-dt-needed-entries + +ifeq ($(target),and) +# XXX: this is wrong in the general case +# I need a separate build for this... :( qflags += -fpic +lflags += -fpic +else +qflags += -fpie +lflags += -fpie +endif + +qflags += -fno-plt +lflags += -fno-plt + +lflags += -Wl,-z,relro +lflags += -Wl,-z,now + +# https://maskray.me/blog/2021-01-09-copy-relocations-canonical-plt-entries-and-protected +qflags += -fno-semantic-interposition +qflags += -fdirect-access-external-data diff --git a/env/target-lld.mk b/env/target-lld.mk index 978f99cb7..bd1c399f6 100644 --- a/env/target-lld.mk +++ b/env/target-lld.mk @@ -13,5 +13,6 @@ include $(pwd)/target-gnu.mk wflags += -fuse-ld=lld lflags += -Wl,--icf=all +lflags += -Wl,--error-limit=0 export LLD_VERSION := Linker: LLD diff --git a/env/target-lnx.mk b/env/target-lnx.mk index cd1758ba2..3b336de50 100644 --- a/env/target-lnx.mk +++ b/env/target-lnx.mk @@ -24,6 +24,7 @@ archs += i386 openssl/i386 := linux-x86 host/i386 := i386-linux-$(libc) triple/i386 := i686-unknown-linux-$(libc) +format/i386 := elf32-i386 meson/i386 := x86 bits/i386 := 32 centos/i386 := i686 @@ -32,6 +33,7 @@ archs += x86_64 openssl/x86_64 := linux-x86_64 host/x86_64 := x86_64-linux-$(libc) triple/x86_64 := x86_64-unknown-linux-$(libc) +format/x86_64 := elf64-x86-64 meson/x86_64 := x86_64 bits/x86_64 := 64 centos/x86_64 := x86_64 @@ -40,6 +42,7 @@ archs += arm64 openssl/arm64 := linux-aarch64 host/arm64 := aarch64-linux-$(libc) triple/arm64 := aarch64-unknown-linux-$(libc) +format/arm64 := elf64-littleaarch64 meson/arm64 := aarch64 
bits/arm64 := 64 @@ -47,6 +50,7 @@ archs += armhf openssl/armhf := linux-armv4 host/armhf := arm-linux-$(libc)eabihf triple/armhf := arm-unknown-linux-$(libc)eabihf +format/armhf := elf32-littlearm meson/armhf := arm bits/armhf := 32 @@ -54,6 +58,7 @@ archs += mips openssl/mips := linux-mips32 host/mips := mips-linux-$(libc) triple/mips := mips-unknown-linux-$(libc) +format/mips := elf32-tradlittlemips meson/mips := mips bits/mips := 32 diff --git a/env/uname-Darwin.mk b/env/uname-Darwin.mk index 63d979b96..459ab92b9 100644 --- a/env/uname-Darwin.mk +++ b/env/uname-Darwin.mk @@ -16,3 +16,6 @@ endif prebuilt := darwin-x86_64 export PATH := /usr/local/opt/gettext/bin:$(PATH) + +export PATH := $(PATH):/usr/local/opt/binutils/bin +objcopy = objcopy diff --git a/env/uname.mk b/env/uname.mk new file mode 100644 index 000000000..cb30db1c4 --- /dev/null +++ b/env/uname.mk @@ -0,0 +1,18 @@ +# Cycc/Cympile - Shared Build Scripts for Make +# Copyright (C) 2013-2020 Jay Freeman (saurik) + +# Zero Clause BSD license {{{ +# +# Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+# }}} + + +uname-m := $(shell uname -m) +uname-s := $(shell uname -s) +uname-o := $(shell uname -o 2>/dev/null) + +objcopy = $(host/$*)-objcopy + +-include $(pwd)/uname-$(uname-s).mk diff --git a/srv-daemon/makefile b/srv-daemon/makefile index ab218335e..63b5d7c11 100644 --- a/srv-daemon/makefile +++ b/srv-daemon/makefile @@ -64,14 +64,9 @@ endif kernel/$(output)/$(machine)/kernel: force $(MAKE) -C kernel target=lnx machine=$(machine) -$(output)/embed: env/embed.c - clang -o $@ $< - -$(output)/$(machine)/extra/kernel.xxd: kernel/$(output)/$(machine)/kernel $(output)/embed - @mkdir -p $(dir $@) - $(output)/embed kernel $< >$@ - -$(call depend,$(pwd)/source/kernel.cpp.o,$(output)/$(machine)/extra/kernel.xxd) +$(output)/%/kernel.o: kernel/$(output)/%/kernel + cd $(dir $<) && $(objcopy/$*) -O $(format/$*) -I binary $(notdir $<) $(CURDIR)/$@ --set-section-alignment '.data=4096' --set-section-flags '.data=alloc,load,readonly,data' +linked += kernel.o include env/output.mk diff --git a/srv-daemon/source/engine.cpp b/srv-daemon/source/engine.cpp index 5df6360bd..ee1e858ea 100644 --- a/srv-daemon/source/engine.cpp +++ b/srv-daemon/source/engine.cpp @@ -32,14 +32,26 @@ #include +#if 0 +#elif defined(__aarch64__) +//#include +#elif defined(__x86_64__) +#else +#error +#endif + #include "buffer.hpp" -#include "kernel.hpp" #include "scope.hpp" #include "syscall.hpp" #include "time.hpp" #include "load.hpp" +extern const unsigned char _binary_kernel_start[]; + +// XXX: move this somewhere and maybe find a library +namespace gsl { template using owner = Type_; } + // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg) namespace orc { @@ -56,81 +68,246 @@ int Engine() { const Fd zygote(memfd_create("zygote", MFD_CLOEXEC)); - const size_t physical(64*megabyte); - orc_syscall(ftruncate(zygote, physical)); + const size_t arena(512*megabyte); + orc_syscall(ftruncate(zygote, arena)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast,performance-no-int-to-ptr) + const auto 
memory(reinterpret_cast(orc_syscall(mmap(nullptr, arena, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, zygote, 0)))); + //madvise(memory, arena, MADV_MERGEABLE); + + + const auto [limit, entry] = [&]() { + const auto ident(reinterpret_cast(_binary_kernel_start)); + orc_assert(memcmp(ident, ELFMAG, SELFMAG) == 0); + orc_assert(ident[EI_DATA] == ELFDATA2LSB); + orc_assert(ident[EI_VERSION] == EV_CURRENT); + orc_assert(ident[EI_OSABI] == ELFOSABI_NONE); + + const auto load([&]() -> std::tuple { + const auto &header(*reinterpret_cast(_binary_kernel_start)); + const auto commands(reinterpret_cast(_binary_kernel_start + header.e_phoff)); + + uintptr_t offset(0); + uintptr_t limit(0); + uintptr_t entry(-1); + + for (size_t i(0); i != header.e_phnum; ++i) + if (const auto &command(commands[i]); command.p_type == PT_LOAD) { + orc_assert_(limit == 0, "non-final zero-initialized segment"); + if (command.p_filesz == 0) + limit = command.p_vaddr - offset; + else { + orc_assert_(command.p_filesz == command.p_memsz, "non-fully zero-initialized segment"); + const uintptr_t current(command.p_vaddr - command.p_offset); + if (offset == 0) + offset = current; + else + orc_assert_(offset == current, "inconsistent segment offset for mapping"); + } + + if (header.e_entry >= command.p_vaddr && header.e_entry < command.p_vaddr + command.p_memsz) + entry = header.e_entry - command.p_vaddr + command.p_offset; + } - const Fd kvm(orc_syscall(open("/dev/kvm", O_RDWR | O_CLOEXEC))); - orc_assert(orc_syscall(ioctl(kvm, KVM_GET_API_VERSION, nullptr)) == 12); - const auto size(orc_syscall(ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, nullptr))); + orc_assert_(limit != 0, "unable to determine block starting point"); + orc_assert_(entry != -1, "entrypoint outside of kernel section"); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast,performance-no-int-to-ptr) - const auto memory(reinterpret_cast(orc_syscall(mmap(nullptr, physical, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, zygote, 0)))); - 
//madvise(memory, physical, MADV_MERGEABLE); + return {(limit + 0xfffull) & ~0xfffull, entry}; + }); + + switch (const auto value = ident[EI_CLASS]) { + #define caseELF(bits) case ELFCLASS##bits: \ + return load.operator ()(); + caseELF(32) caseELF(64) default: + orc_assert_(false, "unknown EI_CLASS " << unsigned(value)); + } + }(); - const auto &header(*reinterpret_cast(kernel_data)); - const uint64_t entry(header.e_entry); - const auto commands(reinterpret_cast(kernel_data + header.e_phoff)); - for (size_t i(0); i != header.e_phnum; ++i) - if (const auto &command(commands[i]); command.p_type == PT_LOAD) - memcpy(memory + command.p_paddr, kernel_data + command.p_offset, command.p_filesz); + std::cout << "limit: " << std::hex << limit << std::endl; + std::cout << "entry: " << std::hex << entry << std::endl; + const Fd kvm(orc_syscall(open("/dev/kvm", O_RDWR | O_CLOEXEC))); + orc_assert(orc_syscall(ioctl(kvm, KVM_GET_API_VERSION, nullptr)) == 12); + const auto size(orc_syscall(ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, nullptr))); + const auto vm(orc_syscall(ioctl(kvm, KVM_CREATE_VM, 0))); const auto cpu(orc_syscall(ioctl(vm, KVM_CREATE_VCPU, 0))); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast,performance-no-int-to-ptr) const auto run(static_cast(orc_syscall(mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, cpu, 0)))); +#if 0 +#elif defined(__aarch64__) + struct kvm_vcpu_init cpuid{}; + cpuid.target = KVM_ARM_TARGET_GENERIC_V8; +#elif defined(__x86_64__) + // XXX: implement allocation loop using cpuid: label + const decltype(std::declval()->nent) cpuids(128); + // NOLINTBEGIN(cppcoreguidelines-no-malloc,cppcoreguidelines-owning-memory) + const auto cpuid(static_cast(malloc(sizeof(struct kvm_cpuid2) + cpuids * sizeof(struct kvm_cpuid_entry2)))); + _scope({ free(cpuid); }); + // NOLINTEND(cppcoreguidelines-no-malloc,cppcoreguidelines-owning-memory) + cpuid->nent = cpuids; + orc_syscall(ioctl(kvm, KVM_GET_SUPPORTED_CPUID, cpuid)); + + for 
(decltype(cpuid->nent) i(0); i != cpuid->nent; ++i) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) + auto &entry(cpuid->entries[i]); + switch (entry.function) { + // supports long mode + case 0x80000001: + orc_assert((entry.edx & (1 << 29)) != 0); + break; + } + } +#else +#error +#endif + +#if 0 +#elif defined(__aarch64__) + orc_syscall(ioctl(cpu, KVM_ARM_VCPU_INIT, &cpuid)); +#elif defined(__x86_64__) + orc_syscall(ioctl(cpu, KVM_SET_CPUID2, cpuid)); +#else +#error +#endif - struct kvm_userspace_memory_region region = {}; - region.slot = 0; - region.flags = 0; - region.guest_phys_addr = 0x0; - region.memory_size = physical; - region.userspace_addr = reinterpret_cast(memory); - orc_syscall(ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region)); -#ifdef __aarch64__ - struct kvm_vcpu_init init = {}; - init.target = KVM_ARM_TARGET_GENERIC_V8; - orc_syscall(ioctl(cpu, KVM_ARM_VCPU_INIT, &init)); +#if 0 +#elif defined(__aarch64__) + const uintptr_t base(0x40000000); +#elif defined(__x86_64__) + const uintptr_t base(0x00000000); +#else +#error +#endif - struct kvm_one_reg reg = {}; - uintptr_t value; - reg.addr = reinterpret_cast(&value); + // kASLR is a sham and it has never stopped me from hacking anything + // https://forums.grsecurity.net/viewtopic.php?f=7&t=3367 + const auto slide(base + 0x200000); + + { + struct kvm_userspace_memory_region region{}; + region.slot = 0; + region.flags = KVM_MEM_READONLY; + region.guest_phys_addr = slide; + region.memory_size = limit; + region.userspace_addr = reinterpret_cast(_binary_kernel_start); + orc_syscall(ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region)); + } - reg.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc); - value = entry; - orc_syscall(ioctl(cpu, KVM_SET_ONE_REG, &reg)); + { + // XXX: memfd memslot https://lwn.net/Articles/879370/ + struct kvm_userspace_memory_region region{}; + region.slot = 1; + region.flags = 0; + region.guest_phys_addr = slide + limit; + 
region.memory_size = arena; + region.userspace_addr = reinterpret_cast(memory); + orc_syscall(ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region)); + } - reg.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1); - value = 2 * megabyte; - orc_syscall(ioctl(cpu, KVM_SET_ONE_REG, &reg)); -#endif -#ifdef __x86_64__ - (void) entry; + { +#if 0 +#elif defined(__aarch64__) + struct kvm_one_reg reg{}; + uintptr_t value; + reg.addr = reinterpret_cast(&value); + + reg.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc); + value = slide + entry; + orc_syscall(ioctl(cpu, KVM_SET_ONE_REG, &reg)); +#elif defined(__x86_64__) + struct kvm_sregs sregs{}; + orc_syscall(ioctl(cpu, KVM_GET_SREGS, &sregs)); + + struct kvm_segment seg{ + .base = 0, .limit = 0xffffffff, + .selector = 1 << 3, .type = 0xb, + .present = 1, .dpl = 0, .db = 1, + .s = 1, .l = 0, .g = 1, + }; + + sregs.cs = seg; + seg.selector = 2 << 3; seg.type = 0x3; + sregs.ds = seg; sregs.es = seg; + sregs.fs = seg; sregs.gs = seg; + sregs.ss = seg; + + sregs.cr0 |= 0x1; + orc_syscall(ioctl(cpu, KVM_SET_SREGS, &sregs)); + + struct kvm_regs regs{}; + orc_syscall(ioctl(cpu, KVM_GET_REGS, &regs)); + regs.rip = regs.rcx = slide + entry; + regs.rflags = 0x2; + orc_syscall(ioctl(cpu, KVM_SET_REGS, &regs)); +#else +#error #endif + } + + const auto dump([&]() { #if 0 - struct kvm_regs regs = {}; - orc_syscall(ioctl(cpu, KVM_GET_REGS, &regs)); - std::cout << std::hex; - for (size_t i(0); i != 34; ++i) - std::cout << regs.regs.regs[i] << std::endl; - orc_syscall(ioctl(cpu, KVM_SET_REGS, &regs)); +#elif defined(__aarch64__) + std::vector regs; + regs.resize(1024); + regs[0] = regs.size() - 1; + orc_syscall(ioctl(cpu, KVM_GET_REG_LIST, regs.data())); + + //v=0xc290; z=[14,11,7,3,0]; [(v>>b)&((1<<(a-b))-1) for a,b in zip(([16]+z)[:-1],z)] + + struct kvm_one_reg reg{}; + uintptr_t value; + reg.addr = reinterpret_cast(&value); + + for (size_t i(0); i != regs[0]; ++i) { + reg.id = regs[i+1]; + 
if ((reg.id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) + continue; + orc_syscall(ioctl(cpu, KVM_GET_ONE_REG, &reg)); + std::cout << std::hex << reg.id << " " << value << std::endl; + } +#elif defined(__x86_64__) + struct kvm_regs regs{}; + orc_syscall(ioctl(cpu, KVM_GET_REGS, &regs)); + std::cout << std::hex; + std::cout << "ax " << regs.rax << std::endl; + std::cout << "bx " << regs.rbx << std::endl; + std::cout << "cx " << regs.rcx << std::endl; + std::cout << "dx " << regs.rdx << std::endl; + std::cout << "si " << regs.rsi << std::endl; + std::cout << "di " << regs.rdi << std::endl; + std::cout << "sp " << regs.rsp << std::endl; + std::cout << "bp " << regs.rbp << std::endl; + std::cout << "r8 " << regs.r8 << std::endl; + std::cout << "r9 " << regs.r9 << std::endl; + std::cout << "rA " << regs.r10 << std::endl; + std::cout << "rB " << regs.r11 << std::endl; + std::cout << "rC " << regs.r12 << std::endl; + std::cout << "rD " << regs.r13 << std::endl; + std::cout << "rE " << regs.r14 << std::endl; + std::cout << "rF " << regs.r15 << std::endl; + std::cout << "ip " << regs.rip << std::endl; + std::cout << "fl " << regs.rflags << std::endl; #endif + std::cout << std::endl; + }); (void) dump; + + const auto buffer(reinterpret_cast(memory)); - const auto buffer(reinterpret_cast(memory + megabyte)); + if (false) { + struct kvm_guest_debug debug{}; + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + orc_syscall(ioctl(cpu, KVM_SET_GUEST_DEBUG, &debug)); + } for (;;) { -#ifdef __aarch64__ - reg.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc); - orc_syscall(ioctl(cpu, KVM_GET_ONE_REG, &reg)); - std::cerr << std::hex << "KVM_RUN(0x" << value << ")" << std::endl; -#endif + dump(); - //__asm__ volatile ("dc civac, %0" : : "r" (buffer) : "memory"); orc_syscall(ioctl(cpu, KVM_RUN, nullptr), EINTR); switch (run->exit_reason) { case KVM_EXIT_INTR: @@ -138,10 +315,20 @@ int Engine() { break; case KVM_EXIT_MMIO: { - 
orc_assert_(run->mmio.phys_addr == 0x10000000, "mmio: 0x" << std::hex << run->mmio.phys_addr); + dump(); + orc_assert_(run->mmio.phys_addr == base + 0x1000, "mmio: 0x" << std::hex << run->mmio.phys_addr); orc_assert(run->mmio.is_write); orc_assert(run->mmio.len == sizeof(uintptr_t)); switch (const auto command = *reinterpret_cast(run->mmio.data)) { + case 0: { + struct kvm_guest_debug debug{}; + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + orc_syscall(ioctl(cpu, KVM_SET_GUEST_DEBUG, &debug)); + } break; + case 1: { + std::cout << "abort()" << std::endl; + return 0; + } break; case 2: { std::cout << buffer << std::flush; } break; @@ -151,7 +338,37 @@ int Engine() { } } break; - default: orc_insist(false); + case KVM_EXIT_DEBUG: { +#ifdef __aarch64__ +#if 0 + const auto esr(run->debug.arch.hsr >> 26); + const auto iss(run->debug.arch.hsr >> 26); +#endif + //ESR_ELx_EC_SOFTSTP_LOW; + //ESR_ELx_EC_SOFTSTP_CUR; + //KVM_DEBUG_ARCH_HSR_HIGH_VALID && hsr_high + //std::cout << std::hex << run->debug.arch.hsr << " " << run->debug.arch.far << std::endl; +#endif + } break; + + case KVM_EXIT_IO: { + std::cout << run->io.direction << std::endl; + } break; + + case KVM_EXIT_HLT: + std::cout << "HLT" << std::endl; + return 0; + case KVM_EXIT_SHUTDOWN: + dump(); + std::cout << "SHUTDOWN" << std::endl; + return 0; + case KVM_EXIT_FAIL_ENTRY: + std::cout << "FAIL_ENTRY" << std::endl; + return 0; + + case KVM_EXIT_INTERNAL_ERROR: + orc_insist_(false, "kvm internal error"); + default: orc_insist_(false, "unknown reason: " << run->exit_reason); } } diff --git a/srv-daemon/source/kernel.cpp b/srv-daemon/source/kernel.cpp deleted file mode 100644 index 34a545a29..000000000 --- a/srv-daemon/source/kernel.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* Orchid - WebRTC P2P VPN Market (on Ethereum) - * Copyright (C) 2017-2020 The Orchid Authors -*/ - -/* GNU Affero General Public License, Version 3 {{{ */ -/* - * This program is free software: you can redistribute it and/or modify 
- * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. -**/ -/* }}} */ - - -#include "kernel.hpp" - -namespace orc { -__attribute__((__aligned__(16*1024))) -#include "kernel.xxd" -} diff --git a/srv-daemon/source/kernel.hpp b/srv-daemon/source/kernel.hpp deleted file mode 100644 index 58d5b9e26..000000000 --- a/srv-daemon/source/kernel.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Orchid - WebRTC P2P VPN Market (on Ethereum) - * Copyright (C) 2017-2020 The Orchid Authors -*/ - -/* GNU Affero General Public License, Version 3 {{{ */ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
-**/ -/* }}} */ - - -#ifndef ORCHID_KERNEL_HPP -#define ORCHID_KERNEL_HPP - -namespace orc { -extern const unsigned char kernel_data[]; -extern const unsigned int kernel_size; -} - -#endif//ORCHID_KERNEL_HPP - diff --git a/srv-kernel/makefile b/srv-kernel/makefile index 6cae787f9..3707f6c7b 100644 --- a/srv-kernel/makefile +++ b/srv-kernel/makefile @@ -41,42 +41,74 @@ test: $(binary) source += $(wildcard $(pwd)/source/*.cpp) source += $(wildcard $(pwd)/source/*.c) -cflags += -I$(pwd)/source - +checks/$(pwd)/source/main.cpp += -cppcoreguidelines-pro-type-vararg lflags += -nostdlib -lflags += -static lflags := $(filter-out -l%,$(lflags)) lflags := $(filter-out -pthread,$(lflags)) +lflags += -static + xflags += -fno-exceptions -cflags += -fno-plt -lflags += -fno-plt +xflags += -fno-asynchronous-unwind-tables + +qflags += -ffreestanding cflags := $(filter-out -D_FORTIFY_SOURCE=%,$(cflags)) +vaddr := 0xffffffffc0200000 +ifeq ($(machine),arm64) +paddr := 0x0000000040200000 +else +paddr := 0x0000000000200000 +endif + +lflags += -Wl,--no-rosegment +lflags += -Wl,--image-base=$(vaddr) +oflags += --change-section-lma "*"-$$(($(vaddr)-$(paddr))) + +ifeq ($(machine),x86_64) +# https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models +qflags += -mcmodel=kernel +lflags += -mcmodel=kernel +else +# XXX: consider using large +qflags += -mcmodel=small +lflags += -mcmodel=small +endif + +# XXX: this breaks some x86 headers and has no effect on aarch64 +nofp result +#qflags += -mgeneral-regs-only + ifeq ($(machine),arm64) # https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md -cflags += -march=armv8-a+nofp +qflags += -march=armv8-a+nofp cflags += -mno-unaligned-access #-Wl,-z,max-page-size=0x100 endif -worker/$(output)/$(machine)/worker: force - $(MAKE) -C worker target=lnx machine=$(machine) +ifeq ($(machine),x86_64) +qflags += -mno-red-zone +qflags += -mno-sse +cflags/$(pwd)/source/paging.cpp += -m32 +cflags/$(pwd)/source/paging.cpp += -g0 
+oflags/$(pwd)/source/paging.cpp += -O elf64-x86-64 +endif -$(output)/embed: env/embed.c - clang -o $@ $< +cflags/$(pwd)/source/paging.cpp += -fomit-frame-pointer -$(output)/$(machine)/extra/worker.xxd: worker/$(output)/$(machine)/worker $(output)/embed - @mkdir -p $(dir $@) - $(output)/embed worker $< >$@ +worker/$(output)/$(machine)/worker: force + $(MAKE) -C worker target=lnx machine=$(machine) -$(call depend,$(pwd)/source/worker.cpp.o,$(output)/$(machine)/extra/worker.xxd) +$(output)/%/worker.o: worker/$(output)/%/worker + cd $(dir $<) && $(objcopy/$*) -O $(format/$*) -I binary $(notdir $<) $(CURDIR)/$@ --set-section-alignment '.data=4096' --set-section-flags '.data=alloc,load,readonly,data' +linked += worker.o include env/output.mk $(output)/%/kernel$(exe): $(patsubst %,$(output)/$$*/%,$(object) $(linked)) @mkdir -p $(dir $@) @echo [LD] $@ - @$(cxx) $(more/$*) $(wflags) -o $@ $(bflags) $^ $(lflags) -Wl,--color-diagnostics=always -fcolor-diagnostics 2>&1 | nl + @$(cxx) $(more/$*) $(wflags) -o $@- $(bflags) $^ $(lflags) -Wl,--color-diagnostics=always -fcolor-diagnostics 2>&1 | nl + @$(objcopy) $(oflags) $@- $@ + @test "$$($(objdump/$*) -x $@ | sed -e '/^.*filesz /!d;s///;/ .w.$$/!d;/^0x0000000000000000 /d' | tee >(cat 1>&2))" = "" @openssl sha256 -r $@ @ls -la $@ diff --git a/srv-kernel/qemu-amd64.sh b/srv-kernel/qemu-amd64.sh new file mode 100755 index 000000000..e09b7625d --- /dev/null +++ b/srv-kernel/qemu-amd64.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +(cd qemu && make -kj12) +make -kj12 target=lnx machine=x86_64 + +kernel=out-lnx-26.2/x86_64/kernel + + #-M microvm,pit=off,pic=off,isa-serial=off,rtc=off \ + +qemu/build/qemu-system-x86_64 -kernel "${kernel}" \ + -M microvm \ + -m 128m -nic none -nodefaults -display none -nic none \ + -nographic -monitor unix:qemu.sock,server,nowait \ + -no-user-config -d in_asm,op,mmu,cpu_reset,int,cpu + + #-chardev stdio,id=virtiocon0 + #-device virtio-serial-device + #-device virtconsole,chardev=virtiocon0 + #-drive 
id=test,file=test.img,format=raw,if=none + #-device virtio-blk-device,drive=test + #-netdev tap,id=tap0,script=no,downscript=no + #-device virtio-net-device,netdev=tap0 diff --git a/srv-kernel/qemu-arm64.sh b/srv-kernel/qemu-arm64.sh new file mode 100755 index 000000000..adbb8d37e --- /dev/null +++ b/srv-kernel/qemu-arm64.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +(cd qemu && make -kj12) +make -kj12 target=lnx machine=arm64 + +kernel=out-lnx-26.2/arm64/kernel + +qemu/build/qemu-system-aarch64 -kernel "${kernel}" \ + -M virt -cpu cortex-a53 \ + -m 128m -nic none -nodefaults -display none -nic none \ + -nographic -monitor unix:qemu.sock,server,nowait \ + -no-user-config -d in_asm,op,mmu,cpu_reset,int,cpu diff --git a/srv-kernel/source/main.cpp b/srv-kernel/source/main.cpp index a19f602c5..aabbb8a65 100644 --- a/srv-kernel/source/main.cpp +++ b/srv-kernel/source/main.cpp @@ -20,41 +20,109 @@ /* }}} */ -#include -#include +#ifdef __x86_64__ +#include +#endif +#include + +#include + +#include +#include #include -#include "scope.hpp" -#include "worker.hpp" +#include +#include + +#include "paging.hpp" -// NOLINTBEGIN(performance-no-int-to-ptr) namespace orc { -static const auto control_(reinterpret_cast(0x10000000)); +// assembly helpers {{{ +#if 0 +#elif defined(__aarch64__) +#define Clear(code) \ + __asm__ volatile ("dc cvac, %0" : : "r" (code) : "memory") +#elif defined(__x86_64__) || defined(__i386__) +#define Clear(code) +#endif + +#if defined(__x86_64__) +inline void WriteMSR(uint32_t msr, uint32_t hi, uint32_t lo) { + __asm__ volatile ("wrmsr" : : "c" (msr), "d" (hi), "a" (lo)); } +inline void WriteMSR(uint32_t msr, uintptr_t value) { + WriteMSR(msr, value >> 32, static_cast(value)); } +template +inline void WriteMSR(uint32_t msr, Type_ *value) { + WriteMSR(msr, reinterpret_cast(value)); } +#endif +// }}} + +struct Page { + Page *next_; + uint64_t zero_[(1 << (pagebits_ - wordbits_)) - 1]; +}; + +static_assert(sizeof(Page) == kilopage_); + +struct State { + char 
buffer_[kilopage_]; + + uint8_t stack_[kilopage_]; + Table tables_[4]; + + Page *next_, *more_; + uintptr_t stop_; + const void *syscalls_[500]; +} __attribute__((__aligned__(kilopage_))); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global) +State state_; + +static_assert((sizeof(State) % kilopage_) == 0); + +uint64_t Align(uint64_t address) { + return (address + kilopage_ - 1) & ~uint64_t(kilopage_ - 1); +} + +const auto control_(static_cast(KofP(boundary_ + kilopage_))); + +} #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winvalid-noreturn" -__attribute__ ((__noreturn__)) -void abort() { +__attribute__((__noreturn__)) +extern "C" void abort() { + // NOLINTNEXTLINE(google-build-using-namespace) + using namespace orc; *control_ = 1; } #pragma clang diagnostic pop +// memory api {{{ +// NOLINTBEGIN(readability-inconsistent-declaration-parameter-name) +extern "C" void *memset(void *data, int value, size_t size) { + for (size_t i(0); i != size; ++i) + reinterpret_cast(data)[i] = char(value); + return data; +} -void *memcpy(void *dest, const void *src, size_t size) { +extern "C" void *memcpy(void *dest, const void *src, size_t size) { for (size_t i(0); i != size; ++i) - reinterpret_cast(dest)[i] = reinterpret_cast(src)[i]; + reinterpret_cast(dest)[i] = reinterpret_cast(src)[i]; return dest; } -size_t strlen(const char *data) { +extern "C" size_t strlen(const char *data) { for (size_t size(0);; ++size) if (data[size] == '\0') return size; } +// NOLINTEND(readability-inconsistent-declaration-parameter-name) +// }}} -// snprintf {{{ +#if 1 +// printf {{{ namespace { char *itoa_(uintmax_t value, char *data, unsigned int base, bool upper) { if (value == 0) @@ -69,7 +137,7 @@ char *itoa_(uintmax_t value, char *data, unsigned int base, bool upper) { } } // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg) -size_t vsnprintf(char *str, size_t max, const char *format, va_list args) { +extern "C" size_t vsnprintf(char *str, size_t max, const char *format, 
va_list args) { auto end(str); size_t length(0); @@ -149,147 +217,804 @@ size_t vsnprintf(char *str, size_t max, const char *format, va_list args) { // NOLINTEND(cppcoreguidelines-pro-type-vararg) // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg,cert-dcl50-cpp) -size_t snprintf(char *str, size_t size, const char *format, ...) { +extern "C" size_t snprintf(char *str, size_t size, const char *format, ...) { va_list args; va_start(args, format); auto value(vsnprintf(str, size, format, args)); va_end(args); return value; } // NOLINTEND(cppcoreguidelines-pro-type-vararg,cert-dcl50-cpp) -// }}} - -static const size_t kibibyte_(1024); -static const size_t mebibyte_(1024*kibibyte_); -//static const size_t gibibyte_(1024*mebibyte_); // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg,cert-dcl50-cpp) -__attribute__ ((__format__ (__printf__, 1, 2))) -size_t printf(const char *format, ...) { - const auto buffer(reinterpret_cast(mebibyte_)); +__attribute__((__format__ (__printf__, 1, 2))) +extern "C" size_t printf(const char *format, ...) { + // NOLINTNEXTLINE(google-build-using-namespace) + using namespace orc; va_list args; va_start(args, format); - auto value(vsnprintf(buffer, mebibyte_, format, args)); + auto value(vsnprintf(state_.buffer_, kilopage_, format, args)); va_end(args); -#ifdef __aarch64__ - __asm__ volatile ("dc civac, %0" : : "r" (buffer) : "memory"); -#endif + Clear(state_.buffer_); *control_ = 2; return value; } // NOLINTEND(cppcoreguidelines-pro-type-vararg,cert-dcl50-cpp) +// }}} +#else +#define printf(format, ...) do {} while(false) +#endif +// debug macros {{{ +#define orc_assert(code) do if (!(code)) { \ + printf("orc_assert(%s) @ %s:%u\n", #code, __FILE__, __LINE__); \ + abort(); \ +} while (false) + +#define orc_syscall(expr, ...) 
({ \ + auto _value(expr); \ + orc_assert((long) _value != -1); \ +_value; }) + +#define orc_trace() do { \ + printf("orc_trace() @ %s:%u\n", __FILE__, __LINE__); \ +} while (false) +// }}} +// NOLINTBEGIN(performance-no-int-to-ptr) +namespace orc { + +// alloc/free {{{ template Type_ *palloc() { - return nullptr; -} + if (state_.next_ != nullptr) { + const auto page(state_.next_); + state_.next_ = page->next_; + memset(page, 0, sizeof(Page)); + return reinterpret_cast(page); + } -void free(void *page) { + return state_.more_++; } -static const auto PKoffset_(uintptr_t(0x0) - mebibyte_); -uintptr_t PofK(void *address) { return reinterpret_cast(address) - PKoffset_; } -void *KofP(uintptr_t physical) { return reinterpret_cast(physical + PKoffset_); } - - -static const size_t wordbits_(3); -static_assert(1 << wordbits_ == sizeof(uintptr_t)); - -static const size_t pagebits_(12); -static const size_t pagesize_(1 << pagebits_); - -static const size_t indxbits_(pagebits_ - wordbits_); -typedef uintptr_t Table[1 << indxbits_]; - -#define MASK(bits) ((uintptr_t(1) << bits) - 1) +void free(void *data) { + orc_assert((reinterpret_cast(data) & MASK(pagebits_)) == 0); + const auto page(reinterpret_cast(data)); + page->next_ = state_.next_; + state_.next_ = page; +} +// }}} +// exceptions {{{ +void backtrace(uintptr_t lr, const uintptr_t *fp) { +#if 1 && defined(__aarch64__) + for (;;) { + printf("lr = 0x%lx fp = %p\n", lr, fp); + if (fp == nullptr) break; + lr = fp[1]; + fp = reinterpret_cast(fp[0]); + } +#endif +} +extern "C" long enosys(uintptr_t nr, uintptr_t lr, uintptr_t fp) { + printf("enosys(%lu, 0x%lx, 0x%lx)\n", nr, lr, fp); + backtrace(lr, reinterpret_cast(fp)); + abort(); + return -ENOSYS; +} +// }}} +// page fault {{{ #if defined(__aarch64__) +extern "C" uintptr_t efault(uintptr_t sr, uintptr_t lr, uintptr_t fp, uintptr_t x0) { + // 
https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/ESR-EL1--Exception-Syndrome-Register--EL1-?lang=en#fieldset_0-24_0_10 + printf("efault(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", sr, lr, fp, x0); + backtrace(lr, reinterpret_cast(fp)); + abort(); + return x0; +} +#endif +// }}} +// page table {{{ +template +[[nodiscard]] bool Scan_(uintptr_t &entry, uintptr_t address, const auto &code) { + //printf("Scan_<%s, %ld>(%p = %lx, 0x%lx)\n", Full_ ? "true" : "false", Level_, &entry, entry, address << pagebits_); + static_assert(Level_ <= 4); + if constexpr (Level_ == 0) + return (Full_ || entry != 0) && code(address << pagebits_, entry); + else { + if constexpr (Level_ != 4) if (!isE(entry)) + if constexpr (Full_) { + entry = TofP(PofK(palloc())); + Clear(&entry); + } else return false; + + auto index((address >> (indxbits_ * (Level_ - 1))) & MASK(indxbits_)); + auto &table(*KofP(PofE(entry))); + if (Scan_(table[index], address, code)) + return true; + address = address & ~MASK(indxbits_ * Level_); + + const auto limit(1 << (indxbits_ +#if defined(__x86_64__) + - (Level_ == 4 ? 
1 : 0) +#endif + )); -bool Present(uintptr_t entry) { return (entry & MASK(1)) != 0; } -uintptr_t PofE(uintptr_t entry) { return entry & ~MASK(pagebits_); } + for (++index; index != limit; ++index) + if (Scan_(table[index], address + (index << (indxbits_ * (Level_ - 1))), code)) + return true; + return false; + } +} + +template +[[nodiscard]] bool Scan(uintptr_t base, const Code_ &code) { + orc_assert((base & MASK(pagebits_)) == 0); + orc_assert(intptr_t(base) >= 0); + uintptr_t table; #if 0 #elif defined(__aarch64__) -uintptr_t EofP(uintptr_t physical) { return physical | uintptr_t(0x3); } + __asm__ volatile ("mrs %0, ttbr0_el1" : "=r" (table)); +#elif defined(__x86_64__) + __asm__ volatile ("mov %%cr3, %0" : "=r" (table)); #else #error #endif -template -struct Remap { static void _(uintptr_t &entry, uintptr_t address, const auto &code) { - if constexpr (Level_ != 4) if (!Present(entry)) entry = EofP(PofK(palloc())); - const auto index((address >> (indxbits_ * (Level_ - 1))) & MASK(indxbits_)); - return Remap::_((*static_cast
(KofP(PofE(entry))))[index], address, code); -} }; + return Scan_(table, base >> pagebits_, code); +} -template <> -struct Remap<0> { static void _(uintptr_t &entry, uintptr_t address, const auto &code) { - return code(entry, address << pagebits_); -} }; +template +void Scan(uintptr_t base, size_t size, const Code_ &code) { + if (size == 0) return; + orc_assert((size & MASK(pagebits_)) == 0); + orc_assert(base == 0 || size < -base); + + const auto done(Scan(base, [&](uintptr_t page, uintptr_t &entry) { + if (page >= base + size) + return true; + code(page, entry); + + // XXX: this forces page commit + if (entry != 0 && !isE(entry)) { + orc_assert(PofE(entry) == holdpage_); + entry = entry & ~nextmask_ | PofK(palloc()) | 0x1; + } -void remap(uintptr_t base, size_t size, const auto &code) { - if ((base & MASK(pagebits_)) != 0) abort(); - if ((size & MASK(pagebits_)) != 0) abort(); + Clear(&entry); #if 0 #elif defined(__aarch64__) - uintptr_t table; - if (intptr_t(base) >= 0) - __asm__ volatile ("msr ttbr0_el1, %0" : "=r" (table)); - else - __asm__ volatile ("msr ttbr1_el1, %0" : "=r" (table)); + // XXX: https://forum.osdev.org/viewtopic.php?t=36412&p=303237 + // ^ I think that low bit in the TTBR is some part of MMU.IRGN + __asm__ volatile ("dsb ishst"); + __asm__ volatile ("tlbi vae1is, %0" : : "p" (page>>pagebits_)); + __asm__ volatile ("tlbi vae1is, %0" : : "p" (page>>pagebits_ | 1ull<<48)); + __asm__ volatile ("dsb ish"); + __asm__ volatile ("isb"); +#elif defined(__x86_64__) + __asm__ volatile ("invlpg %0" : : "p" (page)); #else #error #endif - Remap<4>::_(table, base >> pagebits_, [&](uintptr_t &entry, uintptr_t page) { - if (Present(entry)) - free(KofP(PofE(entry))); - code(entry, page); + //printf("page 0x%lx now 0x%lx\n", page, entry); + return false; + })); + orc_assert(done); +} +// }}} +// memory map {{{ +constexpr Entry RofF(unsigned flags) { + return RofF((flags & PROT_WRITE) != 0, (flags & PROT_EXEC) != 0); +} + +uintptr_t $mmap(uintptr_t base, size_t 
size, int prot, int flags, int file, size_t offset) { + //printf("mmap(0x%lx, 0x%lx, %u, %u, %d, %zu)\n", base, size, prot, flags, file, offset); + orc_assert((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) == 0); + orc_assert((flags & ~(MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE)) == 0); + orc_assert((flags & MAP_PRIVATE) != 0); + orc_assert((flags & MAP_ANONYMOUS) != 0); + orc_assert(file == -1); + orc_assert(offset == 0); + + const auto rest(RofF(prot)); + + if ((flags & MAP_FIXED) == 0) + if (!Scan(base, [&](uintptr_t page, uintptr_t &entry) { + if (page - base >= size) + return true; + base = page + kilopage_; + return false; + })) { + // XXX: verify that base has sufficient size before end of memory + } + + Scan(base, size, [&](uintptr_t page, uintptr_t &entry) { + if (isE(entry)) free(KofP(PofE(entry))); + entry = rest | holdpage_; }); + + return base; } -void mmap(uintptr_t base, size_t size) { - remap(base, size, [](uintptr_t &entry, uintptr_t page) { entry = EofP(PofK(palloc())); }); +long $madvise(uintptr_t base, size_t size, int advice) { + orc_assert(advice == MADV_DONTNEED || advice == MADV_FREE); + + Scan(base, size, [&](uintptr_t page, uintptr_t &entry) { + orc_assert(entry != 0); + if (isE(entry)) free(KofP(PofE(entry))); + entry = entry & restmask_ | holdpage_; + }); + + return 0; } -void munmap(uintptr_t base, size_t size) { - remap(base, size, [](uintptr_t &entry, uintptr_t page) { entry = 0; }); +long $munmap(uintptr_t base, size_t size) { + //printf("munmap(0x%lx, 0x%lx)\n", base, size); + + Scan(base, size, [&](uintptr_t page, uintptr_t &entry) { + if (isE(entry)) free(KofP(PofE(entry))); + entry = 0; + }); + + return 0; +} + +long $mprotect(uintptr_t base, size_t size, int prot) { + const auto rest(RofF(prot)); + + Scan(base, size, [&](uintptr_t page, uintptr_t &entry) { + orc_assert(entry != 0); + entry = entry & ~restmask_ | rest; + }); + + return 0; +} + +uintptr_t $brk(uintptr_t brk) { + //printf("brk(0x%lx) @ 0x%lx\n", brk, 
state_.stop_); + if (brk == 0) + return state_.stop_; + + const auto before(Align(state_.stop_)); + const auto after(Align(brk)); + // XXX: limit after + + if (before < after) + $mmap(before, after - before, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + else + $munmap(after, before - after); + + // XXX: maybe zero memory within the same page + return state_.stop_ = brk; +} +// }}} + +long $prlimit64(pid_t pid, int resource, const struct rlimit *new_limit, struct rlimit *old_limit) { + orc_assert(pid == 0); + orc_assert(resource == RLIMIT_STACK); + orc_assert(new_limit == nullptr); + + old_limit->rlim_cur = 8192ul*1024ul; + old_limit->rlim_max = RLIM64_INFINITY; + return 0; } +long $exit_group(int status) { + printf("exit_group(0x%x)\n", status); + abort(); +} + +bool isU(const void *data, size_t size) { + return reinterpret_cast(data) >= 0 && + reinterpret_cast(data) + size >= data; +} + +#ifdef __x86_64__ +long $arch_prctl(int code, unsigned long address) { + switch (code) { + case ARCH_SET_FS: + WriteMSR(0xC0000100 /*fsbase*/, address); + return 0; + case 0x3001: // ARCH_CET_STATUS + return -EINVAL; + default: + orc_assert(false); + } +} #endif -extern "C" void _start() { - (void) pagesize_; +size_t $write(int fd, const void *data, size_t size) { + printf("write(%d, %p, %zu)\n", fd, data, size); + orc_assert(fd == 1 || fd == 2); + orc_assert(isU(data, size)); + orc_assert(size <= kilopage_ - 1); + memcpy(state_.buffer_, data, size); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) + state_.buffer_[size] = '\0'; + Clear(state_.buffer_); + *control_ = 2; + return size; +} +// system dispatcher {{{ +__attribute__((__naked__)) +void service() { + __asm__ volatile ( // dispatch system call table +// XXX: implement frame pointers #if 0 #elif defined(__aarch64__) - uint64_t *table(nullptr); - __asm__ volatile ("msr ttbr0_el1, %0" : "=r" (table)); - - table = reinterpret_cast(mebibyte_ + pagesize_); - - __asm__ volatile ( - "msr 
ttbr0_el1, %0\n" - "isb\n" - "mrs x0, sctlr_el1\n" - "orr x0, x0, #1\n" - "msr sctlr_el1, x0\n" - "isb\n" - : : "r" (table) : "memory", "x0"); - - const auto &header(*reinterpret_cast(worker_data)); - const uint64_t entry(header.e_entry); - const auto commands(reinterpret_cast(worker_data + header.e_phoff)); +// https://stackoverflow.com/questions/261419/what-registers-to-save-in-the-arm-c-calling-convention +// XXX: consider -ffixed-xX to avoid saving a few temporary registers + R"( + sub sp, sp, #0x90 + + stp x30, x1, [sp, #0x00] + stp x2, x3, [sp, #0x10] + stp x4, x5, [sp, #0x20] + stp x6, x7, [sp, #0x30] + + stp x8, x9, [sp, #0x40] + stp x10, x11, [sp, #0x50] + stp x12, x13, [sp, #0x60] + stp x14, x15, [sp, #0x70] + + stp x16, x17, [sp, #0x80] + + mrs x16, esr_el1 + mov x17, #0x56000000 + cmp x16, x17 + b.ne .Lnotsvc + + cmp x8, %1 + b.ge .Lenosys + + adr x16, %0 + ldr x16, [x16, x8, lsl #3] + + cmp x16, #0 + b.eq .Lenosys + + blr x16 + .Lreturn: + + ldp x16, x17, [sp, #0x80] + + ldp x14, x15, [sp, #0x70] + ldp x12, x13, [sp, #0x60] + ldp x10, x11, [sp, #0x50] + ldp x8, x9, [sp, #0x40] + + ldp x6, x7, [sp, #0x30] + ldp x4, x5, [sp, #0x20] + ldp x2, x3, [sp, #0x10] + ldp x30, x1, [sp, #0x00] + + add sp, sp, #0x90 + eret + + .Lenosys: + mov x0, x8 + mrs x1, elr_el1 + mov x2, x29 + bl enosys + b .Lreturn + + .Lnotsvc: + mov x3, x0 + mov x0, x16 + mrs x1, elr_el1 + mov x2, x29 + bl efault + b .Lreturn + )" : : "i" (state_.syscalls_), "i" (sizeof(state_.syscalls_) / sizeof(*state_.syscalls_)) +#elif defined(__x86_64__) +// https://wiki.osdev.org/SYSENTER +// https://www.felixcloutier.com/x86/syscall.html +// https://www.felixcloutier.com/x86/sysret.html +// https://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-and-user-space-f +// https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call + R"( + mov %%rsp, (%c2-0x16) + mov $(%c2-0x16), %%rsp + + push %%rcx; push 
%%rdx + push %%rsi; push %%rdi + + push %%r8; push %%r9 + push %%r10; push %%r11 + + cmp %1, %%rax + jge abort + + mov %0, %%rcx + mov (%%rcx, %%rax, 8), %%rax + + cmp $0, %%rax + je abort + + mov %%r10, %%rcx + call *%%rax + + pop %%r11; pop %%r10 + pop %%r9; pop %%r8 + + pop %%rdi; pop %%rsi + pop %%rdx; pop %%rcx + + // XXX: protect interrupts!! + pop %%rsp + sysretq + )" : : "i" (state_.syscalls_), "i" (sizeof(state_.syscalls_) / sizeof(*state_.syscalls_)), "i" (state_.tables_) +#else +#error +#endif + ); +} +// }}} +// executable loader {{{ +extern "C" void main() { + state_.tables_[0][0] = 0x0; + state_.tables_[0][1] = 0x0; + Clear(state_.tables_[0]); + + state_.more_ = reinterpret_cast(&state_ + 1); + state_.stop_ = 0x555555b2d000; + + state_.syscalls_[__NR_mmap] = reinterpret_cast(&$mmap); + state_.syscalls_[__NR_madvise] = reinterpret_cast(&$madvise); + state_.syscalls_[__NR_munmap] = reinterpret_cast(&$munmap); + state_.syscalls_[__NR_mprotect] = reinterpret_cast(&$mprotect); + state_.syscalls_[__NR_brk] = reinterpret_cast(&$brk); + + state_.syscalls_[__NR_prlimit64] = reinterpret_cast(&$prlimit64); + state_.syscalls_[__NR_exit_group] = reinterpret_cast(&$exit_group); +#if defined(__x86_64__) + state_.syscalls_[__NR_arch_prctl] = reinterpret_cast(&$arch_prctl); +#endif + + state_.syscalls_[__NR_write] = reinterpret_cast(&$write); + + const auto null(orc_syscall($mmap(0, megapage_, 0, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0))); + orc_assert(null == 0); + + const uint64_t slide(megapage_); + + extern const unsigned char _binary_worker_start[]; + const auto &header(*reinterpret_cast(_binary_worker_start)); + const uint64_t entry(header.e_entry + slide); + const auto commands(reinterpret_cast(_binary_worker_start + header.e_phoff)); for (size_t i(0); i != header.e_phnum; ++i) - if (const auto &command(commands[i]); command.p_type == PT_LOAD) - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - printf("load %lx %p %lx\n", command.p_paddr, 
worker_data + command.p_offset, command.p_filesz); - //memcpy(memory + command.p_paddr, worker_data + command.p_offset, command.p_filesz); - (void) entry; + if (const auto &command(commands[i]); command.p_type == PT_LOAD) { + // https://forum.osdev.org/viewtopic.php?t=48082&p=328893 + // https://stackoverflow.com/questions/76795394/why-are-there-overlapping-and-misaligned-segments-in-a-simple-elf-binary + const auto offset(command.p_vaddr & uint64_t(command.p_align - 1)); + + const auto data(command.p_offset - offset + _binary_worker_start); + const auto size(Align(command.p_filesz + offset)); + + const auto start(command.p_vaddr - offset + slide); + const auto total(Align(command.p_memsz + offset)); + + orc_assert(total >= size); + + const auto writable((command.p_flags & PF_W) != 0); + const auto rest(RofF(writable, (command.p_flags & PF_X) != 0)); + + if (!writable) + Scan(start, size, [&](uintptr_t page, uintptr_t &entry) { + orc_assert(entry == 0); + const auto back(data + (page - start)); + + entry = rest | PofK(back) | 0x1; + }); + else { + Scan(start, size, [&](uintptr_t page, uintptr_t &entry) { + orc_assert(entry == 0); + const auto back(data + (page - start)); + + const auto copy(palloc()); + memcpy(copy, back, kilopage_); + entry = rest | PofK(copy) | 0x1; + }); + + memset(reinterpret_cast(start), 0, offset); + memset(reinterpret_cast(start + offset + command.p_filesz), 0, size - offset - command.p_filesz); + } + + Scan(start + size, total - size, [&](uintptr_t page, uintptr_t &entry) { + orc_assert(entry == 0); + entry = rest; + }); + } + + struct rlimit limit{}; + $prlimit64(0, RLIMIT_STACK, nullptr, &limit); + const size_t size(limit.rlim_cur); + + auto stack(reinterpret_cast(reinterpret_cast(orc_syscall($mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0))) + size)); + + // NOLINTBEGIN(clang-analyzer-core.NullDereference) + + // https://articles.manugarg.com/aboutelfauxiliaryvectors + // https://lwn.net/Articles/519085/ + + // 
random + // XXX: https://xkcd.com/221/ + // chosen by fair dice roll. + // guaranteed to be random. + *--stack = 0x4444444444444444; + *--stack = 0x4444444444444444; + const auto random(stack); + + // auxv + *--stack = AT_NULL; + *--stack = reinterpret_cast(random); + *--stack = AT_RANDOM; + + // envp + *--stack = 0; + + // argv + *--stack = 0; + + // argc + *--stack = 0; + + // NOLINTEND(clang-analyzer-core.NullDereference) + + const auto atexit(nullptr); + + +#if defined(__x86_64__) + WriteMSR(0xC0000081 /*star*/, (0x8*3<<16)|(0x8*1), 0); + // arch/x86/kernel/cpu/common.c // XXX: deal with $0x100 + WriteMSR(0xC0000082 /*lstar*/, &service); + WriteMSR(0xC0000084 /*sfmask*/, 0x0, 0x257ED5); #endif - abort(); + __asm__ volatile ( // jump to userland entrypoint +#if 0 +#elif defined(__aarch64__) + // sysdeps/aarch64/start.S + R"( + adr x0, vectors + msr vbar_el1, x0 + + mov sp, %4 + + mov x0, %2 + + msr sp_el0, %0 + msr elr_el1, %1 + msr spsr_el1, xzr + eret + + // XXX: this is wasting a ton of space + .balign 0x800 + vectors: + .space 0x200 + .space 0x200 + b %3 + .balign 0x80 + .space 0x380 + )" : : "r" (stack), "r" (entry), "i" (atexit), "i" (service), "r" (state_.tables_) : "x0" +#elif defined(__x86_64__) + // sysdeps/x86_64/start.S + + // https://old.reddit.com/r/osdev/comments/clump5/is_it_okay_to_use_null_descriptor_for_ds_es_and/ + // https://forum.osdev.org/viewtopic.php?f=1&t=22826 + // https://en.wikipedia.org/wiki/FLAGS_register + // XXX: https://news.ycombinator.com/item?id=12552834 + + R"( + mov %2, %%rdx + + push $0x08*4+0x3 // ss + push %0 // sp + // XXX: copy 0x100 from current flags? 
+ push $0x202 // rflags + push $0x08*5+0x3 // cs + push %1 // ip + iretq + )" : : "r" (stack), "r" (entry), "i" (atexit) : "rax", "rcx", "rdx" +#else +#error +#endif + ); } +// }}} +// kernel bootloader {{{ +__attribute__((__naked__)) +extern "C" void _start() { + __asm__ volatile ( // setup memory configuration +#if 0 +#elif defined(__aarch64__) + R"( + // set stack pointer + adr x0, %0 + mov sp, x0 + + mrs x0, cpacr_el1 + // enable Advanced SIMD + orr x0, x0, #0x300000 + msr cpacr_el1, x0 + + // initialize page tables + adr x0, %0 + bl setup + + mov x0, #0x0 + // enable 4-level EL0 paging + orr x0, x0, #0x00000010 + // enable 4-level EL1 paging + orr x0, x0, #0x00100000 + // use 4kB EL1 page granule + orr x0, x0, #0x80000000 + msr tcr_el1, x0 + + // memory attribute index 0 + mov x0, #0xff + msr mair_el1, x0 + + // set page table pointers + adr x0, %0 + add x0, x0, 0x2000 + msr ttbr0_el1, x0 + add x0, x0, 0x1000 + msr ttbr1_el1, x0 + isb + + mrs x0, sctlr_el1 + // enable paging (MMU) + orr x0, x0, #0x0001 // M + // enable data/unified caches + orr x0, x0, #0x0004 // C + // enable EL1 stack alignment + orr x0, x0, #0x0008 // SA + // enable EL0 stack alignment + orr x0, x0, #0x0010 // SA0 + // enable instruction cache + orr x0, x0, #0x1000 // I + // enable EL0 ctr_ell1 access + orr x0, x0, #0x8000 // UCT + // enable EL0 dc cva access + orr x0, x0, #0x4000000 // UCI + msr sctlr_el1, x0 + isb + + mov x0, %1 + + // use high stack pointer + add sp, sp, x0 + + // jump to high kernel + adr x1, %2 + add x1, x1, x0 + br x1 + )" +#elif defined(__x86_64__) + // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers + R"( + // start in 32-bit protected mode + .code32 + mov $_start-%c1, %%ebx + sub %%ecx, %%ebx + + mov %%cr4, %%eax + // enable 4-level paging + or $(1<<5), %%eax + // enable SSE instructions + or $(1<<9), %%eax + // enable SSE exceptions + or $(1<<10), %%eax + // XXX: consider CET / PKS + mov %%eax, %%cr4 + + // set stack pointer + mov %0-%c1, 
%%eax + sub %%ebx, %%eax + mov %%eax, %%esp + + // initialize page tables + mov %0-%c1, %%eax + sub %%ebx, %%eax + call __regcall3__setup + + // enable extended features + mov $0xC0000080, %%ecx // efer + rdmsr + // enable syscall/sysret + or $(1<<0), %%eax + // enable long mode + or $(1<<8), %%eax + // enable no-execute + or $(1<<11), %%eax + wrmsr + + // set page table pointer + mov %0-%c1, %%eax + sub %%ebx, %%eax + add $0x2000, %%eax + mov %%eax, %%cr3 + + mov %%cr0, %%eax + // enable numeric error + or $(1<<5), %%eax + // enable write protect + or $(1<<16), %%eax + // disable no-writethrough + and $~(1<<29), %%eax + // disable cache disable + and $~(1<<30), %%eax + // enable paging + or $(1<<31), %%eax + mov %%eax, %%cr0 + + // low address descriptor + mov %0-%c1, %%eax + sub %%ebx, %%eax + add $0x3000, %%eax + push %%eax + pushw $0xfff + lgdt (%%esp) + add $0x6, %%esp + + // jump to 64-bit segment + jmp $0x08*1, $code64-%c1 + code64: + .code64 + + // high address descriptor + mov %0, %%rax + add $0x3000, %%rax + push %%rax + pushw $0xfff + lgdt (%%rsp) + add $0xa, %%rsp + + // clear segment registers + //mov $0x0, %%ax + //mov %%ax, %%ds + //mov %%ax, %%es + //mov %%ax, %%fs + //mov %%ax, %%gs + + // set stack segment + //mov $0x08*2, %%ss + + // use high stack pointer + mov %%rsp, %%rax + addq %1, %%rax + mov %%rax, %%rsp + + // jump to high kernel + mov %2, %%rax + sub %%rbx, %%rax + jmp *%%rax + )" +#else +#error +#endif + : : "i" (state_.tables_), "i" (-boundary_ - gigapage_), "i" (main)); +} +// }}} +// x86_64 entrypoint {{{ +#if defined(__x86_64__) +__asm__ (R"( // x86 32-bit qemu entrypoint +.pushsection .note.Xen, "a" +.balign 4 +.long 2f - 1f +.long 4484f - 3f +.long 18 // XEN_ELFNOTE_PHYS32_ENTRY +1:.asciz "Xen" +2:.balign 4 +3:.quad _start - 0xffffffffc0000000 +4484:.balign 4 +.popsection +)"); +#endif +// }}} } // NOLINTEND(performance-no-int-to-ptr) diff --git a/srv-kernel/source/paging.cpp b/srv-kernel/source/paging.cpp new file mode 100644 
index 000000000..1e4d60b72 --- /dev/null +++ b/srv-kernel/source/paging.cpp @@ -0,0 +1,91 @@ +/* Orchid - WebRTC P2P VPN Market (on Ethereum) + * Copyright (C) 2017-2020 The Orchid Authors +*/ + +/* GNU Affero General Public License, Version 3 {{{ */ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . +**/ +/* }}} */ + + +// this code runs in lower memory +// on x86_64, this code is 32-bit +// little-endian is our friend ;P + +#include "paging.hpp" + +__attribute__((__format__ (__printf__, 1, 2))) +extern "C" size_t printf(const char *format, ...); + +// NOLINTBEGIN(performance-no-int-to-ptr) +namespace orc { + +#if defined(__i386__) +__attribute__((__regcall__)) +#endif +extern "C" void setup(Table tables[]) { +#if 0 +#elif defined(__aarch64__) +// arm separates low/high tables +static constexpr size_t table_(4); +#elif defined(__i386__) +// x86 has a unified low/high table +static constexpr size_t table_(3); +#else +#error +#endif + + tables[0][0] = BofP(0x0); + tables[0][1] = BofP(gigapage_); + tables[1][511] = BofP(boundary_); + + tables[2][0] = TofP(PofL(tables[0])); + tables[table_-1][511] = TofP(PofL(tables[1])); + +#if defined(__i386__) + // tables[3] is the global descriptor table + // https://en.wikipedia.org/wiki/Segment_descriptor + // while limit is often all-f's, it is ignored + // #f## = (G DB _ A) f (P DPL < S) (Type) + // 0x--#-##----------; + tables[3][1] = 
0x00Af9B000000ffff; //CS0 + tables[3][2] = 0x00Cf93000000ffff; //SS0 + tables[3][3] = 0x00CfFB000000ffff; //323 + tables[3][4] = 0x00CfF3000000ffff; //SS3 + tables[3][5] = 0x00AfFB000000ffff; //CS3 + // arch/x86/boot/compressed/head_64.S + // arch/x86/realmode/rm/trampoline_64.S + // arch/x86/include/asm/segment.h +#if 0 + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ + + 8 00 0 0 8b 079000 4087 TSS + 9 00 0 0 00 00ffff fe00 ^ + + A 0 LDT + B 0 ^ + + C 0 TLS_MIN + D 0 + E 0 TLS_MAX + + F 00 4 0 f5 000000 0002 CPUNODE +#endif +#endif +} + +} +// NOLINTEND(performance-no-int-to-ptr) diff --git a/srv-kernel/source/paging.hpp b/srv-kernel/source/paging.hpp new file mode 100644 index 000000000..64a90930b --- /dev/null +++ b/srv-kernel/source/paging.hpp @@ -0,0 +1,112 @@ +/* Orchid - WebRTC P2P VPN Market (on Ethereum) + * Copyright (C) 2017-2020 The Orchid Authors +*/ + +/* GNU Affero General Public License, Version 3 {{{ */ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+**/ +/* }}} */ + + +#ifndef ORCHID_PAGING_HPP +#define ORCHID_PAGING_HPP + +#include +#include + +// NOLINTBEGIN(performance-no-int-to-ptr) +namespace orc { + +static constexpr size_t addrbits_(48); +static constexpr size_t pagebits_(12); + +typedef uint64_t Entry; +static constexpr size_t wordbits_(3); +static_assert(1 << wordbits_ == sizeof(Entry)); + +static constexpr size_t indxbits_(pagebits_ - wordbits_); +typedef Entry Table[1 << indxbits_]; + +static constexpr size_t kilopage_(1 << (0 * indxbits_ + pagebits_)); +static constexpr size_t megapage_(1 << (1 * indxbits_ + pagebits_)); +static constexpr size_t gigapage_(1 << (2 * indxbits_ + pagebits_)); + +#if defined(__aarch64__) +static constexpr size_t boundary_(gigapage_); +#else +static constexpr size_t boundary_(0); +#endif + +inline uintptr_t PofL(const void *address) { return reinterpret_cast(address); } + +inline uintptr_t PofK(const void *address) { return reinterpret_cast(address) + gigapage_ + boundary_; } + +template +inline Type_ *KofP(uintptr_t physical) { return reinterpret_cast(physical - boundary_ - gigapage_); } + +#define MASK(bits) ((Entry(1) << bits) - 1) + +static constexpr Entry nextmask_(MASK(addrbits_) & ~MASK(pagebits_)); +static constexpr Entry restmask_(~(nextmask_ | 0x1)); +static constexpr uintptr_t holdpage_(0); + +// T: Table +// B: Block + +// E: Entry +// R: Rest +// F: Flags + +// P: Physical +// K: Kernel +// L: Low + +static constexpr bool isE(Entry entry) { return (entry & 0x1) != 0; } +static constexpr uintptr_t PofE(uintptr_t entry) { return entry & nextmask_; } + +#if 0 +#elif defined(__aarch64__) +// https://armv8-ref.codingbelief.com/en/chapter_d4/d43_vmsav8-64_translation_table_format_descriptors.html +// https://medium.com/@om.nara/arm64-normal-memory-attributes-6086012fa0e3 + +static constexpr Entry TofP(uintptr_t physical) { return physical | uintptr_t(0x003); } +static constexpr Entry BofP(uintptr_t physical) { return physical | uintptr_t(0x601); } + +static 
constexpr Entry RofF(bool writable, bool executable) { + Entry value(1ull << 53 | 0x642); + if (!writable) value |= 1ull << 7; + if (!executable) value |= 1ull << 54; + return value; +} +#elif defined(__x86_64__) || defined(__i386__) +// https://wiki.osdev.org/Paging + +static constexpr Entry TofP(uintptr_t physical) { return physical | uintptr_t(0x07); } +static constexpr Entry BofP(uintptr_t physical) { return physical | uintptr_t(0x83); } + +static constexpr Entry RofF(bool writable, bool executable) { + Entry value(1ull << 2); + if (writable) value |= 1ull << 1; + if (!executable) value |= 1ull << 63; + return value; +} +#else +#error +#endif + +} +// NOLINTEND(performance-no-int-to-ptr) + +#endif//ORCHID_PAGING_HPP diff --git a/srv-kernel/source/scope.hpp b/srv-kernel/source/scope.hpp deleted file mode 120000 index 86f83e0db..000000000 --- a/srv-kernel/source/scope.hpp +++ /dev/null @@ -1 +0,0 @@ -../../p2p/source/scope.hpp \ No newline at end of file diff --git a/srv-kernel/source/worker.cpp b/srv-kernel/source/worker.cpp deleted file mode 100644 index 16f6669a7..000000000 --- a/srv-kernel/source/worker.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* Orchid - WebRTC P2P VPN Market (on Ethereum) - * Copyright (C) 2017-2020 The Orchid Authors -*/ - -/* GNU Affero General Public License, Version 3 {{{ */ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
-**/ -/* }}} */ - - -#include "worker.hpp" - -__attribute__((__aligned__(16*1024))) -#include "worker.xxd" diff --git a/srv-kernel/source/worker.hpp b/srv-kernel/source/worker.hpp deleted file mode 100644 index 2d204b840..000000000 --- a/srv-kernel/source/worker.hpp +++ /dev/null @@ -1,29 +0,0 @@ -/* Orchid - WebRTC P2P VPN Market (on Ethereum) - * Copyright (C) 2017-2020 The Orchid Authors -*/ - -/* GNU Affero General Public License, Version 3 {{{ */ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
-**/ -/* }}} */ - - -#ifndef ORCHID_WORKER_HPP -#define ORCHID_WORKER_HPP - -extern const unsigned char worker_data[]; -extern const unsigned int worker_size; - -#endif//ORCHID_WORKER_HPP diff --git a/srv-worker/extra/config.h b/srv-worker/extra/config.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/srv-worker/makefile b/srv-worker/makefile index 8c01a362c..31d639dfa 100644 --- a/srv-worker/makefile +++ b/srv-worker/makefile @@ -34,7 +34,7 @@ debug: $(binary) .PHONY: test test: $(binary) - strace -ff $< 2>&1 | grep -Ev '(] |^)(<|(qmap|mprotect|munmap)\()' + strace -ff -e 'trace=!write' $< 2>&1 #| sed -e '0,/^execve(/d;/^orc_trace(/d;/^+++ /d;s/\(mmap([^,]*\),[^,]*/\1/;s/mmap(0x\([0-9a-f]*\),\(.*\)= 0x\1/mmap(0X,\2= 0X/;s/\(mmap(NULL,.*\)= 0x[0-9a-f]*/\1= 0X/;s///;s/^\(madvise\|mprotect\)(0x[0-9a-z]*, [0-9]*, /\1(/;s/^munmap(0x[0-9a-f]*, [0-9]*) */munmap() /' | sort | uniq -c | sort -nr source += $(wildcard $(pwd)/source/*.cpp) source += $(pwd)/libc.cpp @@ -43,11 +43,9 @@ cflags += -I$(pwd)/source lflags += -nodefaultlibs lflags += -static-pie +cflags += -ftls-model=local-exec lflags += -lm lflags := $(filter-out -pthread,$(lflags)) -qflags += -femulated-tls -cflags += -fno-plt -lflags += -fno-plt lflags += -lstdc++ lflags += -lgcc @@ -642,7 +640,6 @@ $(output)/%/$(pwd)/jemalloc/$(jemalloc): $(output)/%/$(pwd)/jemalloc/Makefile $(MAKE) -C $(dir $<) $(jemalloc) linked += $(pwd)/jemalloc/$(jemalloc) - ifeq ($(machine),x86_64) lib := $(output)/sysroot/usr/lib/$(host/$(machine)) else diff --git a/vpn-shared/source/client.hpp b/vpn-shared/source/client.hpp index 0aea284f2..b5291b1f7 100644 --- a/vpn-shared/source/client.hpp +++ b/vpn-shared/source/client.hpp @@ -39,9 +39,6 @@ #include "signed.hpp" #include "ticket.hpp" -// XXX: move this somewhere and maybe find a library -namespace gsl { template using owner = Type_; } - namespace orc { struct Currency;