diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 248 | ||||
-rw-r--r-- | init/Makefile | 5 | ||||
-rw-r--r-- | init/do_mounts.c | 93 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 11 | ||||
-rw-r--r-- | init/init_task.c | 4 | ||||
-rw-r--r-- | init/initramfs.c | 8 | ||||
-rw-r--r-- | init/main.c | 317 |
7 files changed, 536 insertions, 150 deletions
diff --git a/init/Kconfig b/init/Kconfig index bd7d650d4a99..452bc1835cd4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -30,6 +30,12 @@ config CC_CAN_LINK config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) +config TOOLS_SUPPORT_RELR + def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) + +config CC_HAS_ASM_INLINE + def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null) + config CC_HAS_WARN_MAYBE_UNINITIALIZED def_bool $(cc-option,-Wmaybe-uninitialized) help @@ -53,7 +59,7 @@ config CONSTRUCTORS config IRQ_WORK bool -config BUILDTIME_EXTABLE_SORT +config BUILDTIME_TABLE_SORT bool config THREAD_INFO_IN_TASK @@ -99,29 +105,9 @@ config COMPILE_TEST here. If you are a user/distributor, say N here to exclude useless drivers to be distributed. -config HEADER_TEST - bool "Compile test headers that should be standalone compilable" - help - Compile test headers listed in header-test-y target to ensure they are - self-contained, i.e. compilable as standalone units. - - If you are a developer or tester and want to ensure the requested - headers are self-contained, say Y here. Otherwise, choose N. - -config KERNEL_HEADER_TEST - bool "Compile test kernel headers" - depends on HEADER_TEST - help - Headers in include/ are used to build external moduls. - Compile test them to ensure they are self-contained, i.e. - compilable as standalone units. - - If you are a developer or tester and want to ensure the headers - in include/ are self-contained, say Y here. Otherwise, choose N. - config UAPI_HEADER_TEST bool "Compile test UAPI headers" - depends on HEADER_TEST && HEADERS_INSTALL && CC_CAN_LINK + depends on HEADERS_INSTALL && CC_CAN_LINK help Compile test headers exported to user-space to ensure they are self-contained, i.e. compilable as standalone units. @@ -161,13 +147,13 @@ config LOCALVERSION_AUTO which is done within the script "scripts/setlocalversion".) config BUILD_SALT - string "Build ID Salt" - default "" - help - The build ID is used to link binaries and their debug info. Setting - this option will use the value in the calculation of the build id. - This is mostly useful for distributions which want to ensure the - build is unique between builds. It's safe to leave the default. + string "Build ID Salt" + default "" + help + The build ID is used to link binaries and their debug info. Setting + this option will use the value in the calculation of the build id. + This is mostly useful for distributions which want to ensure the + build is unique between builds. It's safe to leave the default. config HAVE_KERNEL_GZIP bool @@ -780,6 +766,10 @@ config ARCH_SUPPORTS_NUMA_BALANCING config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH bool +config CC_HAS_INT128 + def_bool y + depends on !$(cc-option,-D__SIZEOF_INT128__=0) + # # For architectures that know their GCC __int128 support is sound # @@ -829,7 +819,7 @@ menuconfig CGROUPS if CGROUPS config PAGE_COUNTER - bool + bool config MEMCG bool "Memory controller" @@ -928,6 +918,28 @@ config RT_GROUP_SCHED endif #CGROUP_SCHED +config UCLAMP_TASK_GROUP + bool "Utilization clamping per group of tasks" + depends on CGROUP_SCHED + depends on UCLAMP_TASK + default n + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks currently scheduled on that CPU. + + When this option is enabled, the user can specify a min and max + CPU bandwidth which is allowed for each single task in a group. + The max bandwidth allows to clamp the maximum frequency a task + can use, while the min bandwidth allows to define a minimum + frequency a task will always use. + + When task group based utilization clamping is enabled, an eventually + specified task-specific clamp value is constrained by the cgroup + specified clamp value. Both minimum and maximum task clamping cannot + be bigger than the corresponding clamping defined at task group level. + + If in doubt, say N. + config CGROUP_PIDS bool "PIDs controller" help @@ -1069,6 +1081,14 @@ config UTS_NS In this namespace tasks see different info provided with the uname() system call +config TIME_NS + bool "TIME namespace" + depends on GENERIC_VDSO_TIME_NS + default y + help + In this namespace boottime and monotonic clocks can be set. + The time will keep going with the same pace. + config IPC_NS bool "IPC namespace" depends on (SYSVIPC || POSIX_MQUEUE) @@ -1204,25 +1224,44 @@ source "usr/Kconfig" endif +config BOOT_CONFIG + bool "Boot config support" + depends on BLK_DEV_INITRD + default y + help + Extra boot config allows system admin to pass a config file as + complemental extension of kernel cmdline when booting. + The boot config file must be attached at the end of initramfs + with checksum and size. + See <file:Documentation/admin-guide/bootconfig.rst> for details. + + If unsure, say Y. + choice prompt "Compiler optimization level" default CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE - bool "Optimize for performance" + bool "Optimize for performance (-O2)" help This is the default optimization level for the kernel, building with the "-O2" compiler flag for best performance and most helpful compile-time warnings. -config CC_OPTIMIZE_FOR_SIZE - bool "Optimize for size" +config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" + depends on ARC imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help - Enabling this option will pass "-Os" instead of "-O2" to - your compiler resulting in a smaller kernel. + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. - If unsure, say N. +config CC_OPTIMIZE_FOR_SIZE + bool "Optimize for size (-Os)" + imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives + help + Choosing this option will pass "-Os" to your compiler resulting + in a smaller kernel. endchoice @@ -1294,9 +1333,9 @@ menuconfig EXPERT select DEBUG_KERNEL help This option allows certain base kernel options and settings - to be disabled or tweaked. This is for specialized - environments which can tolerate a "non-standard" kernel. - Only use this if you really know what you are doing. + to be disabled or tweaked. This is for specialized + environments which can tolerate a "non-standard" kernel. + Only use this if you really know what you are doing. config UID16 bool "Enable 16-bit UID system calls" if EXPERT @@ -1339,23 +1378,6 @@ config SYSFS_SYSCALL If unsure say Y here. -config SYSCTL_SYSCALL - bool "Sysctl syscall support" if EXPERT - depends on PROC_SYSCTL - default n - select SYSCTL - ---help--- - sys_sysctl uses binary paths that have been found challenging - to properly maintain and use. The interface in /proc/sys - using paths with ascii names is now the primary path to this - information. - - Almost nothing using the binary sysctl interface so if you are - trying to save some space it is probably safe to disable this, - making your kernel marginally smaller. - - If unsure say N here. - config FHANDLE bool "open by fhandle syscalls" if EXPERT select EXPORTFS @@ -1406,11 +1428,11 @@ config BUG bool "BUG() support" if EXPERT default y help - Disabling this option eliminates support for BUG and WARN, reducing - the size of your kernel image and potentially quietly ignoring - numerous fatal conditions. You should only consider disabling this - option for embedded systems with no facilities for reporting errors. - Just say Y. + Disabling this option eliminates support for BUG and WARN, reducing + the size of your kernel image and potentially quietly ignoring + numerous fatal conditions. You should only consider disabling this + option for embedded systems with no facilities for reporting errors. + Just say Y. config ELF_CORE depends on COREDUMP @@ -1426,8 +1448,8 @@ config PCSPKR_PLATFORM select I8253_LOCK default y help - This option allows to disable the internal PC-Speaker - support, saving some memory. + This option allows to disable the internal PC-Speaker + support, saving some memory. config BASE_FULL default y @@ -1515,6 +1537,7 @@ config AIO config IO_URING bool "Enable IO uring support" if EXPERT select ANON_INODES + select IO_WQ default y help This option enables support for the io_uring interface, enabling @@ -1544,29 +1567,29 @@ config MEMBARRIER If unsure, say Y. config KALLSYMS - bool "Load all symbols for debugging/ksymoops" if EXPERT - default y - help - Say Y here to let the kernel print out symbolic crash information and - symbolic stack backtraces. This increases the size of the kernel - somewhat, as all symbols have to be loaded into the kernel image. + bool "Load all symbols for debugging/ksymoops" if EXPERT + default y + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. This increases the size of the kernel + somewhat, as all symbols have to be loaded into the kernel image. config KALLSYMS_ALL bool "Include all symbols in kallsyms" depends on DEBUG_KERNEL && KALLSYMS help - Normally kallsyms only contains the symbols of functions for nicer - OOPS messages and backtraces (i.e., symbols from the text and inittext - sections). This is sufficient for most cases. And only in very rare - cases (e.g., when a debugger is used) all symbols are required (e.g., - names of variables from the data sections, etc). + Normally kallsyms only contains the symbols of functions for nicer + OOPS messages and backtraces (i.e., symbols from the text and inittext + sections). This is sufficient for most cases. And only in very rare + cases (e.g., when a debugger is used) all symbols are required (e.g., + names of variables from the data sections, etc). - This option makes sure that all symbols are loaded into the kernel - image (i.e., symbols from all sections) in cost of increased kernel - size (depending on the kernel configuration, it may be 300KiB or - something like this). + This option makes sure that all symbols are loaded into the kernel + image (i.e., symbols from all sections) in cost of increased kernel + size (depending on the kernel configuration, it may be 300KiB or + something like this). - Say N unless you really need all symbols. + Say N unless you really need all symbols. config KALLSYMS_ABSOLUTE_PERCPU bool @@ -1603,6 +1626,9 @@ config BPF_SYSCALL Enable the bpf() system call that allows to manipulate eBPF programs and maps via file descriptors. +config ARCH_WANT_DEFAULT_BPF_JIT + bool + config BPF_JIT_ALWAYS_ON bool "Permanently enable BPF JIT and remove BPF interpreter" depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT @@ -1610,6 +1636,10 @@ config BPF_JIT_ALWAYS_ON Enables BPF JIT and removes BPF interpreter to avoid speculative execution of BPF instructions by the interpreter +config BPF_JIT_DEFAULT_ON + def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON + depends on HAVE_EBPF_JIT && BPF_JIT + config USERFAULTFD bool "Enable userfaultfd() system call" depends on MMU @@ -1709,12 +1739,12 @@ config DEBUG_PERF_USE_VMALLOC depends on PERF_EVENTS && DEBUG_KERNEL && !PPC select PERF_USE_VMALLOC help - Use vmalloc memory to back perf mmap() buffers. + Use vmalloc memory to back perf mmap() buffers. - Mostly useful for debugging the vmalloc code on platforms - that don't require it. + Mostly useful for debugging the vmalloc code on platforms + that don't require it. - Say N if unsure. + Say N if unsure. endmenu @@ -1930,6 +1960,10 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config MODULE_SIG_FORMAT + def_bool n + select SYSTEM_DATA_VERIFICATION + menuconfig MODULES bool "Enable loadable module support" option modules @@ -1989,6 +2023,14 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +config ASM_MODVERSIONS + bool + default HAVE_ASM_MODVERSIONS && MODVERSIONS + help + This enables module versioning for exported symbols also from + assembly. This can be enabled only when the target architecture + supports it. + config MODULE_REL_CRCS bool depends on MODVERSIONS @@ -2006,8 +2048,7 @@ config MODULE_SRCVERSION_ALL config MODULE_SIG bool "Module signature verification" - depends on MODULES - select SYSTEM_DATA_VERIFICATION + select MODULE_SIG_FORMAT help Check modules for valid signatures upon load: the signature is simply appended to the module. For more information see @@ -2017,6 +2058,11 @@ config MODULE_SIG kernel build dependency so that the signing tool can use its crypto library. + You should enable this option if you wish to use either + CONFIG_SECURITY_LOCKDOWN_LSM or lockdown functionality imposed via + another LSM - otherwise unsigned modules will be loadable regardless + of the lockdown policy. + !!!WARNING!!! If you enable this option, you MUST make sure that the module DOES NOT get stripped after being signed. This includes the debuginfo strip done by some packagers (such as rpmbuild) and @@ -2083,7 +2129,6 @@ config MODULE_SIG_HASH config MODULE_COMPRESS bool "Compress modules on installation" - depends on MODULES help Compresses kernel modules when 'make modules_install' is run; gzip or @@ -2119,9 +2164,38 @@ config MODULE_COMPRESS_XZ endchoice +config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS + bool "Allow loading of modules with missing namespace imports" + help + Symbols exported with EXPORT_SYMBOL_NS*() are considered exported in + a namespace. A module that makes use of a symbol exported with such a + namespace is required to import the namespace via MODULE_IMPORT_NS(). + There is no technical reason to enforce correct namespace imports, + but it creates consistency between symbols defining namespaces and + users importing namespaces they make use of. This option relaxes this + requirement and lifts the enforcement when loading a module. + + If unsure, say N. + +config UNUSED_SYMBOLS + bool "Enable unused/obsolete exported symbols" + default y if X86 + help + Unused but exported symbols make the kernel needlessly bigger. For + that reason most of these unused exports will soon be removed. This + option is provided temporarily to provide a transition period in case + some external kernel module needs one of these symbols anyway. If you + encounter such a case in your module, consider if you are actually + using the right API. (rationale: since nobody in the kernel is using + this in a module, there is a pretty good chance it's actually the + wrong interface to use). If you really need the symbol, please send a + mail to the linux kernel mailing list mentioning the symbol and why + you really need it, and what the merge plan to the mainline kernel for + your module is. + config TRIM_UNUSED_KSYMS bool "Trim unused exported kernel symbols" - depends on MODULES && !UNUSED_SYMBOLS + depends on !UNUSED_SYMBOLS help The kernel and some modules make many symbols available for other modules to use via EXPORT_SYMBOL() and variants. Depending diff --git a/init/Makefile b/init/Makefile index a3e5ce2bcf08..6246a06364d0 100644 --- a/init/Makefile +++ b/init/Makefile @@ -33,5 +33,6 @@ $(obj)/version.o: include/generated/compile.h silent_chk_compile.h = : include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" \ + "$(CONFIG_PREEMPT_RT)" "$(CC) $(KBUILD_CFLAGS)" diff --git a/init/do_mounts.c b/init/do_mounts.c index 53cb37b66227..0ae9cc22f2ae 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -212,6 +212,7 @@ static int match_dev_by_label(struct device *dev, const void *data) * a colon. * 9) PARTLABEL=<name> with name being the GPT partition label. * MSDOS partitions do not support labels! + * 10) /dev/cifs represents Root_CIFS (0xfe) * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -268,6 +269,9 @@ dev_t name_to_dev_t(const char *name) res = Root_NFS; if (strcmp(name, "nfs") == 0) goto done; + res = Root_CIFS; + if (strcmp(name, "cifs") == 0) + goto done; res = Root_RAM0; if (strcmp(name, "ram") == 0) goto done; @@ -383,12 +387,27 @@ static void __init get_fs_names(char *page) *s = '\0'; } -static int __init do_mount_root(char *name, char *fs, int flags, void *data) +static int __init do_mount_root(const char *name, const char *fs, + const int flags, const void *data) { struct super_block *s; - int err = ksys_mount(name, "/root", fs, flags, data); - if (err) - return err; + struct page *p = NULL; + char *data_page = NULL; + int ret; + + if (data) { + /* do_mount() requires a full page as fifth argument */ + p = alloc_page(GFP_KERNEL); + if (!p) + return -ENOMEM; + data_page = page_address(p); + /* zero-pad. do_mount() will make sure it's terminated */ + strncpy(data_page, data, PAGE_SIZE); + } + + ret = do_mount(name, "/root", fs, flags, data_page); + if (ret) + goto out; ksys_chdir("/root"); s = current->fs->pwd.dentry->d_sb; @@ -398,7 +417,11 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) s->s_type->name, sb_rdonly(s) ? " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); - return 0; + +out: + if (p) + put_page(p); + return ret; } void __init mount_block_root(char *name, int flags) @@ -501,6 +524,42 @@ static int __init mount_nfs_root(void) } #endif +#ifdef CONFIG_CIFS_ROOT + +extern int cifs_root_data(char **dev, char **opts); + +#define CIFSROOT_TIMEOUT_MIN 5 +#define CIFSROOT_TIMEOUT_MAX 30 +#define CIFSROOT_RETRY_MAX 5 + +static int __init mount_cifs_root(void) +{ + char *root_dev, *root_data; + unsigned int timeout; + int try, err; + + err = cifs_root_data(&root_dev, &root_data); + if (err != 0) + return 0; + + timeout = CIFSROOT_TIMEOUT_MIN; + for (try = 1; ; try++) { + err = do_mount_root(root_dev, "cifs", root_mountflags, + root_data); + if (err == 0) + return 1; + if (try > CIFSROOT_RETRY_MAX) + break; + + ssleep(timeout); + timeout <<= 1; + if (timeout > CIFSROOT_TIMEOUT_MAX) + timeout = CIFSROOT_TIMEOUT_MAX; + } + return 0; +} +#endif + #if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) void __init change_floppy(char *fmt, ...) { @@ -542,6 +601,15 @@ void __init mount_root(void) ROOT_DEV = Root_FD0; } #endif +#ifdef CONFIG_CIFS_ROOT + if (ROOT_DEV == Root_CIFS) { + if (mount_cifs_root()) + return; + + printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n"); + ROOT_DEV = Root_FD0; + } +#endif #ifdef CONFIG_BLK_DEV_FD if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { /* rd_doload is 2 for a dual initrd/ramload setup */ @@ -621,26 +689,23 @@ void __init prepare_namespace(void) mount_root(); out: - devtmpfs_mount("dev"); - ksys_mount(".", "/", NULL, MS_MOVE, NULL); + devtmpfs_mount(); + do_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); } static bool is_tmpfs; -static struct dentry *rootfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int rootfs_init_fs_context(struct fs_context *fc) { - void *fill = ramfs_fill_super; - if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) - fill = shmem_fill_super; + return shmem_init_fs_context(fc); - return mount_nodev(fs_type, flags, data, fill); + return ramfs_init_fs_context(fc); } struct file_system_type rootfs_fs_type = { .name = "rootfs", - .mount = rootfs_mount, + .init_fs_context = rootfs_init_fs_context, .kill_sb = kill_litter_super, }; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a9c6cc56f505..dab8b1151b56 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -48,13 +48,10 @@ early_param("initrd", early_initrd); static int init_linuxrc(struct subprocess_info *info, struct cred *new) { ksys_unshare(CLONE_FS | CLONE_FILES); - /* stdin/stdout/stderr for /linuxrc */ - ksys_open("/dev/console", O_RDWR, 0); - ksys_dup(0); - ksys_dup(0); + console_on_rootfs(); /* move initrd over / and chdir/chroot in initrd root */ ksys_chdir("/root"); - ksys_mount(".", "/", NULL, MS_MOVE, NULL); + do_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); ksys_setsid(); return 0; @@ -89,7 +86,7 @@ static void __init handle_initrd(void) current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - ksys_mount("..", ".", NULL, MS_MOVE, NULL); + do_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ ksys_chroot(".."); @@ -103,7 +100,7 @@ static void __init handle_initrd(void) mount_root(); printk(KERN_NOTICE "Trying to move old root to /initrd ... "); - error = ksys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + error = do_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); if (!error) printk("okay\n"); else { diff --git a/init/init_task.c b/init/init_task.c index 7ab773b9b3cd..9e5cbe5eab7b 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -30,8 +30,6 @@ static struct signal_struct init_signals = { .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .cputimer = { .cputime_atomic = INIT_CPUTIME_ATOMIC, - .running = false, - .checking_timer = false, }, #endif INIT_CPU_TIMERS(init_signals) @@ -174,7 +172,7 @@ struct task_struct init_task #ifdef CONFIG_FUNCTION_GRAPH_TRACER .ret_stack = NULL, #endif -#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) +#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION) .trace_recursion = 0, #endif #ifdef CONFIG_LIVEPATCH diff --git a/init/initramfs.c b/init/initramfs.c index c47dad0884f7..8ec1be4d7d51 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -10,6 +10,7 @@ #include <linux/syscalls.h> #include <linux/utime.h> #include <linux/file.h> +#include <linux/memblock.h> static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -529,6 +530,13 @@ extern unsigned long __initramfs_size; void __weak free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK + unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); + unsigned long aligned_end = ALIGN(end, PAGE_SIZE); + + memblock_free(__pa(aligned_start), aligned_end - aligned_start); +#endif + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, "initrd"); } diff --git a/init/main.c b/init/main.c index 96f8d5af52d6..f95b014a5479 100644 --- a/init/main.c +++ b/init/main.c @@ -28,6 +28,7 @@ #include <linux/initrd.h> #include <linux/memblock.h> #include <linux/acpi.h> +#include <linux/bootconfig.h> #include <linux/console.h> #include <linux/nmi.h> #include <linux/percpu.h> @@ -63,7 +64,7 @@ #include <linux/lockdep.h> #include <linux/kmemleak.h> #include <linux/pid_namespace.h> -#include <linux/device.h> +#include <linux/device/driver.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/sched/init.h> @@ -136,8 +137,19 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE]; char *saved_command_line; /* Command line for parameter parsing */ static char *static_command_line; -/* Command line for per-initcall parameter parsing */ -static char *initcall_command_line; +/* Untouched extra command line */ +static char *extra_command_line; +/* Extra init arguments */ +static char *extra_init_args; + +#ifdef CONFIG_BOOT_CONFIG +/* Is bootconfig on command line? */ +static bool bootconfig_found; +static bool initargs_found; +#else +# define bootconfig_found false +# define initargs_found false +#endif static char *execute_command; static char *ramdisk_execute_command; @@ -245,10 +257,172 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); -/* Change NUL term back to "=", to make "param" the whole string. */ -static int __init repair_env_string(char *param, char *val, +#ifdef CONFIG_BOOT_CONFIG + +char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; + +#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0) + +static int __init xbc_snprint_cmdline(char *buf, size_t size, + struct xbc_node *root) +{ + struct xbc_node *knode, *vnode; + char *end = buf + size; + char c = '\"'; + const char *val; + int ret; + + xbc_node_for_each_key_value(root, knode, val) { + ret = xbc_node_compose_key_after(root, knode, + xbc_namebuf, XBC_KEYLEN_MAX); + if (ret < 0) + return ret; + + vnode = xbc_node_get_child(knode); + ret = snprintf(buf, rest(buf, end), "%s%c", xbc_namebuf, + vnode ? '=' : ' '); + if (ret < 0) + return ret; + buf += ret; + if (!vnode) + continue; + + c = '\"'; + xbc_array_for_each_value(vnode, val) { + ret = snprintf(buf, rest(buf, end), "%c%s", c, val); + if (ret < 0) + return ret; + buf += ret; + c = ','; + } + if (rest(buf, end) > 2) + strcpy(buf, "\" "); + buf += 2; + } + + return buf - (end - size); +} +#undef rest + +/* Make an extra command line under given key word */ +static char * __init xbc_make_cmdline(const char *key) +{ + struct xbc_node *root; + char *new_cmdline; + int ret, len = 0; + + root = xbc_find_node(key); + if (!root) + return NULL; + + /* Count required buffer size */ + len = xbc_snprint_cmdline(NULL, 0, root); + if (len <= 0) + return NULL; + + new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES); + if (!new_cmdline) { + pr_err("Failed to allocate memory for extra kernel cmdline.\n"); + return NULL; + } + + ret = xbc_snprint_cmdline(new_cmdline, len + 1, root); + if (ret < 0 || ret > len) { + pr_err("Failed to print extra kernel cmdline.\n"); + return NULL; + } + + return new_cmdline; +} + +u32 boot_config_checksum(unsigned char *p, u32 size) +{ + u32 ret = 0; + + while (size--) + ret += *p++; + + return ret; +} + +static int __init bootconfig_params(char *param, char *val, const char *unused, void *arg) { + if (strcmp(param, "bootconfig") == 0) { + bootconfig_found = true; + } else if (strcmp(param, "--") == 0) { + initargs_found = true; + } + return 0; +} + +static void __init setup_boot_config(const char *cmdline) +{ + static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; + u32 size, csum; + char *data, *copy; + u32 *hdr; + int ret; + + strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, + bootconfig_params); + + if (!bootconfig_found) + return; + + if (!initrd_end) + goto not_found; + + hdr = (u32 *)(initrd_end - 8); + size = hdr[0]; + csum = hdr[1]; + + if (size >= XBC_DATA_MAX) { + pr_err("bootconfig size %d greater than max size %d\n", + size, XBC_DATA_MAX); + return; + } + + data = ((void *)hdr) - size; + if ((unsigned long)data < initrd_start) + goto not_found; + + if (boot_config_checksum((unsigned char *)data, size) != csum) { + pr_err("bootconfig checksum failed\n"); + return; + } + + copy = memblock_alloc(size + 1, SMP_CACHE_BYTES); + if (!copy) { + pr_err("Failed to allocate memory for bootconfig\n"); + return; + } + + memcpy(copy, data, size); + copy[size] = '\0'; + + ret = xbc_init(copy); + if (ret < 0) + pr_err("Failed to parse bootconfig\n"); + else { + pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret); + /* keys starting with "kernel." are passed via cmdline */ + extra_command_line = xbc_make_cmdline("kernel"); + /* Also, "init." keys are init arguments */ + extra_init_args = xbc_make_cmdline("init"); + } + return; +not_found: + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); +} +#else +#define setup_boot_config(cmdline) do { } while (0) +#endif + +/* Change NUL term back to "=", to make "param" the whole string. */ +static void __init repair_env_string(char *param, char *val) +{ if (val) { /* param=val or param="val"? */ if (val == param+strlen(param)+1) @@ -256,11 +430,9 @@ static int __init repair_env_string(char *param, char *val, else if (val == param+strlen(param)+2) { val[-2] = '='; memmove(val-1, val, strlen(val)+1); - val--; } else BUG(); } - return 0; } /* Anything after -- gets handed straight to init. */ @@ -272,7 +444,7 @@ static int __init set_init_arg(char *param, char *val, if (panic_later) return 0; - repair_env_string(param, val, unused, NULL); + repair_env_string(param, val); for (i = 0; argv_init[i]; i++) { if (i == MAX_INIT_ARGS) { @@ -292,14 +464,16 @@ static int __init set_init_arg(char *param, char *val, static int __init unknown_bootoption(char *param, char *val, const char *unused, void *arg) { - repair_env_string(param, val, unused, NULL); + size_t len = strlen(param); + + repair_env_string(param, val); /* Handle obsolete-style parameters */ if (obsolete_checksetup(param)) return 0; /* Unused module parameter. */ - if (strchr(param, '.') && (!val || strchr(param, '.') < val)) + if (strnchr(param, len, '.')) return 0; if (panic_later) @@ -313,7 +487,7 @@ static int __init unknown_bootoption(char *param, char *val, panic_later = "env"; panic_param = param; } - if (!strncmp(param, envp_init[i], val - param)) + if (!strncmp(param, envp_init[i], len+1)) break; } envp_init[i] = param; @@ -374,22 +548,51 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } */ static void __init setup_command_line(char *command_line) { - size_t len = strlen(boot_command_line) + 1; + size_t len, xlen = 0, ilen = 0; - saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!saved_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + if (extra_command_line) + xlen = strlen(extra_command_line); + if (extra_init_args) + ilen = strlen(extra_init_args) + 4; /* for " -- " */ - initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES); - if (!initcall_command_line) - panic("%s: Failed to allocate %zu bytes\n", __func__, len); + len = xlen + strlen(boot_command_line) + 1; + + saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES); + if (!saved_command_line) + panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); - strcpy(saved_command_line, boot_command_line); - strcpy(static_command_line, command_line); + if (xlen) { + /* + * We have to put extra_command_line before boot command + * lines because there could be dashes (separator of init + * command line) in the command lines. + */ + strcpy(saved_command_line, extra_command_line); + strcpy(static_command_line, extra_command_line); + } + strcpy(saved_command_line + xlen, boot_command_line); + strcpy(static_command_line + xlen, command_line); + + if (ilen) { + /* + * Append supplemental init boot args to saved_command_line + * so that user can check what command line options passed + * to init. + */ + len = strlen(saved_command_line); + if (initargs_found) { + saved_command_line[len++] = ' '; + } else { + strcpy(saved_command_line + len, " -- "); + len += 4; + } + + strcpy(saved_command_line + len, extra_init_args); + } } /* @@ -433,7 +636,7 @@ noinline void __ref rest_init(void) /* * Enable might_sleep() and smp_processor_id() checks. - * They cannot be enabled earlier because with CONFIG_PREEMPT=y + * They cannot be enabled earlier because with CONFIG_PREEMPTION=y * kernel_thread() would trigger might_sleep() splats. With * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled * already, but it's stuck on the kthreadd_done completion. @@ -507,7 +710,7 @@ void __init __weak mem_encrypt_init(void) { } void __init __weak poking_init(void) { } -void __init __weak pgd_cache_init(void) { } +void __init __weak pgtable_cache_init(void) { } bool initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); @@ -553,9 +756,11 @@ static void __init mm_init(void) * bigger than MAX_ORDER unless SPARSEMEM. */ page_ext_init_flatmem(); + init_debug_pagealloc(); report_meminit(); mem_init(); kmem_cache_init(); + kmemleak_init(); pgtable_init(); debug_objects_mem_init(); vmalloc_init(); @@ -564,7 +769,6 @@ static void __init mm_init(void) init_espfix_bsp(); /* Should be run after espfix64 is set up. */ pti_init(); - pgd_cache_init(); } void __init __weak arch_call_rest_init(void) @@ -593,8 +797,9 @@ asmlinkage __visible void __init start_kernel(void) boot_cpu_init(); page_address_init(); pr_notice("%s", linux_banner); + early_security_init(); setup_arch(&command_line); - mm_init_cpumask(&init_mm); + setup_boot_config(command_line); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -604,7 +809,7 @@ asmlinkage __visible void __init start_kernel(void) build_all_zonelists(NULL); page_alloc_init(); - pr_notice("Kernel command line: %s\n", boot_command_line); + pr_notice("Kernel command line: %s\n", saved_command_line); /* parameters may set static keys */ jump_label_init(); parse_early_param(); @@ -615,6 +820,9 @@ asmlinkage __visible void __init start_kernel(void) if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); + if (extra_init_args) + parse_args("Setting extra init args", extra_init_args, + NULL, 0, -1, -1, NULL, set_init_arg); /* * These use large bootmem allocations and must precede @@ -740,7 +948,6 @@ asmlinkage __visible void __init start_kernel(void) initrd_start = 0; } #endif - kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); acpi_early_init(); @@ -991,16 +1198,21 @@ static const char *initcall_level_names[] __initdata = { "late", }; -static void __init do_initcall_level(int level) +static int __init ignore_unknown_bootoption(char *param, char *val, + const char *unused, void *arg) +{ + return 0; +} + +static void __init do_initcall_level(int level, char *command_line) { initcall_entry_t *fn; - strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], - initcall_command_line, __start___param, + command_line, __start___param, __stop___param - __start___param, level, level, - NULL, &repair_env_string); + NULL, ignore_unknown_bootoption); trace_initcall_level(initcall_level_names[level]); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) @@ -1010,9 +1222,20 @@ static void __init do_initcall_level(int level) static void __init do_initcalls(void) { int level; + size_t len = strlen(saved_command_line) + 1; + char *command_line; + + command_line = kzalloc(len, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate %zu bytes\n", __func__, len); - for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) - do_initcall_level(level); + for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) { + /* Parser modifies command_line, restore it each time */ + strcpy(command_line, saved_command_line); + do_initcall_level(level, command_line); + } + + kfree(command_line); } /* @@ -1043,8 +1266,16 @@ static void __init do_pre_smp_initcalls(void) static int run_init_process(const char *init_filename) { + const char *const *p; + argv_init[0] = init_filename; pr_info("Run %s as init process\n", init_filename); + pr_debug(" with arguments:\n"); + for (p = argv_init; *p; p++) + pr_debug(" %s\n", *p); + pr_debug(" with environment:\n"); + for (p = envp_init; *p; p++) + pr_debug(" %s\n", *p); return do_execve(getname_kernel(init_filename), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); @@ -1091,6 +1322,11 @@ static void mark_readonly(void) } else pr_info("Kernel memory protection disabled.\n"); } +#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX) +static inline void mark_readonly(void) +{ + pr_warn("Kernel memory protection not selected by kernel config.\n"); +} #else static inline void mark_readonly(void) { @@ -1156,6 +1392,17 @@ static int __ref kernel_init(void *unused) "See Linux Documentation/admin-guide/init.rst for guidance."); } +void console_on_rootfs(void) +{ + /* Open the /dev/console as stdin, this should never fail */ + if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + pr_err("Warning: unable to open an initial console.\n"); + + /* create stdout/stderr */ + (void) ksys_dup(0); + (void) ksys_dup(0); +} + static noinline void __init kernel_init_freeable(void) { /* @@ -1191,12 +1438,8 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); - /* Open the /dev/console on the rootfs, this should never fail */ - if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - pr_err("Warning: unable to open an initial console.\n"); + console_on_rootfs(); - (void) ksys_dup(0); - (void) ksys_dup(0); /* * check if there is an early userspace init. If yes, let it do all * the work |