diff --git a/.gitignore b/.gitignore index dea29bea5..16325342f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ drivers/hisi/tzdriver/cfc_graph.pyc drivers/hisi/tzdriver/cfc_graphgen.pyc drivers/hisi/tzdriver/cfc_rule_parser.pyc +drivers/huawei_platform/oases/inlinehook_offset.h +net/wireguard/ diff --git a/Makefile b/Makefile index d1769dd0c..dc5541bbf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 128 +SUBLEVEL = 129 EXTRAVERSION = NAME = Blurry Fish Butt @@ -224,6 +224,8 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) export srctree objtree VPATH +CCACHE := ccache + # SUBARCH tells the usermode build what the underlying arch is. That is set # first, and if a usermode build is happening, the "ARCH=um" on the command # line overrides the setting of ARCH below. If a native build is happening, @@ -257,8 +259,8 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ # "make" in the configured kernel build directory always uses that. # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile -ARCH ?= $(SUBARCH) -CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%) +ARCH ?= arm64 +CROSS_COMPILE ?= $(CCACHE) $(CONFIG_CROSS_COMPILE:"%"=%) # Architecture as present in compile.h UTS_MACHINE := $(ARCH) @@ -304,11 +306,10 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) -GRAPHITE = -fgraphite -fgraphite-identity -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block -ftree-loop-linear -HOSTCC = gcc -HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O3 -fomit-frame-pointer $(GRAPHITE) -HOSTCXXFLAGS = -O3 $(GRAPHITE) +HOSTCC = $(CCACHE) gcc +HOSTCXX = $(CCACHE) g++ +HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 +HOSTCXXFLAGS = -O2 ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1) HOSTCFLAGS += -Wno-unused-value -Wno-unused-parameter \ @@ -350,11 +351,11 @@ scripts/Kbuild.include: ; include scripts/Kbuild.include # Make variables (CC, etc...) -AS = $(SOURCEANALYZER) $(CROSS_COMPILE)as -LD = $(SOURCEANALYZER) $(CROSS_COMPILE)ld -CC = $(SOURCEANALYZER) $(CCACHE) $(CROSS_COMPILE)gcc +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = $(CCACHE) $(CROSS_COMPILE)gcc CPP = $(CC) -E -AR = $(SOURCEANALYZER) $(CROSS_COMPILE)ar +AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy @@ -371,13 +372,12 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -Wall CFLAGS_MODULE = AFLAGS_MODULE = -LDFLAGS_MODULE = +LDFLAGS_MODULE = --strip-debug CFLAGS_KERNEL = AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im CFLAGS_KCOV = -fsanitize-coverage=trace-pc -KERNELFLAGS = $(GRAPHITE) # Use USERINCLUDE when you must reference the UAPI directories only. 
USERINCLUDE := \ @@ -420,8 +420,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 $(call cc-option,-fno-PIE) \ - -mcpu=cortex-a53 -mtune=cortex-a53 + -std=gnu89 $(call cc-option,-fno-PIE) KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := @@ -674,16 +673,12 @@ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os else -ifeq ($(cc-name),clang) -KBUILD_CFLAGS += -O3 -else ifdef CONFIG_PROFILE_ALL_BRANCHES KBUILD_CFLAGS += -O2 else KBUILD_CFLAGS += -O2 endif endif -endif # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) @@ -704,9 +699,9 @@ KBUILD_CFLAGS += $(call cc-option,-fno-reorder-blocks,) \ $(call cc-option,-fno-partial-inlining) endif -ifneq ($(CONFIG_FRAME_WARN),0) -KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) -endif +#ifneq ($(CONFIG_FRAME_WARN),0) +#KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) +#endif # Handle stack protector mode. # @@ -773,18 +768,18 @@ KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif -ifdef CONFIG_FRAME_POINTER -KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls -else +#ifdef CONFIG_FRAME_POINTER +#KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +#else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically # select FRAME_POINTER. However, FUNCTION_TRACER adds -pg, and this is # incompatible with -fomit-frame-pointer with current GCC, so we don't use # -fomit-frame-pointer with FUNCTION_TRACER. 
-ifndef CONFIG_FUNCTION_TRACER +#ifndef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -fomit-frame-pointer -endif -endif +#endif +#endif KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) @@ -805,23 +800,23 @@ KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \ $(call cc-option,-fno-var-tracking) endif -ifdef CONFIG_FUNCTION_TRACER -ifndef CC_FLAGS_FTRACE -CC_FLAGS_FTRACE := -pg -endif -export CC_FLAGS_FTRACE -ifdef CONFIG_HAVE_FENTRY -CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) -endif -KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_USING_FENTRY) -KBUILD_AFLAGS += $(CC_USING_FENTRY) -ifdef CONFIG_DYNAMIC_FTRACE - ifdef CONFIG_HAVE_C_RECORDMCOUNT - BUILD_C_RECORDMCOUNT := y - export BUILD_C_RECORDMCOUNT - endif -endif -endif +#ifdef CONFIG_FUNCTION_TRACER +#ifndef CC_FLAGS_FTRACE +#CC_FLAGS_FTRACE := -pg +#endif +#export CC_FLAGS_FTRACE +#ifdef CONFIG_HAVE_FENTRY +#CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) +#endif +#KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_USING_FENTRY) +#KBUILD_AFLAGS += $(CC_USING_FENTRY) +#ifdef CONFIG_DYNAMIC_FTRACE +# ifdef CONFIG_HAVE_C_RECORDMCOUNT +# BUILD_C_RECORDMCOUNT := y +# export BUILD_C_RECORDMCOUNT +# endif +#endif +#endif # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH diff --git a/arch/arm/boot/dts/at91sam9g25.dtsi b/arch/arm/boot/dts/at91sam9g25.dtsi index a7da0dd0c..0898213f3 100644 --- a/arch/arm/boot/dts/at91sam9g25.dtsi +++ b/arch/arm/boot/dts/at91sam9g25.dtsi @@ -21,7 +21,7 @@ atmel,mux-mask = < /* A B C */ 0xffffffff 0xffe0399f 0xc000001c /* pioA */ - 0x0007ffff 0x8000fe3f 0x00000000 /* pioB */ + 0x0007ffff 0x00047e3f 0x00000000 /* pioB */ 0x80000000 0x07c0ffff 0xb83fffff /* pioC */ 0x003fffff 0x003f8000 0x00000000 /* pioD */ >; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 3daf8d5d7..fb0d1b252 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -1354,7 +1354,7 @@ pinctrl@fc06a000 { #address-cells = <1>; #size-cells = <1>; - compatible = "atmel,at91sam9x5-pinctrl", "atmel,at91rm9200-pinctrl", "simple-bus"; + compatible = "atmel,sama5d3-pinctrl", "atmel,at91sam9x5-pinctrl", "simple-bus"; ranges = <0xfc068000 0xfc068000 0x100 0xfc06a000 0xfc06a000 0x4000>; /* WARNING: revisit as pin spec has changed */ diff --git a/arch/arm64/configs/hyperplus_defconfig b/arch/arm64/configs/hyperplus_defconfig index ee615263a..5315b5d97 100644 --- a/arch/arm64/configs/hyperplus_defconfig +++ b/arch/arm64/configs/hyperplus_defconfig @@ -3,6 +3,7 @@ # Linux/arm64 4.4.126 Kernel Configuration # +CONFIG_FRANDOM=y CONFIG_WIREGUARD=y # CONFIG_WIREGUARD_DEBUG is not set CONFIG_IOSCHED_ZEN=y @@ -163,7 +164,7 @@ CONFIG_BLK_CGROUP=y # CONFIG_DEBUG_BLK_CGROUP is not set # CONFIG_CHECKPOINT_RESTORE is not set # CONFIG_NAMESPACES is not set -CONFIG_HISI_EAS_SCHED=y +# CONFIG_HISI_EAS_SCHED is not set CONFIG_HISI_RT_OPT=y # CONFIG_SCHED_AUTOGROUP is not set CONFIG_SCHED_TUNE=y @@ -1789,7 +1790,7 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=y CONFIG_PPP_MULTILINK=y -CONFIG_PPPOE=m +CONFIG_PPPOE=y CONFIG_PPPOLAC=y CONFIG_PPPOPNS=y CONFIG_PPP_ASYNC=y @@ -4808,7 +4809,9 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -# CONFIG_NTFS_FS is not set +CONFIG_NTFS_FS=y +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y CONFIG_EXFAT_FS=y CONFIG_EXFAT_DISCARD=y CONFIG_EXFAT_DELAYED_SYNC=y diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 
7dae55b31..f4a28990a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -244,7 +244,7 @@ asmlinkage void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ - pr_info("CPU%u: Booted secondary processor [%08x]\n", + pr_debug("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); update_cpu_boot_status(CPU_BOOT_SUCCESS); /* Make sure the status update is visible before we complete */ diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index a38acc3d1..7cdb06c4b 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -221,92 +221,6 @@ static int __init parse_dt_topology(void) struct cpu_topology cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); -#ifdef CONFIG_HISI_EAS_SCHED -static const char * const little_cores[] = { - "arm,cortex-a53", - NULL, -}; - -static bool is_little_cpu(struct device_node *cn) -{ - const char * const *lc; - for (lc = little_cores; *lc; lc++) - if (of_device_is_compatible(cn, *lc)) - return true; - return false; -} - -void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, - struct cpumask *slow) -{ - struct device_node *cn = NULL; - int cpu; - - cpumask_clear(fast); - cpumask_clear(slow); - - /* - * Else, parse device tree for little cores. - */ - while ((cn = of_find_node_by_type(cn, "cpu"))) { - const u32 *mpidr; - int len; - - mpidr = of_get_property(cn, "reg", &len); - if (!mpidr || len != 8) { - pr_err("%s missing reg property\n", cn->full_name); - continue; - } - - cpu = get_logical_index(be32_to_cpup(mpidr+1)); - if (cpu == -EINVAL) { - pr_err("couldn't get logical index for mpidr %x\n", - be32_to_cpup(mpidr+1)); - break; - } - - if (is_little_cpu(cn)) - cpumask_set_cpu(cpu, slow); - else - cpumask_set_cpu(cpu, fast); - } - - if (!cpumask_empty(fast) && !cpumask_empty(slow)) - return; - - /* - * We didn't find both big and little cores so let's call all cores - * fast as this will keep the system running, with all cores being - * treated equal. 
- */ - cpumask_setall(fast); - cpumask_clear(slow); -} - -struct cpumask slow_cpu_mask; -struct cpumask fast_cpu_mask; -void hisi_get_fast_cpus(struct cpumask *cpumask) -{ - cpumask_copy(cpumask, &fast_cpu_mask); -} -EXPORT_SYMBOL(hisi_get_fast_cpus); - -void hisi_get_slow_cpus(struct cpumask *cpumask) -{ - cpumask_copy(cpumask, &slow_cpu_mask); -} -EXPORT_SYMBOL(hisi_get_slow_cpus); - -int hisi_test_fast_cpu(int cpu) -{ - if (cpumask_test_cpu(cpu, &fast_cpu_mask)) - return 1; - else - return 0; -} -EXPORT_SYMBOL(hisi_test_fast_cpu); -#endif - /* sd energy functions */ static inline const struct sched_group_energy * const cpu_cluster_energy(int cpu) @@ -470,7 +384,4 @@ void __init init_cpu_topology(void) init_sched_energy_costs(); -#ifdef CONFIG_HISI_EAS_SCHED - arch_get_fast_and_slow_cpus(&fast_cpu_mask, &slow_cpu_mask); -#endif } diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index c74c32ccc..4f2817689 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -1238,6 +1238,13 @@ __clear_user(void __user *addr, __kernel_size_t size) { __kernel_size_t res; +#ifdef CONFIG_CPU_MICROMIPS +/* micromips memset / bzero also clobbers t7 & t8 */ +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$15", "$24", "$31" +#else +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$31" +#endif /* CONFIG_CPU_MICROMIPS */ + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" @@ -1247,7 +1254,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } else { might_fault(); __asm__ __volatile__( @@ -1258,7 +1265,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } return res; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 8f0019a2e..2d33cf218 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -218,7 +218,7 @@ 1: PTR_ADDIU a0, 1 /* fill bytewise */ R10KCBARRIER(0(ra)) bne t1, a0, 1b - sb a1, -1(a0) + EX(sb, a1, -1(a0), .Lsmall_fixup\@) 2: jr ra /* done */ move a2, zero @@ -249,13 +249,18 @@ PTR_L t0, TI_TASK($28) andi a2, STORMASK LONG_L t0, THREAD_BUADDR(t0) - LONG_ADDU a2, t1 + LONG_ADDU a2, a0 jr ra LONG_SUBU a2, t0 .Llast_fixup\@: jr ra - andi v1, a2, STORMASK + nop + +.Lsmall_fixup\@: + PTR_SUBU a2, t1, a0 + jr ra + PTR_ADDIU a2, 1 .endm diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index dba508fe1..4f7060ec6 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -648,6 +648,10 @@ static int match_pci_device(struct device *dev, int index, (modpath->mod == PCI_FUNC(devfn))); } + /* index might be out of bounds for bc[] */ + if (index >= 6) + return 0; + id = PCI_SLOT(pdev->devfn) | (PCI_FUNC(pdev->devfn) << 5); return (modpath->bc[index] == id); } diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 0eca6efc0..b9e16855a 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -36,7 +36,8 @@ #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) -#ifdef __SUBARCH_HAS_LWSYNC +/* The sub-arch has lwsync */ +#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio diff --git a/arch/powerpc/include/asm/opal.h 
b/arch/powerpc/include/asm/opal.h index 07a99e638..bab346111 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -21,6 +21,9 @@ /* We calculate number of sg entries based on PAGE_SIZE */ #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) +/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */ +#define OPAL_BUSY_DELAY_MS 10 + /* /sys/firmware/opal */ extern struct kobject *opal_kobj; diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index c50868681..e8d6a842f 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -5,10 +5,6 @@ #include #include -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) -#define __SUBARCH_HAS_LWSYNC -#endif - #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; extern void do_lwsync_fixups(unsigned long value, void *fixup_start, diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 98f81800e..304f07cfa 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -788,7 +788,8 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]); + eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ eeh_bridge_check_link(edev); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index a18d648d3..3af014684 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -53,7 +53,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, unsigned int *target = (unsigned int *)branch_target(src); /* Branch within the section doesn't need translating */ - if (target < alt_start || target >= alt_end) { + if (target < alt_start || target > alt_end) { instr = translate_branch(dest, src); if (!instr) return 1; diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 9db4398de..1bceb95f4 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -11,6 +11,7 @@ #define DEBUG +#include #include #include #include @@ -56,9 +57,17 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); - if (rc == OPAL_BUSY_EVENT) + if (rc == OPAL_BUSY_EVENT) { + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + msleep(OPAL_BUSY_DELAY_MS); + } } + + if (rc) + return -EIO; + *index += count; return count; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index b2e5902bd..c670279b3 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -318,7 +318,7 @@ static void hypfs_kill_super(struct super_block *sb) if (sb->s_root) hypfs_delete_tree(sb->s_root); - if (sb_info->update_file) + if (sb_info && sb_info->update_file) hypfs_remove(sb_info->update_file); kfree(sb->s_fs_info); sb->s_fs_info = NULL; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 42570d8fb..e73979236 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -798,6 +798,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, /* copy and convert to ebcdic */ 
memcpy(ipb->hdr.loadparm, buf, lp_len); ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN); + ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID; return len; } diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c211153ca..56648f4f8 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -140,7 +140,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { static void hard_handler(int sig, siginfo_t *si, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; unsigned long pending = 1UL << sig; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 388dfd3bb..cf5be43fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -300,11 +300,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 259a7c1ef..44f825c80 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -2,8 +2,8 @@ #define _ASM_X86_HWEIGHT_H #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -15,19 +15,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in for the respective - * compiler switches. - */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -51,11 +47,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa48..d40ee8a38 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a0695be19..c7efd394c 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -42,6 +42,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 8ac481870..c8ed431f9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,7 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RETPOLINE) += retpoline.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 000000000..8a602a1e4 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,79 @@ +#include + +#include + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdi + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, %rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + popq %rdi + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 1518d2805..fd6825537 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -10,7 +10,7 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), &uc->uc_mcontext); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index b48ecbfc4..8c5503c0b 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -205,6 +205,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { "3570R/370R/470R/450R/510R/4450RV"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1557060 */ + .callback = video_detect_force_video, + .ident = "SAMSUNG 670Z5E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), + DMI_MATCH(DMI_PRODUCT_NAME, "670Z5E"), + }, + }, { /* https://bugzilla.redhat.com/show_bug.cgi?id=1094948 */ 
.callback = video_detect_force_video, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9663fcacc..8889a9933 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -166,8 +166,7 @@ enum { BINDER_DEBUG_PRIORITY_CAP = 1U << 13, BINDER_DEBUG_SPINLOCKS = 1U << 14, }; -static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | - BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; +static uint32_t binder_debug_mask = 0; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 4ac63c0e5..fd377b956 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1582,7 +1582,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, return -EINVAL; if (val_len % map->format.val_bytes) return -EINVAL; - if (map->max_raw_write && map->max_raw_write > val_len) + if (map->max_raw_write && map->max_raw_write < val_len) return -E2BIG; map->lock(map->lock_arg); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 58c649dd3..2acb4b5fb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1128,11 +1128,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; - if (type >= MAX_LO_CRYPT) - return -EINVAL; + if (type >= MAX_LO_CRYPT) { + err = -EINVAL; + goto exit; + } xfer = xfer_funcs[type]; - if (xfer == NULL) - return -EINVAL; + if (xfer == NULL) { + err = -EINVAL; + goto exit; + } } else xfer = NULL; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 22b61fdb0..3e910e224 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -614,5 +614,19 @@ config TILE_SROM source "drivers/char/xillybus/Kconfig" +config FRANDOM + tristate "Frandom RNG driver" + help + Frandom is a Linux kernel random number generator, which + is 10-50 times faster than what you get from Linux' built-in + /dev/urandom. And it uses very little (/dev/frandom) or none + (/dev/erandom) of the kernel's entropy pool, so it is very + useful for applications that require a handy source for lots + of random data. + + http://www.billauer.co.il/frandom.html + + If unsure here, select N. + endmenu diff --git a/drivers/char/frandom.c b/drivers/char/frandom.c index b824f66bf..eb042a343 100644 --- a/drivers/char/frandom.c +++ b/drivers/char/frandom.c @@ -182,7 +182,7 @@ static void init_rand_state(struct frandom_state *state, int seedflag) static int frandom_open(struct inode *inode, struct file *filp) { - + struct frandom_state *state; int num = iminor(inode); @@ -191,7 +191,7 @@ static int frandom_open(struct inode *inode, struct file *filp) * explicitly */ if ((num != frandom_minor) && (num != erandom_minor)) return -ENODEV; - + state = kmalloc(sizeof(struct frandom_state), GFP_KERNEL); if (!state) return -ENOMEM; @@ -221,7 +221,7 @@ static int frandom_release(struct inode *inode, struct file *filp) kfree(state->buf); kfree(state); - + return 0; } @@ -236,16 +236,16 @@ static ssize_t frandom_read(struct file *filp, char *buf, size_t count, unsigned int i; unsigned int j; u8 *S; - + if (down_interruptible(&state->sem)) return -ERESTARTSYS; - + if ((frandom_chunklimit > 0) && (count > frandom_chunklimit)) count = frandom_chunklimit; ret = count; /* It's either everything or an error... 
*/ - - i = state->i; + + i = state->i; j = state->j; S = state->S; @@ -263,7 +263,7 @@ static ssize_t frandom_read(struct file *filp, char *buf, size_t count, swap_byte(&S[i], &S[j]); *localbuf++ = S[(S[i] + S[j]) & 0xff]; } - + if (copy_to_user(buf, state->buf, dobytes)) { ret = -EFAULT; goto out; @@ -274,7 +274,7 @@ static ssize_t frandom_read(struct file *filp, char *buf, size_t count, } out: - state->i = i; + state->i = i; state->j = j; up(&state->sem); @@ -308,7 +308,7 @@ static int frandom_init_module(void) /* The buffer size MUST be at least 256 bytes, because we assume that minimal length in init_rand_state(). - */ + */ if (frandom_bufsize < 256) { printk(KERN_ERR "frandom: Refused to load because frandom_bufsize=%d < 256\n",frandom_bufsize); return -EINVAL; @@ -340,7 +340,7 @@ static int frandom_init_module(void) printk(KERN_WARNING "frandom: Failed to register class fastrng\n"); goto error0; } - + /* * Register your major, and accept a dynamic number. This is the * first thing to do, in order to avoid releasing other module's @@ -418,3 +418,4 @@ MODULE_AUTHOR("Eli Billauer "); MODULE_DESCRIPTION("'char_random_frandom' - A fast random generator for " "general usage"); MODULE_LICENSE("GPL"); + diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 7c4b1ffe8..d56ba46e6 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -891,9 +891,7 @@ static void bcm2835_pll_off(struct clk_hw *hw) const struct bcm2835_pll_data *data = pll->data; spin_lock(&cprman->regs_lock); - cprman_write(cprman, data->cm_ctrl_reg, - cprman_read(cprman, data->cm_ctrl_reg) | - CM_PLL_ANARST); + cprman_write(cprman, data->cm_ctrl_reg, CM_PLL_ANARST); cprman_write(cprman, data->a2w_ctrl_reg, cprman_read(cprman, data->a2w_ctrl_reg) | A2W_PLL_CTRL_PWRDN); @@ -929,6 +927,10 @@ static int bcm2835_pll_on(struct clk_hw *hw) cpu_relax(); } + cprman_write(cprman, data->a2w_ctrl_reg, + cprman_read(cprman, data->a2w_ctrl_reg) | + A2W_PLL_CTRL_PRST_DISABLE); + return 0; } diff --git a/drivers/clk/mvebu/armada-38x.c b/drivers/clk/mvebu/armada-38x.c index 8bccf4ecd..9ff4ea639 100644 --- a/drivers/clk/mvebu/armada-38x.c +++ b/drivers/clk/mvebu/armada-38x.c @@ -46,10 +46,11 @@ static u32 __init armada_38x_get_tclk_freq(void __iomem *sar) } static const u32 armada_38x_cpu_frequencies[] __initconst = { - 0, 0, 0, 0, - 1066 * 1000 * 1000, 0, 0, 0, + 666 * 1000 * 1000, 0, 800 * 1000 * 1000, 0, + 1066 * 1000 * 1000, 0, 1200 * 1000 * 1000, 0, 1332 * 1000 * 1000, 0, 0, 0, - 1600 * 1000 * 1000, + 1600 * 1000 * 1000, 0, 0, 0, + 1866 * 1000 * 1000, 0, 0, 2000 * 1000 * 1000, }; static u32 __init armada_38x_get_cpu_freq(void __iomem *sar) @@ -75,11 +76,11 @@ static const struct coreclk_ratio armada_38x_coreclk_ratios[] __initconst = { }; static const int armada_38x_cpu_l2_ratios[32][2] __initconst = { - {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {1, 2}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {1, 2}, {0, 1}, + {1, 2}, {0, 1}, {1, 2}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, @@ -90,7 +91,7 @@ static const int armada_38x_cpu_ddr_ratios[32][2] __initconst = { {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {0, 1}, {7, 15}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 
1}, {0, 1}, diff --git a/drivers/cpufreq/cpu-boost.c b/drivers/cpufreq/cpu-boost.c index e980faae2..d59d44c31 100644 --- a/drivers/cpufreq/cpu-boost.c +++ b/drivers/cpufreq/cpu-boost.c @@ -40,11 +40,6 @@ module_param(input_boost_enabled, uint, 0644); static unsigned int input_boost_ms = 40; module_param(input_boost_ms, uint, 0644); -#ifdef CONFIG_DYNAMIC_STUNE_BOOST -static int dynamic_stune_boost; -module_param(dynamic_stune_boost, uint, 0644); -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ - static struct delayed_work input_boost_rem; static u64 last_input_time; #define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC) @@ -174,11 +169,6 @@ static void do_input_boost_rem(struct work_struct *work) i_sync_info->input_boost_min = 0; } -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - /* Reset dynamic stune boost value to the default value */ - reset_stune_boost("top-app"); -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ - /* Update policies for all online CPUs */ update_policy_online(); @@ -201,11 +191,6 @@ static void do_input_boost(struct work_struct *work) /* Update policies for all online CPUs */ update_policy_online(); -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - /* Set dynamic stune boost value */ - do_stune_boost("top-app", dynamic_stune_boost); -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ - queue_delayed_work(cpu_boost_wq, &input_boost_rem, msecs_to_jiffies(input_boost_ms)); } @@ -261,11 +246,6 @@ static int cpuboost_input_connect(struct input_handler *handler, static void cpuboost_input_disconnect(struct input_handle *handle) { -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - /* Reset dynamic stune boost value to the default value */ - reset_stune_boost("top-app"); -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ - input_close_device(handle); input_unregister_handle(handle); kfree(handle); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 21768b337..4410041c3 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1077,7 +1077,9 @@ static int __init devfreq_init(void) return PTR_ERR(devfreq_class); } - devfreq_wq = create_freezable_workqueue("devfreq_wq"); + devfreq_wq = alloc_workqueue("devfreq_wq", + WQ_HIGHPRI | WQ_UNBOUND | WQ_FREEZABLE | + WQ_MEM_RECLAIM, 0); if (!devfreq_wq) { class_destroy(devfreq_class); pr_err("%s: couldn't create workqueue\n", __FILE__); diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 66c073fc8..82a7c89ca 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1473,10 +1473,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); - initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); - rmb(); cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); + rmb(); cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index fb6ad1438..83aee9e81 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -238,9 +238,10 @@ int radeon_bo_create(struct radeon_device *rdev, * may be slow * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 */ - +#ifndef CONFIG_COMPILE_TEST #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ thanks to write-combining +#endif if (bo->flags & RADEON_GEM_GTT_WC) DRM_INFO_ONCE("Please enable CONFIG_MTRR and 
CONFIG_X86_PAT for " diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d9007cc37..892d0a71d 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -5964,9 +5964,9 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev, { u32 lane_width; u32 new_lane_width = - (radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; + ((radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1; u32 current_lane_width = - (radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; + ((radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1; if (new_lane_width != current_lane_width) { radeon_set_pcie_lanes(rdev, new_lane_width); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 52fc0fdd3..9715c783b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1331,7 +1331,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) * of implement() working on 8 byte chunks */ - int len = hid_report_len(report) + 7; + u32 len = hid_report_len(report) + 7; return kmalloc(len, flags); } @@ -1396,7 +1396,7 @@ void __hid_request(struct hid_device *hid, struct hid_report *report, { char *buf; int ret; - int len; + u32 len; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) @@ -1422,14 +1422,14 @@ void __hid_request(struct hid_device *hid, struct hid_report *report, } EXPORT_SYMBOL_GPL(__hid_request); -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, +int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; unsigned int a; - int rsize, csize = size; + u32 rsize, csize = size; u8 *cdata = data; int ret = 0; @@ -1487,7 +1487,7 @@ EXPORT_SYMBOL_GPL(hid_report_raw_event); * * This is data entry for lower layers. 
*/ -int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int interrupt) +int hid_input_report(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum; struct hid_driver *hdrv; diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 53e54855c..8d74e691a 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1258,7 +1258,8 @@ static void hidinput_led_worker(struct work_struct *work) led_work); struct hid_field *field; struct hid_report *report; - int len, ret; + int ret; + u32 len; __u8 *buf; field = hidinput_get_led_field(hid); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index f62a9d660..9de379c1b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -314,7 +314,8 @@ static struct attribute_group mt_attribute_group = { static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hdev); - int ret, size = hid_report_len(report); + int ret; + u32 size = hid_report_len(report); u8 *buf; /* @@ -919,7 +920,7 @@ static void mt_set_input_mode(struct hid_device *hdev) struct hid_report_enum *re; struct mt_class *cls = &td->mtclass; char *buf; - int report_len; + u32 report_len; if (td->inputmode < 0) return; diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 67cd059a8..41a4a2af9 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -110,8 +110,8 @@ struct rmi_data { u8 *writeReport; u8 *readReport; - int input_report_size; - int output_report_size; + u32 input_report_size; + u32 output_report_size; unsigned long flags; diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 9ab1f5b6b..e10fe61c1 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -197,6 +197,11 @@ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t int ret = 0, len; unsigned char report_number; + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { + ret = -ENODEV; + goto out; + } + dev = hidraw_table[minor]->hid; if (!dev->ll_driver->raw_request) { diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 312aa1e33..4c3ed078c 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -137,10 +137,10 @@ struct i2c_hid { * register of the HID * descriptor. 
*/ unsigned int bufsize; /* i2c buffer size */ - char *inbuf; /* Input buffer */ - char *rawbuf; /* Raw Input buffer */ - char *cmdbuf; /* Command buffer */ - char *argsbuf; /* Command arguments buffer */ + u8 *inbuf; /* Input buffer */ + u8 *rawbuf; /* Raw Input buffer */ + u8 *cmdbuf; /* Command buffer */ + u8 *argsbuf; /* Command arguments buffer */ unsigned long flags; /* device flags */ @@ -387,7 +387,8 @@ static int i2c_hid_hwreset(struct i2c_client *client) static void i2c_hid_get_input(struct i2c_hid *ihid) { - int ret, ret_size; + int ret; + u32 ret_size; int size = le16_to_cpu(ihid->hdesc.wMaxInputLength); if (size > ihid->bufsize) @@ -412,7 +413,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) return; } - if (ret_size > size) { + if ((ret_size > size) || (ret_size <= 2)) { dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", __func__, size, ret_size); return; diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index a629f7c13..ac63e5620 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -447,6 +447,7 @@ static int ina2xx_probe(struct i2c_client *client, /* set the device type */ data->config = &ina2xx_config[id->driver_data]; + mutex_init(&data->config_lock); if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { struct ina2xx_platform_data *pdata = dev_get_platdata(dev); @@ -473,8 +474,6 @@ static int ina2xx_probe(struct i2c_client *client, return -ENODEV; } - mutex_init(&data->config_lock); - data->groups[group++] = &ina2xx_group; if (id->driver_data == ina226) data->groups[group++] = &ina226_group; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 960fcb613..ea3bc9bb1 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1230,6 +1230,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx, if (!optlen) return -EINVAL; + if (!ctx->cm_id->device) + return -EINVAL; + memset(&sa_path, 0, sizeof(sa_path)); ib_sa_unpack_path(path_data->path_rec, &sa_path); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9a99cee26..4fd289261 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2581,9 +2581,11 @@ static int srp_abort(struct scsi_cmnd *scmnd) ret = FAST_IO_FAIL; else ret = FAILED; - srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scmnd->scsi_done(scmnd); + if (ret == SUCCESS) { + srp_free_req(ch, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; + scmnd->scsi_done(scmnd); + } return ret; } @@ -3309,12 +3311,10 @@ static ssize_t srp_create_target(struct device *dev, num_online_nodes()); const int ch_end = ((node_idx + 1) * target->ch_count / num_online_nodes()); - const int cv_start = (node_idx * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; - const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; + const int cv_start = node_idx * ibdev->num_comp_vectors / + num_online_nodes(); + const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / + num_online_nodes(); int cpu_idx = 0; for_each_online_cpu(cpu) { diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a7d516f97..10068a481 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -389,6 +389,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max - 1, GFP_KERNEL); if (ret < 0) { kfree(svm); + 
kfree(sdev); goto out; } svm->pasid = ret; diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index e710493c0..9aa452555 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -88,7 +88,7 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { - struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip __user *kclips; struct v4l2_clip32 __user *uclips; u32 n = kp->clipcount; compat_caddr_t p; @@ -103,6 +103,8 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (!kp->clipcount) return 0; + if (get_user(kclips, &kp->clips)) + return -EFAULT; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 76e8bce6f..ad572a0f2 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -368,9 +368,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host, host->irq_mask &= ~irq; else host->irq_mask |= irq; - spin_unlock_irqrestore(&host->lock, flags); writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK); + spin_unlock_irqrestore(&host->lock, flags); } static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host, diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index b2fb0528c..07ad86759 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -244,7 +244,7 @@ static int ubiblock_open(struct block_device *bdev, fmode_t mode) * in any case. */ if (mode & FMODE_WRITE) { - ret = -EPERM; + ret = -EROFS; goto out_unlock; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 27de04632..a2e6c7848 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -889,6 +889,17 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, return -EINVAL; } + /* + * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes. + * MLC NAND is different and needs special care, otherwise UBI or UBIFS + * will die soon and you will lose all your data. 
+ */ + if (mtd->type == MTD_MLCNANDFLASH) { + pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n", + mtd->index); + return -EINVAL; + } + if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 30d3999dd..ed62f1efe 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -360,7 +360,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi) { int i; - flush_work(&ubi->fm_work); return_unused_pool_pebs(ubi, &ubi->fm_pool); return_unused_pool_pebs(ubi, &ubi->fm_wl_pool); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 27ed25252..cfd81eb1b 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) if(x < 0 || x > comp->rslot_limit) goto bad; + /* Check if the cstate is initialized */ + if (!comp->rstate[x].initialized) + goto bad; + comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { @@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) if (cs->cs_tcp.doff > 5) memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; + cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f9343bee1..6578127db 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -704,6 +704,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, +}, { + /* Cinterion AHS3 modem by GEMALTO */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { /* Telit modules */ USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ebdee8f01..a6d429950 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -618,7 +618,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset, offset += 0x100; else ret = -EINVAL; - ret = lan78xx_read_raw_otp(dev, offset, length, data); + if (!ret) + ret = lan78xx_read_raw_otp(dev, offset, length, data); } return ret; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index b7f72f9c7..b3691712d 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1454,6 +1454,7 @@ static int rtl8187_probe(struct usb_interface *intf, goto err_free_dev; } mutex_init(&priv->io_mutex); + mutex_init(&priv->conf_mutex); SET_IEEE80211_DEV(dev, &intf->dev); usb_set_intfdata(intf, dev); @@ -1627,7 +1628,6 @@ static int rtl8187_probe(struct usb_interface *intf, printk(KERN_ERR "rtl8187: Cannot register device\n"); goto err_free_dmabuf; } - mutex_init(&priv->conf_mutex); skb_queue_head_init(&priv->b_tx_status.queue); wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n", diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0b8d26559..fee4c01fb 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2024,7 +2024,10 @@ static void netback_changed(struct xenbus_device 
*dev, case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: + break; + case XenbusStateUnknown: + wake_up_all(&module_unload_q); break; case XenbusStateInitWait: @@ -2155,7 +2158,9 @@ static int xennet_remove(struct xenbus_device *dev) xenbus_switch_state(dev, XenbusStateClosing); wait_event(module_unload_q, xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing); + XenbusStateClosing || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); xenbus_switch_state(dev, XenbusStateClosed); wait_event(module_unload_q, diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 0b3e0bfa7..572ca192c 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -587,6 +587,7 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) { unsigned long long sta = 0; struct acpiphp_func *func; + u32 dvid; list_for_each_entry(func, &slot->funcs, sibling) { if (func->flags & FUNC_HAS_STA) { @@ -597,19 +598,27 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) if (ACPI_SUCCESS(status) && sta) break; } else { - u32 dvid; - - pci_bus_read_config_dword(slot->bus, - PCI_DEVFN(slot->device, - func->function), - PCI_VENDOR_ID, &dvid); - if (dvid != 0xffffffff) { + if (pci_bus_read_dev_vendor_id(slot->bus, + PCI_DEVFN(slot->device, func->function), + &dvid, 0)) { sta = ACPI_STA_ALL; break; } } } + if (!sta) { + /* + * Check for the slot itself since it may be that the + * ACPI slot is a device below PCIe upstream port so in + * that case it may not even be reachable yet. + */ + if (pci_bus_read_dev_vendor_id(slot->bus, + PCI_DEVFN(slot->device, 0), &dvid, 0)) { + sta = ACPI_STA_ALL; + } + } + return (unsigned int)sta; } diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4bb5262f7..742ca57ec 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -126,7 +126,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, int start, int count, int auto_ack) { - int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0; + int rc, tmp_count = count, tmp_start = start, nr = q->nr; unsigned int ccq = 0; qperf_inc(q, eqbs); @@ -149,14 +149,7 @@ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, qperf_inc(q, eqbs_partial); DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x", tmp_count); - /* - * Retry once, if that fails bail out and process the - * extracted buffers before trying again. - */ - if (!retried++) - goto again; - else - return count - tmp_count; + return count - tmp_count; } DBF_ERROR("%4x EQBS ERROR", SCH_NO(q)); @@ -212,7 +205,10 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start, return 0; } -/* returns number of examined buffers and their common state in *state */ +/* + * Returns number of examined buffers and their common state in *state. + * Requested number of buffers-to-examine must be > 0. 
+ */ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, unsigned char *state, unsigned int count, int auto_ack, int merge_pending) @@ -223,17 +219,23 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, if (is_qebsm(q)) return qdio_do_eqbs(q, state, bufnr, count, auto_ack); - for (i = 0; i < count; i++) { - if (!__state) { - __state = q->slsb.val[bufnr]; - if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) - __state = SLSB_P_OUTPUT_EMPTY; - } else if (merge_pending) { - if ((q->slsb.val[bufnr] & __state) != __state) - break; - } else if (q->slsb.val[bufnr] != __state) - break; + /* get initial state: */ + __state = q->slsb.val[bufnr]; + if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) + __state = SLSB_P_OUTPUT_EMPTY; + + for (i = 1; i < count; i++) { bufnr = next_buf(bufnr); + + /* merge PENDING into EMPTY: */ + if (merge_pending && + q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING && + __state == SLSB_P_OUTPUT_EMPTY) + continue; + + /* stop if next state differs from initial state: */ + if (q->slsb.val[bufnr] != __state) + break; } *state = __state; return i; diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index eec76141d..dd32ece1b 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -49,3 +49,7 @@ config QCOM_SMD_RPM Say M here if you want to include support for the Qualcomm RPM as a module. This will build a module called "qcom-smd-rpm". + +config STATE_NOTIFIER + bool "State Notifier" + diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 10a93d168..6dd0c6cc7 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -3,3 +3,5 @@ obj-$(CONFIG_QCOM_PM) += spm.o obj-$(CONFIG_QCOM_SMD) += smd.o obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o obj-$(CONFIG_QCOM_SMEM) += smem.o + +obj-$(CONFIG_STATE_NOTIFIER) += state_notifier.o diff --git a/drivers/soc/qcom/state_notifier.c b/drivers/soc/qcom/state_notifier.c new file mode 100644 index 000000000..d975642f8 --- /dev/null +++ b/drivers/soc/qcom/state_notifier.c @@ -0,0 +1,133 @@ +/* + * State Notifier Driver + * + * Copyright (c) 2013-2017, Pranav Vashi + * (c) 2017, Joe Maples + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#define DEFAULT_SUSPEND_DEFER_TIME 1 +#define STATE_NOTIFIER "state_notifier" + +/* + * debug = 1 will print all + */ +static unsigned int debug; +module_param_named(debug_mask, debug, uint, 0644); + +#define dprintk(msg...) 
\ +do { \ + if (debug) \ + pr_info(msg); \ +} while (0) + +static bool enabled = true; +module_param_named(enabled, enabled, bool, 0664); +static unsigned int suspend_defer_time = DEFAULT_SUSPEND_DEFER_TIME; +module_param_named(suspend_defer_time, suspend_defer_time, uint, 0664); +static struct delayed_work suspend_work; +static struct workqueue_struct *susp_wq; +struct work_struct resume_work; +bool state_suspended; +module_param_named(state_suspended, state_suspended, bool, 0444); +static bool suspend_in_progress; + +static BLOCKING_NOTIFIER_HEAD(state_notifier_list); + +/** + * state_register_client - register a client notifier + * @nb: notifier block to callback on events + */ +int state_register_client(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&state_notifier_list, nb); +} +EXPORT_SYMBOL(state_register_client); + +/** + * state_unregister_client - unregister a client notifier + * @nb: notifier block to callback on events + */ +int state_unregister_client(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&state_notifier_list, nb); +} +EXPORT_SYMBOL(state_unregister_client); + +/** + * state_notifier_call_chain - notify clients on state_events + * @val: Value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + */ +int state_notifier_call_chain(unsigned long val, void *v) +{ + return blocking_notifier_call_chain(&state_notifier_list, val, v); +} +EXPORT_SYMBOL_GPL(state_notifier_call_chain); + +static void _suspend_work(struct work_struct *work) +{ + state_suspended = true; + state_notifier_call_chain(STATE_NOTIFIER_SUSPEND, NULL); + suspend_in_progress = false; + dprintk("%s: suspend completed.\n", STATE_NOTIFIER); +} + +static void _resume_work(struct work_struct *work) +{ + state_suspended = false; + state_notifier_call_chain(STATE_NOTIFIER_ACTIVE, NULL); + dprintk("%s: resume completed.\n", STATE_NOTIFIER); +} + +void state_suspend(void) +{ + dprintk("%s: suspend called.\n", STATE_NOTIFIER); + if (state_suspended || suspend_in_progress || !enabled) + return; + + suspend_in_progress = true; + + queue_delayed_work(susp_wq, &suspend_work, + msecs_to_jiffies(suspend_defer_time * 1000)); +} + +void state_resume(void) +{ + dprintk("%s: resume called.\n", STATE_NOTIFIER); + cancel_delayed_work_sync(&suspend_work); + suspend_in_progress = false; + + if (state_suspended) + queue_work(susp_wq, &resume_work); +} + +static int __init state_notifier_init(void) +{ + susp_wq = + alloc_workqueue("state_susp_wq", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + + if (!susp_wq) + pr_err("State Notifier failed to allocate suspend workqueue\n"); + + INIT_DELAYED_WORK(&suspend_work, _suspend_work); + INIT_WORK(&resume_work, _resume_work); + + return 0; +} + +subsys_initcall(state_notifier_init); + +MODULE_AUTHOR("Pranav Vashi "); +MODULE_DESCRIPTION("State Notifier Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 30918edef..7fad58554 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -47,3 +47,4 @@ obj-$(CONFIG_FB_TFT) += fbtft/ obj-$(CONFIG_FSL_MC_BUS) += fsl-mc/ obj-$(CONFIG_WILC1000) += wilc1000/ obj-$(CONFIG_MOST) += most/ + diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index c5547bd71..6a8300108 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -589,6 +589,9 @@ static int imx_thermal_probe(struct platform_device *pdev) regmap_write(map, TEMPSENSE0 + REG_CLR, 
TEMPSENSE0_POWER_DOWN); regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + data->irq_enabled = true; + data->mode = THERMAL_DEVICE_ENABLED; + ret = devm_request_threaded_irq(&pdev->dev, data->irq, imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, 0, "imx_thermal", data); @@ -600,9 +603,6 @@ static int imx_thermal_probe(struct platform_device *pdev) return ret; } - data->irq_enabled = true; - data->mode = THERMAL_DEVICE_ENABLED; - return 0; } diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 20a41f7de..6713fd195 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -627,6 +627,7 @@ static const struct dev_pm_ops nhi_pm_ops = { * we just disable hotplug, the * pci-tunnels stay alive. */ + .thaw_noirq = nhi_resume_noirq, .restore_noirq = nhi_resume_noirq, }; diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 41dda25da..190e5dc15 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2238,6 +2238,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } if (tty_hung_up_p(file)) break; + /* + * Abort readers for ttys which never actually + * get hung up. See __tty_hangup(). + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + break; if (!timeout) break; if (file->f_flags & O_NONBLOCK) { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a638c1738..89fd20382 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -702,6 +702,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) return; } + /* + * Some console devices aren't actually hung up for technical and + * historical reasons, which can lead to indefinite interruptible + * sleep in n_tty_read(). The following explicitly tells + * n_tty_read() to abort readers. + */ + set_bit(TTY_HUPPING, &tty->flags); + /* inuse_filps is protected by the single tty lock, this really needs to change if we want to flush the workqueue with the lock held */ @@ -757,6 +765,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); + clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c index a05431a69..f096c82d4 100644 --- a/drivers/usb/core/generic.c +++ b/drivers/usb/core/generic.c @@ -212,8 +212,13 @@ static int generic_suspend(struct usb_device *udev, pm_message_t msg) if (!udev->parent) rc = hcd_bus_suspend(udev, msg); - /* Non-root devices don't need to do anything for FREEZE or PRETHAW */ - else if (msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) + /* + * Non-root USB2 devices don't need to do anything for FREEZE + * or PRETHAW. USB3 devices don't support global suspend and + * needs to be selectively suspended. 
+ */ + else if ((msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) + && (udev->speed < USB_SPEED_SUPER)) rc = 0; else rc = usb_port_suspend(udev, msg); diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 10d30afe4..a0d141736 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -114,15 +114,19 @@ static int service_tx_status_request( } is_in = epnum & USB_DIR_IN; - if (is_in) { - epnum &= 0x0f; + epnum &= 0x0f; + if (epnum >= MUSB_C_NUM_EPS) { + handled = -EINVAL; + break; + } + + if (is_in) ep = &musb->endpoints[epnum].ep_in; - } else { + else ep = &musb->endpoints[epnum].ep_out; - } regs = musb->endpoints[epnum].regs; - if (epnum >= MUSB_C_NUM_EPS || !ep->desc) { + if (!ep->desc) { handled = -EINVAL; break; } diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index fe2b470d7..c55c632a3 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -752,6 +752,62 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) return 0; } +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - + offset + PCI_EXP_DEVCTL); + int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports PCIe + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use a PCIe FLR, we don't have that level of granularity. + */ + if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) { + u32 cap; + int ret; + + *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR); + + ret = pci_user_read_config_dword(vdev->pdev, + pos - offset + PCI_EXP_DEVCAP, + &cap); + + if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) + pci_try_reset_function(vdev->pdev); + } + + /* + * MPS is virtualized to the user, writes do not change the physical + * register since determining a proper MPS value requires a system wide + * device view. The MRRS is largely independent of MPS, but since the + * user does not have that system-wide view, they might set a safe, but + * inefficiently low value. Here we allow writes through to hardware, + * but we set the floor to the physical device MPS setting, so that + * we can at least use full TLPs, as defined by the MPS value. + * + * NB, if any devices actually depend on an artificially low MRRS + * setting, this will need to be revisited, perhaps with a quirk + * though pcie_set_readrq(). + */ + if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) { + readrq = 128 << + ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12); + readrq = max(readrq, pcie_get_mps(vdev->pdev)); + + pcie_set_readrq(vdev->pdev, readrq); + } + + return count; +} + /* Permissions for PCI Express capability */ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) { @@ -759,26 +815,67 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) return -ENOMEM; + perm->writefn = vfio_exp_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Allow writes to device control fields (includes FLR!) 
- * but not to devctl_phantom which could confuse IOMMU - * or to the ARI bit in devctl2 which is set at probe time + * Allow writes to device control fields, except devctl_phantom, + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which + * is set at probe time. FLR and MRRS get virtualized via our + * writefn. */ - p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD | + PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports AF + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use an AF FLR, we don't have that level of granularity. + */ + if (*ctrl & PCI_AF_CTRL_FLR) { + u8 cap; + int ret; + + *ctrl &= ~PCI_AF_CTRL_FLR; + + ret = pci_user_read_config_byte(vdev->pdev, + pos - offset + PCI_AF_CAP, + &cap); + + if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) + pci_try_reset_function(vdev->pdev); + } + + return count; +} + /* Permissions for Advanced Function capability */ static int __init init_pci_cap_af_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) return -ENOMEM; + perm->writefn = vfio_af_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); - p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); + p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR); return 0; } diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 016bd9355..aa93df583 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -450,7 +450,7 @@ static bool watchdog_is_running(void) is_running = (superio_inb(watchdog.sioaddr, SIO_REG_ENABLE) & BIT(0)) && (superio_inb(watchdog.sioaddr, F71808FG_REG_WDT_CONF) - & F71808FG_FLAG_WD_EN); + & BIT(F71808FG_FLAG_WD_EN)); superio_exit(watchdog.sioaddr); diff --git a/fs/aio.c b/fs/aio.c index 88ede4a84..f77b87a64 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1596,7 +1596,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, struct kioctx *ctx; long ret = 0; int i = 0; - struct blk_plug plug; if (unlikely(nr < 0)) return -EINVAL; @@ -1613,8 +1612,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, return -EINVAL; } - blk_start_plug(&plug); - /* * AKPM: should this return a partial result if some of the IOs were * successfully submitted? @@ -1637,7 +1634,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (ret) break; } - blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? 
i : ret; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7a54c6a86..500098cdb 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -746,7 +746,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8a0243efd..991acb78f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -147,6 +147,25 @@ static int padzero(unsigned long elf_bss) #define ELF_BASE_PLATFORM NULL #endif +/* + * Use get_random_int() to implement AT_RANDOM while avoiding depletion + * of the entropy pool. + */ +static void get_atrandom_bytes(unsigned char *buf, size_t nbytes) +{ + unsigned char *p = buf; + + while (nbytes) { + unsigned int random_variable; + size_t chunk = min(nbytes, sizeof(random_variable)); + + random_variable = get_random_int(); + memcpy(p, &random_variable, chunk); + p += chunk; + nbytes -= chunk; + } +} + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -208,7 +227,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, /* * Generate 16 random bytes for userspace PRNG seeding. */ - get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes)); + get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes)); u_rand_bytes = (elf_addr_t __user *) STACK_ALLOC(p, sizeof(k_rand_bytes)); if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f97110461..78c51ce91 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -242,8 +242,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); - ext4_group_desc_csum_set(sb, block_group, gdp); return 0; } @@ -447,6 +445,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); if (err) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 5388207d2..e10c12f59 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -63,44 +63,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -/* Initializes an uninitialized inode bitmap */ -static int ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) -{ - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); - J_ASSERT_BH(bh, buffer_locked(bh)); - - /* If checksum is bad mark all blocks and inodes use to prevent - * allocation, essentially implementing a per-group read-only flag. 
*/ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, gdp); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EFSBADCRC; - } - - memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); - - return 0; -} - void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) { if (uptodate) { @@ -184,17 +146,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - err = ext4_init_inode_bitmap(sb, bh, block_group, desc); + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) { - ext4_error(sb, "Failed to init inode bitmap for group " - "%u: %d", block_group, err); - goto out; - } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 84da8fd0a..ae003b453 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -377,7 +377,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -387,15 +387,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -529,7 +528,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, unsigned flags) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct page *page = NULL; @@ -569,7 +568,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. 
*/ if (!ext4_has_inline_data(inode)) { @@ -605,7 +604,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, page_cache_release(page); page = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -631,7 +630,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, page_cache_release(page); } if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -724,7 +723,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -742,7 +741,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); kaddr = kmap_atomic(page); @@ -752,7 +751,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, /* clear page dirty so that writepages wouldn't work for us. */ ClearPageDirty(page); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); out: return copied; @@ -763,7 +762,7 @@ ext4_journalled_write_inline_data(struct inode *inode, unsigned len, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -773,11 +772,11 @@ ext4_journalled_write_inline_data(struct inode *inode, return NULL; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, 0, len); kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return iloc.bh; } @@ -1261,7 +1260,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct dentry *dentry, struct inode *inode) { - int ret, inline_size; + int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; struct inode *dir = d_inode(dentry->d_parent); @@ -1270,7 +1269,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1316,7 +1315,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, out: ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); return ret; } @@ -1676,7 +1675,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1684,7 +1683,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1719,7 +1718,7 @@ int ext4_delete_inline_entry(handle_t *handle, ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); 
brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1818,11 +1817,11 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -1907,7 +1906,7 @@ int ext4_try_to_evict_inline_data(handle_t *handle, void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1924,7 +1923,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) if (IS_ERR(handle)) return; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { *has_inline = 0; ext4_journal_stop(handle); @@ -1982,7 +1981,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); @@ -1998,7 +1997,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; @@ -2020,15 +2019,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6d4235a4..154f7ed65 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1526,6 +1526,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { + if (page_mapped(page)) + clear_page_dirty_for_io(page); block_invalidatepage(page, 0, PAGE_CACHE_SIZE); ClearPageUptodate(page); } @@ -3279,29 +3281,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. 
- */ - ext4_inode_aio_set(inode, io_end); - } - if (overwrite) { get_block_func = ext4_get_block_write_nolock; } else { + ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + io_end = ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } + /* + * Grab reference for DIO. Will be dropped in + * ext4_end_io_dio() + */ + iocb->private = ext4_get_io_end(io_end); + /* + * we save the io structure for current async direct + * IO, so that later ext4_map_blocks() could flag the + * io structure whether there is a unwritten extents + * needs to be converted when IO is completed. + */ + ext4_inode_aio_set(inode, io_end); + } get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } @@ -4273,6 +4275,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); + if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { + EXT4_ERROR_INODE(inode, "root inode unallocated"); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bc79e2ca4..8cff133ff 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2130,6 +2130,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2142,6 +2144,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2154,6 +2158,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c356b4954..b16bfb52e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1143,16 +1143,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; + int no_expand; int error; if (!name) return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); if (error) @@ -1213,7 +1211,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1227,9 +1225,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: 
brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return error; } @@ -1313,12 +1309,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ + int no_expand; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; - down_write(&EXT4_I(inode)->xattr_sem); - /* - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty - */ - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) @@ -1512,8 +1507,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, } brelse(bh); out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: @@ -1525,10 +1519,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, kfree(bs); brelse(bh); /* - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode - * size expansion failed. + * Inode size expansion failed; don't try again */ - up_write(&EXT4_I(inode)->xattr_sem); + no_expand = 1; + ext4_write_unlock_xattr(inode, &no_expand); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 10b0f7323..cdc413476 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -101,6 +101,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. + * The first is to signal that the inline xattrs and data are + * taking up so much space that we might as well not keep trying to + * expand it. The second is that xattr_sem is taken for writing, so + * we shouldn't try to recurse into the inode expansion. For this + * second case, we need to make sure that we save and restore the + * NO_EXPAND state flag appropriately.
+ */ +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) +{ + down_write(&EXT4_I(inode)->xattr_sem); + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); +} + +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) +{ + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) + return 0; + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + return 1; +} + +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) +{ + if (*save == 0) + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); +} + extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ddba99214..85afd26ca 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -748,11 +748,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 624a57a9c..4759df4eb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -914,7 +914,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) } /* - * This is a variaon of __jbd2_update_log_tail which checks for validity of + * This is a variation of __jbd2_update_log_tail which checks for validity of * provided log tail and locks j_checkpoint_mutex. So it is safe against races * with other threads updating log tail. 
*/ @@ -1384,6 +1384,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, journal_superblock_t *sb = journal->j_superblock; int ret; + if (is_journal_aborted(journal)) + return -EIO; + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", tail_block, tail_tid); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d86c5e317..600da1a4d 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -345,7 +345,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!(sb->s_flags & MS_RDONLY)) + if (c && !(sb->s_flags & MS_RDONLY)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); diff --git a/fs/namei.c b/fs/namei.c index 0fcad42e4..de57dd59d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -219,9 +219,10 @@ getname_kernel(const char * filename) if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { + const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); diff --git a/fs/namespace.c b/fs/namespace.c index 38e42eb4b..0189f3f5d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1035,7 +1035,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e0e5f7c3c..8a459b179 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, u32 event_mask, void *data, int data_type) { - __u32 marks_mask, marks_ignored_mask; + __u32 marks_mask = 0, marks_ignored_mask = 0; struct path *path = data; pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" @@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - if (inode_mark && vfsmnt_mark) { - marks_mask = (vfsmnt_mark->mask | inode_mark->mask); - marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); - } else if (inode_mark) { - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if ((event_mask & FS_EVENT_ON_CHILD) && - !(inode_mark->mask & FS_EVENT_ON_CHILD)) - return false; - marks_mask = inode_mark->mask; - marks_ignored_mask = inode_mark->ignored_mask; - } else if (vfsmnt_mark) { - marks_mask = vfsmnt_mark->mask; - marks_ignored_mask = vfsmnt_mark->ignored_mask; - } else { - BUG(); + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! 
+ */ + if (inode_mark && + (!(event_mask & FS_EVENT_ON_CHILD) || + (inode_mark->mask & FS_EVENT_ON_CHILD))) { + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; + } + + if (vfsmnt_mark) { + marks_mask |= vfsmnt_mark->mask; + marks_ignored_mask |= vfsmnt_mark->ignored_mask; } if (d_is_dir(path->dentry) && diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index a72097b62..00985f9db 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super, if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; - reiserfs_warning(super, + reiserfs_warning(super, "sh-457", "journal_init_dev: Cannot open '%s': %i", jdev_name, result); return result; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fd90c079..0bb6de356 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1728,8 +1728,11 @@ static void ubifs_remount_ro(struct ubifs_info *c) dbg_save_space_info(c); - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); @@ -1795,8 +1798,11 @@ static void ubifs_put_super(struct super_block *sb) int err; /* Synchronize write-buffers */ - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } /* * We are being cleanly unmounted which means the diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h index 04e715bcc..3205b4263 100644 --- a/include/asm-generic/param.h +++ b/include/asm-generic/param.h @@ -5,6 +5,6 @@ # undef HZ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ -# define USER_HZ 100 /* some user interfaces are */ +# define USER_HZ CONFIG_HZ /* some user interfaces are */ # define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ #endif /* __ASM_GENERIC_PARAM_H */ diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 5d5b98d32..cfb03abba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -197,6 +197,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) set_wb_congested(bdi->wb.congested, sync); } +struct wb_lock_cookie { + bool locked; + unsigned long flags; +}; + #ifdef CONFIG_CGROUP_WRITEBACK /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 090356561..38f140216 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -374,7 +374,7 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode - * @lockedp: temp bool output param, to be passed to the end function + * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This @@ -382,12 +382,12 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * - * The caller must call unlocked_inode_to_wb_end() with *@lockdep - * afterwards and can't sleep during transaction. 
IRQ may or may not be - * disabled on return. + * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and + * can't sleep during the transaction. IRQs may or may not be disabled on + * return. */ static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); @@ -395,10 +395,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) * Paired with store_release in inode_switch_wb_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. */ - *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; - if (unlikely(*lockedp)) - spin_lock_irq(&inode->i_mapping->tree_lock); + if (unlikely(cookie->locked)) + spin_lock_irqsave(&inode->i_mapping->tree_lock, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. @@ -410,12 +410,14 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode - * @locked: *@lockedp from unlocked_inode_to_wb_begin() + * @cookie: @cookie from unlocked_inode_to_wb_begin() */ -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { - if (unlikely(locked)) - spin_unlock_irq(&inode->i_mapping->tree_lock); + if (unlikely(cookie->locked)) + spin_unlock_irqrestore(&inode->i_mapping->tree_lock, + cookie->flags); rcu_read_unlock(); } @@ -462,12 +464,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) } static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { } diff --git a/include/linux/hid.h b/include/linux/hid.h index 698f1fc8b..7127afa03 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -796,7 +796,7 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); -int hid_input_report(struct hid_device *, int type, u8 *, int, int); +int hid_input_report(struct hid_device *, int type, u8 *, u32, int); int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); @@ -1101,13 +1101,13 @@ static inline void hid_hw_wait(struct hid_device *hdev) * * @report: the report we want to know the length */ -static inline int hid_report_len(struct hid_report *report) +static inline u32 hid_report_len(struct hid_report *report) { /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ return ((report->size - 1) >> 3) + 1 + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, +int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt); /* HID quirks API */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 355013f7b..6d6f83dda 100644 --- 
a/include/linux/mm.h +++ b/include/linux/mm.h @@ -236,10 +236,14 @@ extern pgprot_t protection_map[16]; * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * + * MM layer fills up gfp_mask for page allocations but fault handler might + * alter it if its implementation requires a different allocation context. + * * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ + gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ void __user *virtual_address; /* Faulting virtual address */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2f4953c80..69f366621 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -233,9 +233,10 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_WAKING 256 #define TASK_PARKED 512 #define TASK_NOLOAD 1024 -#define TASK_STATE_MAX 2048 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0a2535647..5e566733c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -35,10 +35,6 @@ enum { sysctl_hung_task_timeout_secs = 0 }; extern int sysctl_max_map_count; -#ifdef CONFIG_BOOST_KILL -extern unsigned int sysctl_boost_killing; -#endif - #ifdef CONFIG_HW_VIP_THREAD #include #endif diff --git a/include/linux/state_notifier.h b/include/linux/state_notifier.h new file mode 100644 index 000000000..ffb4fba75 --- /dev/null +++ b/include/linux/state_notifier.h @@ -0,0 +1,20 @@ +#ifndef __LINUX_STATE_NOTIFIER_H +#define __LINUX_STATE_NOTIFIER_H + +#include + +#define STATE_NOTIFIER_ACTIVE 0x01 +#define STATE_NOTIFIER_SUSPEND 0x02 + +struct state_event { + void *data; +}; + +extern bool state_suspended; +extern void state_suspend(void); +extern void state_resume(void); +int state_register_client(struct notifier_block *nb); +int state_unregister_client(struct notifier_block *nb); +int state_notifier_call_chain(unsigned long val, void *v); + +#endif /* _LINUX_STATE_NOTIFIER_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index a1042afff..d67ceb3f5 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -342,6 +342,7 @@ struct tty_file_private { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h index 8716d5942..8fcf8908a 100644 --- a/include/net/slhc_vj.h +++ b/include/net/slhc_vj.h @@ -127,6 +127,7 @@ typedef __u32 int32; */ struct cstate { byte_t cs_this; /* connection id number (xmit) */ + bool initialized; /* true if initialized */ struct cstate *next; /* next in ring (xmit) */ struct iphdr cs_ip; /* ip/tcp hdr from most recent packet */ struct tcphdr cs_tcp; diff --git a/include/sound/pcm_oss.h b/include/sound/pcm_oss.h index 760c969d8..12bbf8c81 100644 --- a/include/sound/pcm_oss.h +++ b/include/sound/pcm_oss.h @@ -57,6 +57,7 @@ struct snd_pcm_oss_runtime { 
char *buffer; /* vmallocated period */ size_t buffer_used; /* used length from period buffer */ struct mutex params_lock; + atomic_t rw_ref; /* concurrent read/write accesses */ #ifdef CONFIG_SND_PCM_OSS_PLUGINS struct snd_pcm_plugin *plugin_first; struct snd_pcm_plugin *plugin_last; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 920ae6f96..99ce03357 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -769,9 +769,9 @@ TRACE_EVENT(sched_load_avg_cpu, __entry->util_avg_pelt = cfs_rq->avg.util_avg; __entry->util_avg_walt = 0; #ifdef CONFIG_SCHED_WALT - __entry->util_avg_walt = - cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT; - do_div(__entry->util_avg_walt, walt_ravg_window); + __entry->util_avg_walt = + div64_u64(cpu_rq(cpu)->cumulative_runnable_avg, + walt_ravg_window >> SCHED_LOAD_SHIFT); if (!walt_disabled && sysctl_sched_use_walt_cpu_util) __entry->util_avg = __entry->util_avg_walt; #endif @@ -783,81 +783,24 @@ TRACE_EVENT(sched_load_avg_cpu, __entry->util_avg_pelt, __entry->util_avg_walt) ); -/* - * Tracepoint for eas attribute store - */ -TRACE_EVENT(eas_attr_store, - - TP_PROTO(const char *name, int value), - - TP_ARGS(name, value), - - TP_STRUCT__entry( - __array( char, name, TASK_COMM_LEN ) - __field( int, value ) - ), - - TP_fast_assign( - memcpy(__entry->name, name, TASK_COMM_LEN); - __entry->value = value; - ), - - TP_printk("name=%s value=%d", __entry->name, __entry->value) -); - -/* - * Tracepoint for schedtune_boost - */ -TRACE_EVENT(sched_tune_boost, - - TP_PROTO(const char *name, int boost), - - TP_ARGS(name, boost), - - TP_STRUCT__entry( - __array( char, name, TASK_COMM_LEN ) - __field( int, boost ) - ), - - TP_fast_assign( - memcpy(__entry->name, name, TASK_COMM_LEN); - __entry->boost = boost; - ), - - TP_printk("name=%s boost=%d", __entry->name, __entry->boost) -); - /* * Tracepoint for sched_tune_config settings */ TRACE_EVENT(sched_tune_config, - TP_PROTO(int boost, int pb_nrg_gain, int pb_cap_gain, int pc_nrg_gain, int pc_cap_gain), + TP_PROTO(int boost), - TP_ARGS(boost, pb_nrg_gain, pb_cap_gain, pc_nrg_gain, pc_cap_gain), + TP_ARGS(boost), TP_STRUCT__entry( __field( int, boost ) - __field( int, pb_nrg_gain ) - __field( int, pb_cap_gain ) - __field( int, pc_nrg_gain ) - __field( int, pc_cap_gain ) ), TP_fast_assign( __entry->boost = boost; - __entry->pb_nrg_gain = pb_nrg_gain; - __entry->pb_cap_gain = pb_cap_gain; - __entry->pc_nrg_gain = pc_nrg_gain; - __entry->pc_cap_gain = pc_cap_gain; - ), - - TP_printk("boost=%d " - "pb_nrg_gain=%d pb_cap_gain=%d " - "pc_nrg_gain=%d pc_cap_gain=%d", - __entry->boost, - __entry->pb_nrg_gain, __entry->pb_cap_gain, - __entry->pc_nrg_gain, __entry->pc_cap_gain) + ), + + TP_printk("boost=%d ", __entry->boost) ); /* @@ -893,9 +836,9 @@ TRACE_EVENT(sched_boost_cpu, TRACE_EVENT(sched_tune_tasks_update, TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, - int boost, int max_boost, u64 group_ts), + int boost, int max_boost), - TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts), + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -905,7 +848,6 @@ TRACE_EVENT(sched_tune_tasks_update, __field( int, idx ) __field( int, boost ) __field( int, max_boost ) - __field( u64, group_ts ) ), TP_fast_assign( @@ -916,15 +858,13 @@ TRACE_EVENT(sched_tune_tasks_update, __entry->idx = idx; __entry->boost = boost; __entry->max_boost = max_boost; - __entry->group_ts = group_ts; ), TP_printk("pid=%d comm=%s " - "cpu=%d 
tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu", + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d", __entry->pid, __entry->comm, __entry->cpu, __entry->tasks, __entry->idx, - __entry->boost, __entry->max_boost, - __entry->group_ts) + __entry->boost, __entry->max_boost) ); /* @@ -989,11 +929,9 @@ TRACE_EVENT(sched_find_best_target, TP_PROTO(struct task_struct *tsk, bool prefer_idle, unsigned long min_util, int start_cpu, - bool low_util_mode, int low_util_cpu, int best_idle, int best_active, int target), TP_ARGS(tsk, prefer_idle, min_util, start_cpu, - low_util_mode, low_util_cpu, best_idle, best_active, target), TP_STRUCT__entry( @@ -1002,8 +940,6 @@ TRACE_EVENT(sched_find_best_target, __field( unsigned long, min_util ) __field( bool, prefer_idle ) __field( int, start_cpu ) - __field( bool, low_util_mode ) - __field( int, low_util_cpu ) __field( int, best_idle ) __field( int, best_active ) __field( int, target ) @@ -1015,23 +951,76 @@ TRACE_EVENT(sched_find_best_target, __entry->min_util = min_util; __entry->prefer_idle = prefer_idle; __entry->start_cpu = start_cpu; - __entry->low_util_mode = low_util_mode; - __entry->low_util_cpu = low_util_cpu; __entry->best_idle = best_idle; __entry->best_active = best_active; __entry->target = target; ), TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d " - "low_util_mode=%d, low_util_cpu=%d " "best_idle=%d best_active=%d target=%d", __entry->pid, __entry->comm, __entry->prefer_idle, __entry->start_cpu, - __entry->low_util_mode, __entry->low_util_cpu, __entry->best_idle, __entry->best_active, __entry->target) ); +/* + * Tracepoint for accounting sched group energy + */ +TRACE_EVENT(sched_energy_diff, + + TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta, + int nrgb, int nrga, int nrgd, int capb, int capa, int capd, + int nrgn, int nrgp), + + TP_ARGS(tsk, scpu, dcpu, udelta, + nrgb, nrga, nrgd, capb, capa, capd, + nrgn, nrgp), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, scpu ) + __field( int, dcpu ) + __field( int, udelta ) + __field( int, nrgb ) + __field( int, nrga ) + __field( int, nrgd ) + __field( int, capb ) + __field( int, capa ) + __field( int, capd ) + __field( int, nrgn ) + __field( int, nrgp ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->scpu = scpu; + __entry->dcpu = dcpu; + __entry->udelta = udelta; + __entry->nrgb = nrgb; + __entry->nrga = nrga; + __entry->nrgd = nrgd; + __entry->capb = capb; + __entry->capa = capa; + __entry->capd = capd; + __entry->nrgn = nrgn; + __entry->nrgp = nrgp; + ), + + TP_printk("pid=%d comm=%s " + "src_cpu=%d dst_cpu=%d usage_delta=%d " + "nrg_before=%d nrg_after=%d nrg_diff=%d " + "cap_before=%d cap_after=%d cap_delta=%d " + "nrg_delta=%d nrg_payoff=%d", + __entry->pid, __entry->comm, + __entry->scpu, __entry->dcpu, __entry->udelta, + __entry->nrgb, __entry->nrga, __entry->nrgd, + __entry->capb, __entry->capa, __entry->capd, + __entry->nrgn, __entry->nrgp) +); + /* * Tracepoint for schedtune_tasks_update */ @@ -1113,6 +1102,7 @@ TRACE_EVENT(walt_update_task_ravg, __field( int, cpu ) __field( u64, cs ) __field( u64, ps ) + __field(unsigned long, util ) __field( u32, curr_window ) __field( u32, prev_window ) __field( u64, nt_cs ) @@ -1136,6 +1126,8 @@ TRACE_EVENT(walt_update_task_ravg, __entry->irqtime = irqtime; __entry->cs = rq->curr_runnable_sum; __entry->ps = rq->prev_runnable_sum; + __entry->util = rq->prev_runnable_sum << SCHED_LOAD_SHIFT; + 
do_div(__entry->util, walt_ravg_window); __entry->curr_window = p->ravg.curr_window; __entry->prev_window = p->ravg.prev_window; __entry->nt_cs = rq->nt_curr_runnable_sum; @@ -1144,15 +1136,14 @@ TRACE_EVENT(walt_update_task_ravg, ), TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" - " cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" + " cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u" , __entry->wallclock, __entry->win_start, __entry->delta, __entry->evt, __entry->cpu, __entry->cur_pid, __entry->pid, __entry->comm, __entry->mark_start, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime, - __entry->cs, __entry->ps, + __entry->cs, __entry->ps, __entry->util, __entry->curr_window, __entry->prev_window, - __entry->nt_cs, __entry->nt_ps, __entry->active_windows ) ); diff --git a/ipc/shm.c b/ipc/shm.c index 4982a4e7f..a492dd81c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -198,6 +198,12 @@ static int __shm_open(struct vm_area_struct *vma) if (IS_ERR(shp)) return PTR_ERR(shp); + if (shp->shm_file != sfd->file) { + /* ID was reused */ + shm_unlock(shp); + return -EINVAL; + } + shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; @@ -414,8 +420,9 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) int ret; /* - * In case of remap_file_pages() emulation, the file can represent - * removed IPC ID: propogate shm_lock() error to caller. + * In case of remap_file_pages() emulation, the file can represent an + * IPC ID that was removed, and possibly even reused by another shm + * segment already. Propagate this case as an error to caller. */ ret =__shm_open(vma); if (ret) @@ -439,6 +446,7 @@ static int shm_release(struct inode *ino, struct file *file) struct shm_file_data *sfd = shm_file_data(file); put_ipc_ns(sfd->ns); + fput(sfd->file); shm_file_data(file) = NULL; kfree(sfd); return 0; @@ -1198,7 +1206,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, file->f_mapping = shp->shm_file->f_mapping; sfd->id = shp->shm_perm.id; sfd->ns = get_ipc_ns(ns); - sfd->file = shp->shm_file; + /* + * We need to take a reference to the real shm file to prevent the + * pointer from becoming stale in cases where the lifetime of the outer + * file extends beyond that of the shm segment. It's not usually + * possible, but it can happen during remap_file_pages() emulation as + * that unmaps the memory, then does ->mmap() via file reference only. + * We'll deny the ->mmap() if the shm segment was since removed, but to + * detect shm ID reuse we need to compare the file pointers. 
+ */ + sfd->file = get_file(shp->shm_file); sfd->vm_ops = NULL; err = security_mmap_file(file, prot, flags); diff --git a/kernel/hwcfs/hwcfs_common.c b/kernel/hwcfs/hwcfs_common.c index a22047497..9d615bfee 100644 --- a/kernel/hwcfs/hwcfs_common.c +++ b/kernel/hwcfs/hwcfs_common.c @@ -327,10 +327,6 @@ static int vip_can_migrate(struct task_struct *p, struct rq *src_rq, struct rq * return 1; } -extern void hisi_get_fast_cpus(struct cpumask *cpumask); -extern void hisi_get_slow_cpus(struct cpumask *cpumask); -static struct cpumask hisi_slow_cpu_mask; - static int __do_vip_balance(void *data) { struct rq *src_rq = data; diff --git a/kernel/resource.c b/kernel/resource.c index c09d484f7..73348f574 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -611,7 +611,8 @@ static int __find_resource(struct resource *root, struct resource *old, alloc.start = constraint->alignf(constraint->alignf_data, &avail, size, constraint->align); alloc.end = alloc.start + size - 1; - if (resource_contains(&avail, &alloc)) { + if (alloc.start <= alloc.end && + resource_contains(&avail, &alloc)) { new->start = alloc.start; new->end = alloc.end; return 0; diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5bc46e18d..7a060dbdb 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,7 +19,7 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o -obj-y += wait.o swait.o completion.o idle.o +obj-y += wait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o obj-$(CONFIG_SCHED_WALT) += walt.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 558f7bf50..a46275654 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -1196,18 +1197,6 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma p->nr_cpus_allowed = cpumask_weight(new_mask); } -static const struct cpumask *adjust_cpumask(const struct task_struct *p, - const struct cpumask *old_mask) -{ - static const unsigned long allowed_cpus = 0xf; - - if (!(p->flags & PF_KTHREAD) || p->kthread_per_cpu) - return old_mask; - - /* Force as many kthreads as possible to run on the little cluster */ - return to_cpumask(&allowed_cpus); -} - void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { struct rq *rq = task_rq(p); @@ -1215,7 +1204,6 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) lockdep_assert_held(&p->pi_lock); - new_mask = adjust_cpumask(p, new_mask); queued = task_on_rq_queued(p); running = task_current(rq, p); @@ -1427,8 +1415,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) struct migration_swap_arg arg; int ret = -EINVAL; - get_online_cpus(); - arg = (struct migration_swap_arg){ .src_task = cur, .src_cpu = task_cpu(cur), @@ -1439,6 +1425,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (arg.src_cpu == arg.dst_cpu) goto out; + /* + * These three tests are all lockless; this is OK since all of them + * will be re-checked with proper locks held further down the line. 
+ */ if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; @@ -1452,7 +1442,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); out: - put_online_cpus(); return ret; } @@ -1674,9 +1663,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, lockdep_assert_held(&p->pi_lock); if (p->nr_cpus_allowed > 1) - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, - sibling_count_hint); - + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, + sibling_count_hint); /* * In order not to call set_task_cpu() on a blocking task we need @@ -2218,8 +2206,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif -#ifdef CONFIG_CPU_FREQ_STAT - cpufreq_task_stats_init(p); +#ifdef CONFIG_CPU_FREQ_TIMES + cpufreq_task_times_init(p); #endif RB_CLEAR_NODE(&p->dl.rb_node); @@ -2301,11 +2289,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) __sched_fork(clone_flags, p); /* - * We mark the process as running here. This guarantees that + * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ - p->state = TASK_RUNNING; + p->state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. @@ -2342,6 +2330,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } + init_entity_runnable_average(&p->se); + /* * The child is not yet in the pid-hash so no cgroup attach races, * and the cgroup is pinned to this child due to cgroup_fork() @@ -2475,11 +2465,6 @@ static int dl_overflow(struct task_struct *p, int policy, extern void init_dl_bw(struct dl_bw *dl_b); -#ifdef CONFIG_HISI_EAS_SCHED - #define task_should_forkboost(task) \ - ((task && task->parent && task->parent->pid > 2)) -#endif - /* * wake_up_new_task - wake up a newly created task for the first time. 
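The fork-time state machine reworked in the kernel/sched/core.c hunks can be read as a three-step handshake: sched_fork() parks the child in TASK_NEW so neither the scheduler nor a signal can enqueue it, cpu_cgroup_can_attach() (in a hunk further down) refuses to move a task that is still TASK_NEW so PELT never has to detach state that was never attached, and wake_up_new_task() finally flips the task to TASK_RUNNING under pi_lock. A condensed, illustrative sketch:

/* sched_fork(): not runnable yet */
p->state = TASK_NEW;

/* cpu_cgroup_can_attach(): refuse a cgroup move before the first wakeup */
raw_spin_lock_irq(&task->pi_lock);
if (task->state == TASK_NEW)
        ret = -EINVAL;
raw_spin_unlock_irq(&task->pi_lock);

/* wake_up_new_task(): from here on the task may be enqueued */
raw_spin_lock_irqsave(&p->pi_lock, flags);
p->state = TASK_RUNNING;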
* @@ -2493,22 +2478,13 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, flags); + p->state = TASK_RUNNING; walt_init_new_task_load(p); /* Initialize new task's runnable average */ -#ifdef CONFIG_HISI_EAS_SCHED - if (task_should_forkboost(p)) { - init_entity_runnable_average(&p->se); - } else { - struct sched_entity *se= &p->se; - struct sched_avg *sa= &se->avg; - memset(sa, 0, sizeof(*sa)); - } -#else init_entity_runnable_average(&p->se); -#endif #ifdef CONFIG_SMP /* @@ -4634,13 +4610,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) struct task_struct *p; int retval; - get_online_cpus(); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); - put_online_cpus(); return -ESRCH; } @@ -4716,7 +4690,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) free_cpumask_var(cpus_allowed); out_put_task: put_task_struct(p); - put_online_cpus(); return retval; } @@ -4761,7 +4734,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) unsigned long flags; int retval; - get_online_cpus(); rcu_read_lock(); retval = -ESRCH; @@ -4774,12 +4746,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) goto out_unlock; raw_spin_lock_irqsave(&p->pi_lock, flags); - cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); + cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); raw_spin_unlock_irqrestore(&p->pi_lock, flags); out_unlock: rcu_read_unlock(); - put_online_cpus(); return retval; } @@ -5144,8 +5115,17 @@ void sched_show_task(struct task_struct *p) state = __ffs(state) + 1; printk(KERN_INFO "%-15.15s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); +#if BITS_PER_LONG == 32 + if (state == TASK_RUNNING) + printk(KERN_CONT " running "); + else + printk(KERN_CONT " %08lx ", thread_saved_pc(p)); +#else if (state == TASK_RUNNING) printk(KERN_CONT " running task "); + else + printk(KERN_CONT " %016lx ", thread_saved_pc(p)); +#endif #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif @@ -5187,9 +5167,6 @@ void show_state_filter(unsigned long state_filter) if (!state_filter || (p->state & state_filter)) sched_show_task(p); - /* show 'init' state always */ - if (p->pid == 1) - sched_show_task(p); } #ifdef CONFIG_SCHED_DEBUG @@ -5383,7 +5360,6 @@ void sched_setnuma(struct task_struct *p, int nid) unsigned long flags; bool queued, running; - new_mask = adjust_cpumask(p, new_mask); rq = task_rq_lock(p, &flags); queued = task_on_rq_queued(p); running = task_current(rq, p); @@ -6298,8 +6274,6 @@ static void free_sched_domain(struct rcu_head *rcu) kfree(sd->groups->sgc); kfree(sd->groups); } - if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) - kfree(sd->shared); kfree(sd); } @@ -6818,9 +6792,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd) WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); *per_cpu_ptr(sdd->sd, cpu) = NULL; - if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref)) - *per_cpu_ptr(sdd->sds, cpu) = NULL; - if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) *per_cpu_ptr(sdd->sg, cpu) = NULL; @@ -6868,12 +6839,10 @@ static int sched_domains_curr_level; static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, - const struct cpumask *cpu_map, struct sched_domain *child, int cpu) { - struct sd_data *sdd = &tl->data; - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); - int sd_id, sd_weight, sd_flags = 0; + struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); + int sd_weight, sd_flags = 0; #ifdef CONFIG_NUMA /* @@ 
-6928,9 +6897,6 @@ sd_init(struct sched_domain_topology_level *tl, #endif }; - cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); - sd_id = cpumask_first(sched_domain_span(sd)); - /* * Convert topological properties into behaviour. */ @@ -6973,16 +6939,7 @@ sd_init(struct sched_domain_topology_level *tl, sd->idle_idx = 1; } - /* - * For all levels sharing cache; connect a sched_domain_shared - * instance. - */ - sd->shared = *per_cpu_ptr(sdd->sds, sd_id); - atomic_inc(&sd->shared->ref); - if (sd->flags & SD_SHARE_PKG_RESOURCES) - atomic_inc(&sd->shared->ref); - - sd->private = sdd; + sd->private = &tl->data; return sd; } @@ -7320,10 +7277,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map) if (!sdd->sd) return -ENOMEM; - sdd->sds = alloc_percpu(struct sched_domain_shared *); - if (!sdd->sds) - return -ENOMEM; - sdd->sg = alloc_percpu(struct sched_group *); if (!sdd->sg) return -ENOMEM; @@ -7334,7 +7287,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map) for_each_cpu(j, cpu_map) { struct sched_domain *sd; - struct sched_domain_shared *sds; struct sched_group *sg; struct sched_group_capacity *sgc; @@ -7345,13 +7297,6 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sd, j) = sd; - sds = kzalloc_node(sizeof(struct sched_domain_shared), - GFP_KERNEL, cpu_to_node(j)); - if (!sds) - return -ENOMEM; - - *per_cpu_ptr(sdd->sds, j) = sds; - sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); if (!sg) @@ -7391,8 +7336,6 @@ static void __sdt_free(const struct cpumask *cpu_map) kfree(*per_cpu_ptr(sdd->sd, j)); } - if (sdd->sds) - kfree(*per_cpu_ptr(sdd->sds, j)); if (sdd->sg) kfree(*per_cpu_ptr(sdd->sg, j)); if (sdd->sgc) @@ -7400,8 +7343,6 @@ static void __sdt_free(const struct cpumask *cpu_map) } free_percpu(sdd->sd); sdd->sd = NULL; - free_percpu(sdd->sds); - sdd->sds = NULL; free_percpu(sdd->sg); sdd->sg = NULL; free_percpu(sdd->sgc); @@ -7413,15 +7354,14 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *child, int cpu) { - struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu); - if (!sd) - return child; + struct sched_domain *sd = sd_init(tl, child, cpu); + + cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; sched_domain_level_max = max(sched_domain_level_max, sd->level); child->parent = sd; - sd->child = child; if (!cpumask_subset(sched_domain_span(child), sched_domain_span(sd))) { @@ -7800,14 +7740,17 @@ void __init sched_init_smp(void) sched_init_numa(); - get_online_cpus(); + /* + * There's no userspace yet to cause hotplug operations; hence all the + * cpu masks are stable and all blatant races in the below code cannot + * happen. + */ mutex_lock(&sched_domains_mutex); init_sched_domains(cpu_active_mask); cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); if (cpumask_empty(non_isolated_cpus)) cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); - put_online_cpus(); hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); @@ -8275,11 +8218,6 @@ void sched_offline_group(struct task_group *tg) spin_unlock_irqrestore(&task_group_lock, flags); } -/* change task's runqueue when it moves between groups. - * The caller of this function should have put the task in its new group - * by now. 
This function just updates tsk->se.cfs_rq and tsk->se.parent to - * reflect its new group. - */ static void sched_change_group(struct task_struct *tsk, int type) { struct task_group *tg; @@ -8321,7 +8259,7 @@ void sched_move_task(struct task_struct *tsk) queued = task_on_rq_queued(tsk); if (queued) - dequeue_task(rq, tsk, flags); + dequeue_task(rq, tsk, DEQUEUE_SAVE); if (unlikely(running)) put_prev_task(rq, tsk); @@ -8783,6 +8721,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; + int ret = 0; cgroup_taskset_for_each(task, css, tset) { #ifdef CONFIG_RT_GROUP_SCHED @@ -8793,8 +8732,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) if (task->sched_class != &fair_sched_class) return -EINVAL; #endif + /* + * Serialize against wake_up_new_task() such that if its + * running, we're sure to observe its full state. + */ + raw_spin_lock_irq(&task->pi_lock); + /* + * Avoid calling sched_move_task() before wake_up_new_task() + * has happened. This would lead to problems with PELT, due to + * move wanting to detach+attach while we're not attached yet. + */ + if (task->state == TASK_NEW) + ret = -EINVAL; + raw_spin_unlock_irq(&task->pi_lock); + + if (ret) + break; } - return 0; + return ret; } static void cpu_cgroup_attach(struct cgroup_taskset *tset) @@ -9137,7 +9092,6 @@ struct cgroup_subsys cpu_cgrp_subsys = { .fork = cpu_cgroup_fork, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, - .allow_attach = subsys_cgroup_allow_attach, .legacy_cftypes = cpu_files, .early_init = 1, }; diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index fba235c7d..dc87f30f2 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -31,81 +31,58 @@ static inline int right_child(int i) return (i << 1) + 2; } -static void cpudl_heapify_down(struct cpudl *cp, int idx) +static void cpudl_exchange(struct cpudl *cp, int a, int b) { - int l, r, largest; + int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; + + swap(cp->elements[a].cpu, cp->elements[b].cpu); + swap(cp->elements[a].dl , cp->elements[b].dl ); - int orig_cpu = cp->elements[idx].cpu; - u64 orig_dl = cp->elements[idx].dl; + swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx); +} - if (left_child(idx) >= cp->size) - return; +static void cpudl_heapify(struct cpudl *cp, int idx) +{ + int l, r, largest; /* adapted from lib/prio_heap.c */ while(1) { - u64 largest_dl; l = left_child(idx); r = right_child(idx); largest = idx; - largest_dl = orig_dl; - if ((l < cp->size) && dl_time_before(orig_dl, - cp->elements[l].dl)) { + if ((l < cp->size) && dl_time_before(cp->elements[idx].dl, + cp->elements[l].dl)) largest = l; - largest_dl = cp->elements[l].dl; - } - if ((r < cp->size) && dl_time_before(largest_dl, - cp->elements[r].dl)) + + if ((r < cp->size) && dl_time_before(cp->elements[largest].dl, + cp->elements[r].dl)) largest = r; if (largest == idx) break; - /* pull largest child onto idx */ - cp->elements[idx].cpu = cp->elements[largest].cpu; - cp->elements[idx].dl = cp->elements[largest].dl; - cp->elements[cp->elements[idx].cpu].idx = idx; + /* Push idx down the heap one level and bump one up */ + cpudl_exchange(cp, largest, idx); idx = largest; } - /* actual push down of saved original values orig_* */ - cp->elements[idx].cpu = orig_cpu; - cp->elements[idx].dl = orig_dl; - cp->elements[cp->elements[idx].cpu].idx = idx; } -static void cpudl_heapify_up(struct cpudl *cp, int idx) +static void cpudl_change_key(struct 
cpudl *cp, int idx, u64 new_dl) { - int p; - - int orig_cpu = cp->elements[idx].cpu; - u64 orig_dl = cp->elements[idx].dl; + WARN_ON(idx == IDX_INVALID || !cpu_present(idx)); - if (idx == 0) - return; - - do { - p = parent(idx); - if (dl_time_before(orig_dl, cp->elements[p].dl)) - break; - /* pull parent onto idx */ - cp->elements[idx].cpu = cp->elements[p].cpu; - cp->elements[idx].dl = cp->elements[p].dl; - cp->elements[cp->elements[idx].cpu].idx = idx; - idx = p; - } while (idx != 0); - /* actual push up of saved original values orig_* */ - cp->elements[idx].cpu = orig_cpu; - cp->elements[idx].dl = orig_dl; - cp->elements[cp->elements[idx].cpu].idx = idx; -} - -static void cpudl_heapify(struct cpudl *cp, int idx) -{ - if (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl, - cp->elements[idx].dl)) - cpudl_heapify_up(cp, idx); - else - cpudl_heapify_down(cp, idx); + if (dl_time_before(new_dl, cp->elements[idx].dl)) { + cp->elements[idx].dl = new_dl; + cpudl_heapify(cp, idx); + } else { + cp->elements[idx].dl = new_dl; + while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl, + cp->elements[idx].dl)) { + cpudl_exchange(cp, idx, parent(idx)); + idx = parent(idx); + } + } } static inline int cpudl_maximum(struct cpudl *cp) @@ -145,15 +122,16 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, } /* - * cpudl_clear - remove a cpu from the cpudl max-heap + * cpudl_set - update the cpudl max-heap * @cp: the cpudl max-heap context * @cpu: the target cpu + * @dl: the new earliest deadline for this cpu * * Notes: assumes cpu_rq(cpu)->lock is locked * * Returns: (void) */ -void cpudl_clear(struct cpudl *cp, int cpu) +void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) { int old_idx, new_cpu; unsigned long flags; @@ -163,58 +141,46 @@ void cpudl_clear(struct cpudl *cp, int cpu) raw_spin_lock_irqsave(&cp->lock, flags); old_idx = cp->elements[cpu].idx; - if (old_idx == IDX_INVALID) { - /* - * Nothing to remove if old_idx was invalid. - * This could happen if a rq_offline_dl is - * called for a CPU without -dl tasks running. - */ - } else { + if (!is_valid) { + /* remove item */ + if (old_idx == IDX_INVALID) { + /* + * Nothing to remove if old_idx was invalid. + * This could happen if a rq_offline_dl is + * called for a CPU without -dl tasks running. 
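The cpudeadline.c revert above folds insertion, key update and removal back into a single entry point, cpudl_set(cp, cpu, dl, is_valid), replacing the separate cpudl_set()/cpudl_clear() pair. The calling convention, as used by the kernel/sched/deadline.c hunks further down (new_deadline is just a placeholder here):

/* this CPU gained an earliest deadline, or its earliest deadline changed */
cpudl_set(&rq->rd->cpudl, rq->cpu, new_deadline, 1);

/* this CPU no longer runs any -deadline task: drop it from the max-heap */
cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);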
+ */ + goto out; + } new_cpu = cp->elements[cp->size - 1].cpu; cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; cp->elements[old_idx].cpu = new_cpu; cp->size--; cp->elements[new_cpu].idx = old_idx; cp->elements[cpu].idx = IDX_INVALID; - cpudl_heapify(cp, old_idx); + while (old_idx > 0 && dl_time_before( + cp->elements[parent(old_idx)].dl, + cp->elements[old_idx].dl)) { + cpudl_exchange(cp, old_idx, parent(old_idx)); + old_idx = parent(old_idx); + } cpumask_set_cpu(cpu, cp->free_cpus); + cpudl_heapify(cp, old_idx); + goto out; } - raw_spin_unlock_irqrestore(&cp->lock, flags); -} -/* - * cpudl_set - update the cpudl max-heap - * @cp: the cpudl max-heap context - * @cpu: the target cpu - * @dl: the new earliest deadline for this cpu - * - * Notes: assumes cpu_rq(cpu)->lock is locked - * - * Returns: (void) - */ -void cpudl_set(struct cpudl *cp, int cpu, u64 dl) -{ - int old_idx; - unsigned long flags; - - WARN_ON(!cpu_present(cpu)); - - raw_spin_lock_irqsave(&cp->lock, flags); - - old_idx = cp->elements[cpu].idx; if (old_idx == IDX_INVALID) { - int new_idx = cp->size++; - cp->elements[new_idx].dl = dl; - cp->elements[new_idx].cpu = cpu; - cp->elements[cpu].idx = new_idx; - cpudl_heapify_up(cp, new_idx); + cp->size++; + cp->elements[cp->size - 1].dl = 0; + cp->elements[cp->size - 1].cpu = cpu; + cp->elements[cpu].idx = cp->size - 1; + cpudl_change_key(cp, cp->size - 1, dl); cpumask_clear_cpu(cpu, cp->free_cpus); } else { - cp->elements[old_idx].dl = dl; - cpudl_heapify(cp, old_idx); + cpudl_change_key(cp, old_idx, dl); } +out: raw_spin_unlock_irqrestore(&cp->lock, flags); } diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index f7da8c55b..fcbdf83fe 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h @@ -23,8 +23,7 @@ struct cpudl { #ifdef CONFIG_SMP int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask); -void cpudl_set(struct cpudl *cp, int cpu, u64 dl); -void cpudl_clear(struct cpudl *cp, int cpu); +void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); int cpudl_init(struct cpudl *cp); void cpudl_set_freecpu(struct cpudl *cp, int cpu); void cpudl_clear_freecpu(struct cpudl *cp, int cpu); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9d1e766d7..efe7a210c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -7,9 +7,7 @@ #include #include "sched.h" #include "walt.h" -#ifdef CONFIG_CPU_FREQ_POWER_STAT -#include -#endif +#include #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -169,9 +167,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime, /* Account for user time used */ acct_account_cputime(p); -#ifdef CONFIG_CPU_FREQ_STAT - /* Account power usage for system time */ - acct_update_power(p, cputime); +#ifdef CONFIG_CPU_FREQ_TIMES + /* Account power usage for user time */ + cpufreq_acct_update_power(p, cputime); #endif } @@ -224,9 +222,9 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, /* Account for system time used */ acct_account_cputime(p); -#ifdef CONFIG_CPU_FREQ_STAT +#ifdef CONFIG_CPU_FREQ_TIMES /* Account power usage for system time */ - acct_update_power(p, cputime); + cpufreq_acct_update_power(p, cputime); #endif } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1af035bd9..5c6ffddca 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -949,7 +949,7 @@ static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) */ dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; dl_rq->earliest_dl.curr = 
deadline; - cpudl_set(&rq->rd->cpudl, rq->cpu, deadline); + cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1); } else if (dl_rq->earliest_dl.next == 0 || dl_time_before(deadline, dl_rq->earliest_dl.next)) { /* @@ -973,7 +973,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) if (!dl_rq->dl_nr_running) { dl_rq->earliest_dl.curr = 0; dl_rq->earliest_dl.next = 0; - cpudl_clear(&rq->rd->cpudl, rq->cpu); + cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); } else { struct rb_node *leftmost = dl_rq->rb_leftmost; struct sched_dl_entity *entry; @@ -981,7 +981,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); dl_rq->earliest_dl.curr = entry->deadline; dl_rq->earliest_dl.next = next_deadline(rq); - cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline); + cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1); } } @@ -1600,7 +1600,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || - !dl_task(task) || !task_on_rq_queued(task))) { double_unlock_balance(rq, later_rq); later_rq = NULL; @@ -1881,7 +1880,7 @@ static void rq_online_dl(struct rq *rq) cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu); if (rq->dl.dl_nr_running > 0) - cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr); + cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); } /* Assumes rq->lock is held */ @@ -1890,7 +1889,7 @@ static void rq_offline_dl(struct rq *rq) if (rq->dl.overloaded) dl_clear_overload(rq); - cpudl_clear(&rq->rd->cpudl, rq->cpu); + cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e95cddf57..7f7116622 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -618,9 +618,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.statistics.nr_wakeups_fbt_no_cpu); P(se.statistics.nr_wakeups_fbt_no_sd); P(se.statistics.nr_wakeups_fbt_pref_idle); - P(se.statistics.nr_wakeups_fbt_pref_idle_lum); - P(se.statistics.nr_wakeups_fbt_best_active); - P(se.statistics.nr_wakeups_fbt_best_idle); P(se.statistics.nr_wakeups_fbt_count); /* cas */ /* select_task_rq_fair() */ diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c index 2c3553556..b0656b7a9 100644 --- a/kernel/sched/energy.c +++ b/kernel/sched/energy.c @@ -46,30 +46,6 @@ static void free_resources(void) } } -static void dump_energy_model(void) -{ - int cpu, sd_level, idx; - struct sched_group_energy *sge; - - for_each_possible_cpu(cpu) { - for_each_possible_sd_level(sd_level) { - sge = sge_array[cpu][sd_level]; - if (!sge) - continue; - - pr_info("EAS: cpu %d sd_level = %d\n", cpu, sd_level); - for (idx = 0; idx < sge->nr_idle_states; idx++) - pr_info("Idle state [%d] = p %lu\n", idx, - sge->idle_states[idx].power); - - for (idx = 0; idx < sge->nr_cap_states; idx++) - pr_info("Idle state [%d] = c %lu p %lu\n", idx, - sge->cap_states[idx].cap, - sge->cap_states[idx].power); - } - } -} - void init_sched_energy_costs(void) { struct device_node *cn, *cp; @@ -140,8 +116,6 @@ void init_sched_energy_costs(void) } } - dump_energy_model(); - pr_info("Sched-energy-costs installed from DT\n"); return; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c21a1b14c..cb9063454 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -42,7 +42,6 @@ #include #endif - /* * Targeted preemption latency for CPU-bound tasks: * (default: 6ms * (1 + 
ilog(ncpus)), units: nanoseconds) @@ -55,34 +54,15 @@ * (to see the precise effective timeslice length of your workload, * run vmstat and monitor the context-switches (cs) field) */ -#ifdef CONFIG_ZEN_INTERACTIVE -unsigned int sysctl_sched_latency = 3000000ULL; -unsigned int normalized_sysctl_sched_latency = 3000000ULL; -#else unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; -#endif unsigned int sysctl_sched_sync_hint_enable = 1; -unsigned int sysctl_sched_cstate_aware = 0; - -#ifdef CONFIG_HISI_EAS_SCHED -int global_boost_enabled_flag = 0; -int boot_boost = 1; -unsigned int sd_capacity_margin = 1280; -unsigned long up_migration_util_filter = 25; -int hisi_test_fast_cpu(int cpu); -void hisi_get_fast_cpus(struct cpumask *cpumask); -#endif +unsigned int sysctl_sched_cstate_aware = 1; #ifdef CONFIG_SCHED_WALT -#ifdef CONFIG_SCHED_WALT_DEFAULT unsigned int sysctl_sched_use_walt_cpu_util = 1; unsigned int sysctl_sched_use_walt_task_util = 1; -#else -unsigned int sysctl_sched_use_walt_cpu_util = 0; -unsigned int sysctl_sched_use_walt_task_util = 0; -#endif __read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = (10 * NSEC_PER_MSEC); #endif @@ -102,22 +82,13 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling * Minimal preemption granularity for CPU-bound tasks: * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) */ -#ifdef CONFIG_ZEN_INTERACTIVE -unsigned int sysctl_sched_min_granularity = 300000ULL; -unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -#else unsigned int sysctl_sched_min_granularity = 750000ULL; unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; -#endif /* * is kept at sysctl_sched_latency / sysctl_sched_min_granularity */ -#ifdef CONFIG_ZEN_INTERACTIVE -static unsigned int sched_nr_latency = 10; -#else static unsigned int sched_nr_latency = 8; -#endif /* * After fork, child runs first. If set to 0 (default) then @@ -133,18 +104,10 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. 
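The tunables trimmed above preserve the documented invariant that sched_nr_latency is kept at sysctl_sched_latency / sysctl_sched_min_granularity. Worked numbers (illustrative): the stock values restored here give 6000000 / 750000 = 8, matching sched_nr_latency = 8, while the removed CONFIG_ZEN_INTERACTIVE values gave 3000000 / 300000 = 10, matching the removed sched_nr_latency = 10.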
*/ -#ifdef CONFIG_ZEN_INTERACTIVE -unsigned int sysctl_sched_wakeup_granularity = 500000UL; -unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; - -const_debug unsigned int sysctl_sched_migration_cost = 250000UL; -#else unsigned int sysctl_sched_wakeup_granularity = 1000000UL; unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; -/*const_debug unsigned int sysctl_sched_migration_cost = 500000UL;*/ -const_debug unsigned int sysctl_sched_migration_cost = 0UL; -#endif +const_debug unsigned int sysctl_sched_migration_cost = 500000UL; /* * The exponential sliding window over which load is averaged for shares @@ -164,12 +127,14 @@ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; * * default: 5 msec, units: microseconds */ -#ifdef CONFIG_ZEN_INTERACTIVE -unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -#else unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif -#endif + +/* + * The margin used when comparing utilization with CPU capacity: + * util * margin < capacity * 1024 + */ +unsigned int capacity_margin = 1280; /* ~20% */ static inline void update_load_add(struct load_weight *lw, unsigned long inc) { @@ -786,6 +751,13 @@ void init_entity_runnable_average(struct sched_entity *se) if (entity_is_task(se)) sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; + /* + * In previous Android versions, we used to have: + * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); + * sa->util_sum = sa->util_avg * LOAD_AVG_MAX; + * However, that functionality has been moved to enqueue. + * It is unclear if we should restore this in enqueue. + */ /* * At this point, util_avg won't be used in select_task_rq_fair anyway */ @@ -794,6 +766,11 @@ void init_entity_runnable_average(struct sched_entity *se) /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } +static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); +static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq); +static void attach_entity_cfs_rq(struct sched_entity *se); +static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se); + /* * With new tasks being created, their initial util_avgs are extrapolated * based on the cfs_rq's current util_avg: @@ -823,7 +800,7 @@ void post_init_entity_util_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); struct sched_avg *sa = &se->avg; - long cap = (long)(scale_load_down(SCHED_LOAD_SCALE) - cfs_rq->avg.util_avg) / 2; + long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2; if (cap > 0) { if (cfs_rq->avg.util_avg != 0) { @@ -835,18 +812,45 @@ void post_init_entity_util_avg(struct sched_entity *se) } else { sa->util_avg = cap; } + /* + * If we wish to restore tuning via setting initial util, + * this is where we should do it. + */ sa->util_sum = sa->util_avg * LOAD_AVG_MAX; } + + if (entity_is_task(se)) { + struct task_struct *p = task_of(se); + if (p->sched_class != &fair_sched_class) { + /* + * For !fair tasks do: + * + update_cfs_rq_load_avg(now, cfs_rq, false); + attach_entity_load_avg(cfs_rq, se); + switched_from_fair(rq, p); + * + * such that the next switched_to_fair() has the + * expected state. 
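capacity_margin = 1280, introduced above, encodes roughly 20% headroom: the comparison util * margin < capacity * 1024 is equivalent to util < 0.8 * capacity, since 1024 / 1280 = 0.8. A small illustrative helper with worked numbers (example_overutilized() is hypothetical, it simply mirrors the __cpu_overutilized() test kept elsewhere in this file):

/* With capacity == 1024, the CPU counts as overutilized once util
 * reaches 820: 820 * 1280 = 1049600 > 1024 * 1024 = 1048576, whereas
 * 819 * 1280 = 1048320 still fits. About 20% of capacity stays reserved. */
static bool example_overutilized(unsigned long util, unsigned long capacity)
{
        return capacity * 1024 < util * 1280;   /* capacity_margin == 1280 */
}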
+ */ + se->avg.last_update_time = cfs_rq_clock_task(cfs_rq); + return; + } + } + + attach_entity_cfs_rq(se); } -#else +#else /* !CONFIG_SMP */ void init_entity_runnable_average(struct sched_entity *se) { } void post_init_entity_util_avg(struct sched_entity *se) { } -#endif +static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) +{ +} +#endif /* CONFIG_SMP */ /* * Update the current task's runtime statistics. @@ -3162,10 +3166,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) if (update_freq && (decayed || removed_util)) cfs_rq_util_change(cfs_rq); - /* Trace CPU load, unless cfs_rq belongs to a non-root task_group */ - if (cfs_rq == &rq_of(cfs_rq)->cfs) - trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq); - return decayed || removed; } @@ -3174,7 +3174,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) */ #define UPDATE_TG 0x1 #define SKIP_AGE_LOAD 0x2 -#define SKIP_CPUFREQ 0x4 /* Update task and its cfs_rq load average */ static inline void update_load_avg(struct sched_entity *se, int flags) @@ -3195,7 +3194,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags) cfs_rq->curr == se, NULL); } - decayed = update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ)); + decayed = update_cfs_rq_load_avg(now, cfs_rq, true); decayed |= propagate_entity_load_avg(se); if (decayed && (flags & UPDATE_TG)) @@ -3315,18 +3314,18 @@ void sync_entity_load_avg(struct sched_entity *se) void remove_entity_load_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - u64 last_update_time; /* - * Newly created task or never used group entity should not be removed - * from its (source) cfs_rq + * tasks cannot exit without having gone through wake_up_new_task() -> + * post_init_entity_util_avg() which will have added things to the + * cfs_rq, so we can remove unconditionally. + * + * Similarly for groups, they will have passed through + * post_init_entity_util_avg() before unregister_sched_fair_group() + * calls this. */ - if (se->avg.last_update_time == 0) - return; - - last_update_time = cfs_rq_last_update_time(cfs_rq); - __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); + sync_entity_load_avg(se); atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); } @@ -3371,7 +3370,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) #define UPDATE_TG 0x0 #define SKIP_AGE_LOAD 0x0 -#define SKIP_CPUFREQ 0x0 static inline void update_load_avg(struct sched_entity *se, int not_used1){} static inline void @@ -3588,8 +3586,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - int update_flags; - /* * Update run-time statistics of the 'current'. */ @@ -3603,12 +3599,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * - For group entity, update its weight to reflect the new share * of its group cfs_rq. 
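post_init_entity_util_avg(), reshaped in the hunk above, seeds a new task's util_avg from the state of its cfs_rq rather than from a fixed constant: the available headroom is cap = (SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2, a task landing on an otherwise idle cfs_rq simply starts at cap, and (in the branch elided by this hunk, as in mainline) a value extrapolated from the rq's existing average is clamped to cap. Worked numbers (illustrative):

/* cfs_rq->avg.util_avg == 0   ->  cap = (1024 - 0) / 2   = 512,
 *                                  the new task starts at util_avg = 512
 * cfs_rq->avg.util_avg == 700 ->  cap = (1024 - 700) / 2 = 162,
 *                                  any extrapolated seed is capped at 162 */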
*/ - update_flags = UPDATE_TG; - - if (flags & DEQUEUE_IDLE) - update_flags |= SKIP_CPUFREQ; - - update_load_avg(se, update_flags); + update_load_avg(se, UPDATE_TG); dequeue_entity_load_avg(cfs_rq, se); update_stats_dequeue(cfs_rq, se); @@ -4631,7 +4622,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) WARN_ON(task_rq(p) != rq); - if (rq->cfs.h_nr_running > 1) { + if (cfs_rq->nr_running > 1) { u64 slice = sched_slice(cfs_rq, se); u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime; s64 delta = slice - ran; @@ -4672,42 +4663,13 @@ static inline void hrtick_update(struct rq *rq) #endif #ifdef CONFIG_SMP - -static inline long -schedtune_task_margin(struct task_struct *p); static bool __cpu_overutilized(int cpu, int delta); static bool cpu_overutilized(int cpu); -static bool cpu_halfutilized(int cpu); -static bool need_spread_task(int cpu); unsigned long boosted_cpu_util(int cpu); #else #define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif -static inline bool -is_sd_overutilized(struct sched_domain *sd) -{ - if (sd) - return sd->shared->overutilized; - else - return false; -} - -static inline void -set_sd_overutilized(struct sched_domain *sd) -{ - if (sd) - sd->shared->overutilized = true; -} - -static inline void -clear_sd_overutilized(struct sched_domain *sd) -{ - if (sd) - sd->shared->overutilized = false; -} - - /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -4717,29 +4679,9 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; - struct sched_domain *sd; struct sched_entity *se = &p->se; #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; - - /* - * Update SchedTune accounting. - * - * We do it before updating the CPU capacity to ensure the - * boost value of the current task is accounted for in the - * selection of the OPP. - * - * We do it also in the case where we enqueue a throttled task; - * we could argue that a throttled task should not boost a CPU, - * however: - * a) properly implementing CPU boosting considering throttled - * tasks will increase a lot the complexity of the solution - * b) it's not easy to quantify the benefits introduced by - * such a more complex solution. - * Thus, for the time being we go for the simple solution and boost - * also for throttled RQs. - */ - schedtune_enqueue_task(p, cpu_of(rq)); #endif /* @@ -4789,16 +4731,33 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) add_nr_running(rq, 1); #ifdef CONFIG_SMP - if (!se) { - walt_inc_cumulative_runnable_avg(rq, p); - rcu_read_lock(); - sd = rcu_dereference(rq->sd); - if (!task_new && !is_sd_overutilized(sd) && - cpu_overutilized(rq->cpu)) - set_sd_overutilized(sd); - rcu_read_unlock(); + /* + * Update SchedTune accounting. + * + * We do it before updating the CPU capacity to ensure the + * boost value of the current task is accounted for in the + * selection of the OPP. + * + * We do it also in the case where we enqueue a throttled task; + * we could argue that a throttled task should not boost a CPU, + * however: + * a) properly implementing CPU boosting considering throttled + * tasks will increase a lot the complexity of the solution + * b) it's not easy to quantify the benefits introduced by + * such a more complex solution. + * Thus, for the time being we go for the simple solution and boost + * also for throttled RQs. 
+ */ + schedtune_enqueue_task(p, cpu_of(rq)); + if (!se) { + walt_inc_cumulative_runnable_avg(rq, p); + if (!task_new && !rq->rd->overutilized && + cpu_overutilized(rq->cpu)) { + rq->rd->overutilized = true; + trace_sched_overutilized(true); + } } #endif /* CONFIG_SMP */ @@ -4818,20 +4777,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; int task_sleep = flags & DEQUEUE_SLEEP; -#ifdef CONFIG_SMP - /* - * Update SchedTune accounting - * - * We do it before updating the CPU capacity to ensure the - * boost value of the current task is accounted for in the - * selection of the OPP. - */ - schedtune_dequeue_task(p, cpu_of(rq)); -#endif - - if (task_sleep && rq->nr_running == 1) - flags |= DEQUEUE_IDLE; - for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); @@ -4866,8 +4811,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #endif for_each_sched_entity(se) { - int update_flags; - cfs_rq = cfs_rq_of(se); cfs_rq->h_nr_running--; walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p); @@ -4875,12 +4818,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; - update_flags = UPDATE_TG; - - if (flags & DEQUEUE_IDLE) - update_flags |= SKIP_CPUFREQ; - - update_load_avg(se, update_flags); + update_load_avg(se, UPDATE_TG); update_cfs_shares(se); } @@ -4888,6 +4826,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) sub_nr_running(rq, 1); #ifdef CONFIG_SMP + + /* + * Update SchedTune accounting + * + * We do it before updating the CPU capacity to ensure the + * boost value of the current task is accounted for in the + * selection of the OPP. + */ + schedtune_dequeue_task(p, cpu_of(rq)); + if (!se) walt_dec_cumulative_runnable_avg(rq, p); #endif /* CONFIG_SMP */ @@ -5302,85 +5250,33 @@ unsigned long capacity_curr_of(int cpu) >> SCHED_CAPACITY_SHIFT; } -/* - * Returns the current capacity of cpu after applying both - * cpu and min freq scaling. - */ -unsigned long capacity_min_of(int cpu) -{ - if (!sched_feat(MIN_CAPACITY_CAPPING)) - return 0; - return arch_scale_cpu_capacity(NULL, cpu) * - arch_scale_min_freq_capacity(NULL, cpu) - >> SCHED_CAPACITY_SHIFT; -} - - static inline bool energy_aware(void) { return sched_feat(ENERGY_AWARE); } -/* - * CPU candidates. - * - * These are labels to reference CPU candidates for an energy_diff. - * Currently we support only two possible candidates: the task's previous CPU - * and another candiate CPU. - * More advanced/aggressive EAS selection policies can consider more - * candidates. - */ -#define EAS_CPU_PRV 0 -#define EAS_CPU_NXT 1 -#define EAS_CPU_BKP 2 -#define EAS_CPU_CNT 3 - -/* - * energy_diff - supports the computation of the estimated energy impact in - * moving a "task"'s "util_delta" between different CPU candidates. - */ struct energy_env { - /* Utilization to move */ - struct task_struct *p; - int util_delta; - - /* Mask of CPUs candidates to evaluate */ - cpumask_t cpus_mask; - - /* CPU candidates to evaluate */ - struct { - - /* CPU ID, must be in cpus_mask */ - int cpu_id; - - /* - * Index (into sched_group_energy::cap_states) of the OPP the - * CPU needs to run at if the task is placed on it. - * This includes the both active and blocked load, due to - * other tasks on this CPU, as well as the task's own - * utilization. 
- */ - int cap_idx; - int cap; - - /* Estimated system energy */ - unsigned int energy; - - /* Estimated energy variation wrt EAS_CPU_PRV */ - int nrg_delta; - - } cpu[EAS_CPU_CNT]; - - /* - * Index (into energy_env::cpu) of the morst energy efficient CPU for - * the specified energy_env::task - */ - int next_idx; - - /* Support data */ struct sched_group *sg_top; struct sched_group *sg_cap; - struct sched_group *sg; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int trg_cpu; + int energy; + int payoff; + struct task_struct *task; + struct { + int before; + int after; + int delta; + int diff; + } nrg; + struct { + int before; + int after; + int delta; + } cap; }; static int cpu_util_wake(int cpu, struct task_struct *p); @@ -5408,33 +5304,24 @@ static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity) return (util << SCHED_CAPACITY_SHIFT)/capacity; } -static unsigned long group_max_util(struct energy_env *eenv, int cpu_idx) +static unsigned long group_max_util(struct energy_env *eenv) { unsigned long max_util = 0; unsigned long util; int cpu; for_each_cpu(cpu, sched_group_cpus(eenv->sg_cap)) { - util = cpu_util_wake(cpu, eenv->p); + util = cpu_util_wake(cpu, eenv->task); /* * If we are looking at the target CPU specified by the eenv, * then we should add the (estimated) utilization of the task * assuming we will wake it up on that CPU. */ - if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id)) + if (unlikely(cpu == eenv->trg_cpu)) util += eenv->util_delta; max_util = max(max_util, util); - - /* - * Take into account any minimum frequency imposed - * elsewhere which limits the energy states available - * If the MIN_CAPACITY_CAPPING feature is not enabled - * capacity_min_of will return 0 (not capped). - */ - max_util = max(max_util, capacity_min_of(cpu)); - } return max_util; @@ -5452,21 +5339,21 @@ static unsigned long group_max_util(struct energy_env *eenv, int cpu_idx) * estimate (more busy). */ static unsigned -long group_norm_util(struct energy_env *eenv, int cpu_idx) +long group_norm_util(struct energy_env *eenv, struct sched_group *sg) { - unsigned long capacity = eenv->cpu[cpu_idx].cap; + unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; unsigned long util, util_sum = 0; int cpu; - for_each_cpu(cpu, sched_group_cpus(eenv->sg)) { - util = cpu_util_wake(cpu, eenv->p); + for_each_cpu(cpu, sched_group_cpus(sg)) { + util = cpu_util_wake(cpu, eenv->task); /* * If we are looking at the target CPU specified by the eenv, * then we should add the (estimated) utilization of the task * assuming we will wake it up on that CPU. 
*/ - if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id)) + if (unlikely(cpu == eenv->trg_cpu)) util += eenv->util_delta; util_sum += __cpu_norm_util(util, capacity); @@ -5475,53 +5362,27 @@ long group_norm_util(struct energy_env *eenv, int cpu_idx) return min_t(unsigned long, util_sum, SCHED_CAPACITY_SCALE); } -static int find_new_capacity(struct energy_env *eenv, int cpu_idx) +static int find_new_capacity(struct energy_env *eenv, + const struct sched_group_energy * const sge) { - const struct sched_group_energy *sge = eenv->sg->sge; int idx, max_idx = sge->nr_cap_states - 1; - unsigned long util = group_max_util(eenv, cpu_idx); + unsigned long util = group_max_util(eenv); /* default is max_cap if we don't find a match */ - eenv->cpu[cpu_idx].cap_idx = max_idx; - eenv->cpu[cpu_idx].cap = sge->cap_states[max_idx].cap; + eenv->cap_idx = max_idx; for (idx = 0; idx < sge->nr_cap_states; idx++) { if (sge->cap_states[idx].cap >= util) { - /* Keep track of SG's capacity */ - eenv->cpu[cpu_idx].cap_idx = idx; - eenv->cpu[cpu_idx].cap = sge->cap_states[idx].cap; + eenv->cap_idx = idx; break; } } - return eenv->cpu[cpu_idx].cap_idx; -} - -static int find_cpu_new_capacity(int cpu, unsigned long util) -{ - struct sched_domain *sd; - const struct sched_group_energy *sge; - int idx; - - sd = rcu_dereference(per_cpu(sd_ea, cpu)); - if (!sd) - return INT_MAX; - - sge = sd->groups->sge; - - for (idx = 0; idx < sge->nr_cap_states; idx++) - if (sge->cap_states[idx].cap >= util) - break; - - if (idx == sge->nr_cap_states) - idx = idx - 1; - - return idx; + return eenv->cap_idx; } -static int group_idle_state(struct energy_env *eenv, int cpu_idx) +static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) { - struct sched_group *sg = eenv->sg; int i, state = INT_MAX; int src_in_grp, dst_in_grp; long grp_util = 0; @@ -5533,10 +5394,8 @@ static int group_idle_state(struct energy_env *eenv, int cpu_idx) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; - src_in_grp = cpumask_test_cpu(eenv->cpu[EAS_CPU_PRV].cpu_id, - sched_group_cpus(sg)); - dst_in_grp = cpumask_test_cpu(eenv->cpu[cpu_idx].cpu_id, - sched_group_cpus(sg)); + src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); + dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); if (src_in_grp == dst_in_grp) { /* both CPUs under consideration are in the same group or not in * either group, migration should leave idle state the same. @@ -5549,8 +5408,8 @@ static int group_idle_state(struct energy_env *eenv, int cpu_idx) * achievable when we move the task. */ for_each_cpu(i, sched_group_cpus(sg)) { - grp_util += cpu_util_wake(i, eenv->p); - if (unlikely(i == eenv->cpu[cpu_idx].cpu_id)) + grp_util += cpu_util_wake(i, eenv->task); + if (unlikely(i == eenv->trg_cpu)) grp_util += eenv->util_delta; } @@ -5586,65 +5445,19 @@ static int group_idle_state(struct energy_env *eenv, int cpu_idx) } /* - * calc_sg_energy: compute energy for the eenv's SG (i.e. eenv->sg). - * - * This works in iterations to compute the SG's energy for each CPU - * candidate defined by the energy_env's cpu array. - * - * NOTE: in the following computations for busy_energy and idle_energy we do - * not shift by SCHED_CAPACITY_SHIFT in order to reduce rounding errors. - * The required scaling will be performed just one time, by the calling - * functions, once we accumulated the contributons for all the SGs. 
- */ -static void calc_sg_energy(struct energy_env *eenv) -{ - struct sched_group *sg = eenv->sg; - int busy_energy, idle_energy; - unsigned int busy_power; - unsigned int idle_power; - unsigned long sg_util; - int cap_idx, idle_idx; - int total_energy = 0; - int cpu_idx; - - for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx) { - - - if (eenv->cpu[cpu_idx].cpu_id == -1) - continue; - /* Compute ACTIVE energy */ - cap_idx = find_new_capacity(eenv, cpu_idx); - busy_power = sg->sge->cap_states[cap_idx].power; - /* - * in order to calculate cpu_norm_util, we need to know which - * capacity level the group will be at, so calculate that first - */ - sg_util = group_norm_util(eenv, cpu_idx); - - busy_energy = sg_util * busy_power; - - /* Compute IDLE energy */ - idle_idx = group_idle_state(eenv, cpu_idx); - idle_power = sg->sge->idle_states[idle_idx].power; - - idle_energy = SCHED_LOAD_SCALE - sg_util; - idle_energy *= idle_power; - - total_energy = busy_energy + idle_energy; - eenv->cpu[cpu_idx].energy += total_energy; - } -} - -/* - * compute_energy() computes the absolute variation in energy consumption by - * moving eenv.util_delta from EAS_CPU_PRV to EAS_CPU_NXT. - * - * NOTE: compute_energy() may fail when racing with sched_domain updates, in - * which case we abort by returning -EINVAL. + * sched_group_energy(): Computes the absolute energy consumption of cpus + * belonging to the sched_group including shared resources shared only by + * members of the group. Iterates over all cpus in the hierarchy below the + * sched_group starting from the bottom working it's way up before going to + * the next cpu until all cpus are covered at all levels. The current + * implementation is likely to gather the same util statistics multiple times. + * This can probably be done in a faster but more complex way. + * Note: sched_group_energy() may fail when racing with sched_domain updates. */ -static int compute_energy(struct energy_env *eenv) +static int sched_group_energy(struct energy_env *eenv) { struct cpumask visit_cpus; + u64 total_energy = 0; int cpu_count; WARN_ON(!eenv->sg_top->sge); @@ -5674,6 +5487,7 @@ static int compute_energy(struct energy_env *eenv) * when we took visit_cpus. */ sd = rcu_dereference(per_cpu(sd_scs, cpu)); + if (sd && sd->parent) sg_shared_cap = sd->parent->groups; @@ -5685,18 +5499,41 @@ static int compute_energy(struct energy_env *eenv) break; do { - eenv->sg_cap = sg; + unsigned long group_util; + int sg_busy_energy, sg_idle_energy; + int cap_idx, idle_idx; + if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) eenv->sg_cap = sg_shared_cap; + else + eenv->sg_cap = sg; - /* - * Compute the energy for all the candidate - * CPUs in the current visited SG. 
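The revert re-inlines the per-group energy estimate into sched_group_energy() (body just below): every visited group contributes a busy term weighted by its normalized utilization and an idle term weighted by the remaining fraction, and the accumulated sum is scaled down once at the end. In sketch form, with the names used in the hunk below:

/* group_util comes from group_norm_util(), normalized to SCHED_LOAD_SCALE */
sg_busy_energy = group_util * sg->sge->cap_states[cap_idx].power;
sg_idle_energy = (SCHED_LOAD_SCALE - group_util)
                        * sg->sge->idle_states[idle_idx].power;
total_energy  += sg_busy_energy + sg_idle_energy;

/* after all groups have been visited */
eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;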
- */ - eenv->sg = sg; - calc_sg_energy(eenv); + cap_idx = find_new_capacity(eenv, sg->sge); + + if (sg->group_weight == 1) { + /* Remove capacity of src CPU (before task move) */ + if (eenv->trg_cpu == eenv->src_cpu && + cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { + eenv->cap.before = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta -= eenv->cap.before; + } + /* Add capacity of dst CPU (after task move) */ + if (eenv->trg_cpu == eenv->dst_cpu && + cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { + eenv->cap.after = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta += eenv->cap.after; + } + } + + idle_idx = group_idle_state(eenv, sg); + group_util = group_norm_util(eenv, sg); + + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power); + sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) + * sg->sge->idle_states[idle_idx].power); + + total_energy += sg_busy_energy + sg_idle_energy; - /* remove CPUs we have just visited */ if (!sd->child) { /* * cpu_count here is the number of @@ -5737,6 +5574,7 @@ static int compute_energy(struct energy_env *eenv) continue; } + eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT; return 0; } @@ -5745,105 +5583,185 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu) return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); } +static inline unsigned long task_util(struct task_struct *p); + /* - * select_energy_cpu_idx(): estimate the energy impact of changing the - * utilization distribution. - * - * The eenv parameter specifies the changes: utilisation amount and a pair of - * possible CPU candidates (the previous CPU and a different target CPU). - * - * This function returns the index of a CPU candidate specified by the - * energy_env which corresponds to the first CPU saving energy. - * Thus, 0 (EAS_CPU_PRV) means that non of the CPU candidate is more energy - * efficient than running on prev_cpu. This is also the value returned in case - * of abort due to error conditions during the computations. - * A value greater than zero means that the first energy-efficient CPU is the - * one represented by eenv->cpu[eenv->next_idx].cpu_id. + * energy_diff(): Estimate the energy impact of changing the utilization + * distribution. eenv specifies the change: utilisation amount, source, and + * destination cpu. Source or destination cpu may be -1 in which case the + * utilization is removed from or added to the system (e.g. task wake-up). If + * both are specified, the utilization is migrated. */ -static inline int select_energy_cpu_idx(struct energy_env *eenv) +static inline int __energy_diff(struct energy_env *eenv) { struct sched_domain *sd; struct sched_group *sg; - int sd_cpu = -1; - int cpu_idx; - int margin; + int sd_cpu = -1, energy_before = 0, energy_after = 0; + int diff, margin; + + struct energy_env eenv_before = { + .util_delta = task_util(eenv->task), + .src_cpu = eenv->src_cpu, + .dst_cpu = eenv->dst_cpu, + .trg_cpu = eenv->src_cpu, + .nrg = { 0, 0, 0, 0}, + .cap = { 0, 0, 0 }, + .task = eenv->task, + }; - sd_cpu = eenv->cpu[EAS_CPU_PRV].cpu_id; - sd = rcu_dereference(per_cpu(sd_ea, sd_cpu)); - if (!sd) - return EAS_CPU_PRV; + if (eenv->src_cpu == eenv->dst_cpu) + return 0; - cpumask_clear(&eenv->cpus_mask); - for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx) { - int cpu = eenv->cpu[cpu_idx].cpu_id; + sd_cpu = (eenv->src_cpu != -1) ? 
eenv->src_cpu : eenv->dst_cpu; + sd = rcu_dereference(per_cpu(sd_ea, sd_cpu)); - if (cpu < 0) - continue; - cpumask_set_cpu(cpu, &eenv->cpus_mask); - } + if (!sd) + return 0; /* Error */ sg = sd->groups; + do { - /* Skip SGs which do not contains a candidate CPU */ - if (!cpumask_intersects(&eenv->cpus_mask, sched_group_cpus(sg))) - continue; + if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) { + eenv_before.sg_top = eenv->sg_top = sg; - eenv->sg_top = sg; - /* energy is unscaled to reduce rounding errors */ - if (compute_energy(eenv) == -EINVAL) - return EAS_CPU_PRV; + if (sched_group_energy(&eenv_before)) + return 0; /* Invalid result abort */ + energy_before += eenv_before.energy; - } while (sg = sg->next, sg != sd->groups); + /* Keep track of SRC cpu (before) capacity */ + eenv->cap.before = eenv_before.cap.before; + eenv->cap.delta = eenv_before.cap.delta; - /* Scale energy before comparisons */ - for (cpu_idx = EAS_CPU_PRV; cpu_idx < EAS_CPU_CNT; ++cpu_idx) - eenv->cpu[cpu_idx].energy >>= SCHED_CAPACITY_SHIFT; + if (sched_group_energy(eenv)) + return 0; /* Invalid result abort */ + energy_after += eenv->energy; + } + } while (sg = sg->next, sg != sd->groups); + eenv->nrg.before = energy_before; + eenv->nrg.after = energy_after; + eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; + eenv->payoff = 0; +#ifndef CONFIG_SCHED_TUNE + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff); +#endif /* - * Compute the dead-zone margin used to prevent too many task - * migrations with negligible energy savings. - * An energy saving is considered meaningful if it reduces the energy - * consumption of EAS_CPU_PRV CPU candidate by at least ~1.56% + * Dead-zone margin preventing too many migrations. */ - margin = eenv->cpu[EAS_CPU_PRV].energy >> 6; - /* - * By default the EAS_CPU_PRV CPU is considered the most energy - * efficient, with a 0 energy variation. - */ - eenv->next_idx = EAS_CPU_PRV; + margin = eenv->nrg.before >> 6; /* ~1.56% */ - /* - * Compare the other CPU candidates to find a CPU which can be - * more energy efficient then EAS_CPU_PRV - */ - for (cpu_idx = EAS_CPU_NXT; cpu_idx < EAS_CPU_CNT; ++cpu_idx) { - /* Skip not valid scheduled candidates */ - if (eenv->cpu[cpu_idx].cpu_id < 0) - continue; - /* Compute energy delta wrt EAS_CPU_PRV */ - eenv->cpu[cpu_idx].nrg_delta = - eenv->cpu[cpu_idx].energy - - eenv->cpu[EAS_CPU_PRV].energy; - /* filter energy variations within the dead-zone margin */ - if (abs(eenv->cpu[cpu_idx].nrg_delta) < margin) - eenv->cpu[cpu_idx].nrg_delta = 0; - /* update the schedule candidate with min(nrg_delta) */ - if (eenv->cpu[cpu_idx].nrg_delta < - eenv->cpu[eenv->next_idx].nrg_delta) { - eenv->next_idx = cpu_idx; - if (sched_feat(FBT_STRICT_ORDER)) - break; - } - } + diff = eenv->nrg.after - eenv->nrg.before; + + eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff; - return eenv->next_idx; + return eenv->nrg.diff; } -/* - * Detect M:N waker/wakee relationships via a switching-frequency heuristic. - * A waker of many should wake a different task than the one last awakened - * at a frequency roughly N times higher than one of its wakees. 
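The dead-zone filter in __energy_diff() above drops energy deltas smaller than one 64th of the before-energy (the >> 6, roughly 1.56%) so that negligible savings do not trigger task migrations. Worked numbers (illustrative):

/* eenv->nrg.before == 6400  ->  margin = 6400 >> 6 = 100
 * a computed diff of -80 is inside the dead zone and is reported as 0
 * (no migration benefit), while a diff of -150 survives the filter. */
margin = eenv->nrg.before >> 6;
diff   = eenv->nrg.after - eenv->nrg.before;
eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff;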
In order +#ifdef CONFIG_SCHED_TUNE + +struct target_nrg schedtune_target_nrg; + +#ifdef CONFIG_CGROUP_SCHEDTUNE +extern bool schedtune_initialized; +#endif /* CONFIG_CGROUP_SCHEDTUNE */ + +/* + * System energy normalization + * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE], + * corresponding to the specified energy variation. + */ +static inline int +normalize_energy(int energy_diff) +{ + u32 normalized_nrg; + +#ifdef CONFIG_CGROUP_SCHEDTUNE + /* during early setup, we don't know the extents */ + if (unlikely(!schedtune_initialized)) + return energy_diff < 0 ? -1 : 1 ; +#endif /* CONFIG_CGROUP_SCHEDTUNE */ + +#ifdef CONFIG_SCHED_DEBUG + { + int max_delta; + + /* Check for boundaries */ + max_delta = schedtune_target_nrg.max_power; + max_delta -= schedtune_target_nrg.min_power; + WARN_ON(abs(energy_diff) >= max_delta); + } +#endif + + /* Do scaling using positive numbers to increase the range */ + normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff; + + /* Scale by energy magnitude */ + normalized_nrg <<= SCHED_CAPACITY_SHIFT; + + /* Normalize on max energy for target platform */ + normalized_nrg = reciprocal_divide( + normalized_nrg, schedtune_target_nrg.rdiv); + + return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; +} + +static inline int +energy_diff(struct energy_env *eenv) +{ + int boost = schedtune_task_boost(eenv->task); + int nrg_delta; + + /* Conpute "absolute" energy diff */ + __energy_diff(eenv); + + /* Return energy diff when boost margin is 0 */ + if (boost == 0) { + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + 0, -eenv->nrg.diff); + return eenv->nrg.diff; + } + + /* Compute normalized energy diff */ + nrg_delta = normalize_energy(eenv->nrg.diff); + eenv->nrg.delta = nrg_delta; + + eenv->payoff = schedtune_accept_deltas( + eenv->nrg.delta, + eenv->cap.delta, + eenv->task); + + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff); + + /* + * When SchedTune is enabled, the energy_diff() function will return + * the computed energy payoff value. Since the energy_diff() return + * value is expected to be negative by its callers, this evaluation + * function return a negative value each time the evaluation return a + * positive payoff, which is the condition for the acceptance of + * a scheduling decision + */ + return -eenv->payoff; +} +#else /* CONFIG_SCHED_TUNE */ +#define energy_diff(eenv) __energy_diff(eenv) +#endif + +/* + * Detect M:N waker/wakee relationships via a switching-frequency heuristic. + * A waker of many should wake a different task than the one last awakened + * at a frequency roughly N times higher than one of its wakees. In order * to determine whether we should let the load spread vs consolodating to * shared cache, we look for a minimum 'flip' frequency of llc_size in one * partner, and a factor of lls_size higher frequency in the other. 
With @@ -5945,9 +5863,8 @@ static inline unsigned long task_util(struct task_struct *p) return p->se.avg.util_avg; } -unsigned int capacity_margin = 1280; /* ~20% margin */ +static inline unsigned long boosted_task_util(struct task_struct *task); -static inline unsigned long boosted_task_util(struct task_struct *p); static inline bool __task_fits(struct task_struct *p, int cpu, int util) { unsigned long capacity = capacity_of(cpu); @@ -5961,33 +5878,16 @@ static inline bool task_fits_max(struct task_struct *p, int cpu) { unsigned long capacity = capacity_of(cpu); unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val; -#ifdef CONFIG_HISI_EAS_SCHED - unsigned long max_allowed_cap = 0; - int allowed_cpu; -#endif if (capacity == max_capacity) return true; -#ifdef CONFIG_HISI_EAS_SCHED - for_each_cpu(allowed_cpu, tsk_cpus_allowed(p)) { - if (capacity_orig_of(allowed_cpu) > max_allowed_cap) - max_allowed_cap = capacity_orig_of(allowed_cpu); - } - - /* allowed cpus is limited */ - if (max_allowed_cap <= capacity_orig_of(cpu)) + if (capacity * capacity_margin > max_capacity * 1024) return true; -#endif return __task_fits(p, cpu, 0); } -static inline bool task_fits_spare(struct task_struct *p, int cpu) -{ - return __task_fits(p, cpu, cpu_util(cpu)); -} - static bool __cpu_overutilized(int cpu, int delta) { return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin); @@ -5998,64 +5898,6 @@ static bool cpu_overutilized(int cpu) return __cpu_overutilized(cpu, 0); } -static bool cpu_halfutilized(int cpu) -{ - return capacity_of(cpu) < (cpu_util(cpu) * 2); -} - -static bool need_spread_task(int cpu) -{ - struct sched_domain *sd; - int i; - - sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); - if (!sd) { - return false; - } - - for_each_cpu(i, sched_domain_span(sd)) { - if (cpu_util(i) * capacity_margin < capacity_orig_of(i) * 1024) { - return false; - } - } - - return true; -} - -static bool need_want_affine(struct task_struct *p, int cpu) -{ - int capacity = capacity_orig_of(cpu); - int max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val; - unsigned long margin = schedtune_task_margin(p); - struct sched_domain *sd; - int affine = 0, i; - - if (margin) - return 1; - - if (capacity != max_capacity) - return 1; - - rcu_read_lock(); - - sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); - if (!sd) { - rcu_read_unlock(); - return 1; - } - - for_each_cpu(i, sched_domain_span(sd)) { - if (idle_cpu(i) && __task_fits(p, i, cpu_util_wake(i, p))) { - affine = 1; - break; - } - } - - rcu_read_unlock(); - - return affine; -} - #ifdef CONFIG_SCHED_TUNE struct reciprocal_value schedtune_spc_rdiv; @@ -6076,14 +5918,13 @@ schedtune_margin(unsigned long signal, long boost) if (boost >= 0) { margin = SCHED_CAPACITY_SCALE - signal; margin *= boost; - } else { + } else margin = -signal * boost; - } margin = reciprocal_divide(margin, schedtune_spc_rdiv); + if (boost < 0) margin *= -1; - return margin; } @@ -6099,16 +5940,16 @@ schedtune_cpu_margin(unsigned long util, int cpu) } static inline long -schedtune_task_margin(struct task_struct *p) +schedtune_task_margin(struct task_struct *task) { - int boost = schedtune_task_boost(p); + int boost = schedtune_task_boost(task); unsigned long util; long margin; if (boost == 0) return 0; - util = task_util(p); + util = task_util(task); margin = schedtune_margin(util, boost); return margin; @@ -6122,8 +5963,8 @@ schedtune_cpu_margin(unsigned long util, int cpu) return 0; } -static inline long -schedtune_task_margin(struct task_struct 
*p) +static inline int +schedtune_task_margin(struct task_struct *task) { return 0; } @@ -6142,16 +5983,21 @@ boosted_cpu_util(int cpu) } static inline unsigned long -boosted_task_util(struct task_struct *p) +boosted_task_util(struct task_struct *task) { - unsigned long util = task_util(p); - long margin = schedtune_task_margin(p); + unsigned long util = task_util(task); + long margin = schedtune_task_margin(task); - trace_sched_boost_task(p, util, margin); + trace_sched_boost_task(task, util, margin); return util + margin; } +static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) +{ + return capacity_orig_of(cpu) - cpu_util_wake(cpu, p); +} + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -6163,10 +6009,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag) { struct sched_group *idlest = NULL, *group = sd->groups; - struct sched_group *fit_group = NULL, *spare_group = NULL; + struct sched_group *most_spare_sg = NULL; unsigned long min_load = ULONG_MAX, this_load = ULONG_MAX; - unsigned long fit_capacity = ULONG_MAX; - unsigned long max_spare_capacity = capacity_margin - SCHED_LOAD_SCALE; + unsigned long most_spare = 0, this_spare = 0; int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -6174,7 +6019,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, load_idx = sd->wake_idx; do { - unsigned long load, avg_load, spare_capacity; + unsigned long load, avg_load, spare_cap, max_spare_cap; int local_group; int i; @@ -6186,8 +6031,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(group)); - /* Tally up the load of all CPUs in the group */ + /* + * Tally up the load of all CPUs in the group and find + * the group containing the CPU with most spare capacity. + */ avg_load = 0; + max_spare_cap = 0; for_each_cpu(i, sched_group_cpus(group)) { /* Bias balancing toward cpus of our domain */ @@ -6198,24 +6047,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += load; - /* - * Look for most energy-efficient group that can fit - * that can fit the task. - */ - if (capacity_of(i) < fit_capacity && __task_fits(p, i, cpu_util_wake(i, p))) { - fit_capacity = capacity_of(i); - fit_group = group; - } + spare_cap = capacity_spare_wake(i, p); - /* - * Look for group which has most spare capacity on a - * single cpu. 
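The schedtune_margin()/boosted_task_util() hunks above keep the boost arithmetic: a positive boost adds boost% of the headroom up to SCHED_CAPACITY_SCALE, a negative boost removes |boost|% of the current utilization. The userspace sketch below (not part of the patch) mirrors that computation; reciprocal_divide(..., schedtune_spc_rdiv) is modelled as a plain division by 100 since the boost value is a percentage, and the sample utilization is an assumption.

#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024

static long schedtune_margin_sketch(long util, long boost)
{
	long margin;

	if (boost >= 0)
		margin = (SCHED_CAPACITY_SCALE - util) * boost;
	else
		margin = -util * boost;

	margin /= 100;			/* stands in for reciprocal_divide() */

	return boost < 0 ? -margin : margin;
}

int main(void)
{
	long util = 300;	/* assumed task utilization */

	printf("boost  50%%: %ld\n", util + schedtune_margin_sketch(util, 50));
	printf("boost -30%%: %ld\n", util + schedtune_margin_sketch(util, -30));
	return 0;
}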
- */ - spare_capacity = capacity_of(i) - cpu_util_wake(i, p); - if (spare_capacity > max_spare_capacity) { - max_spare_capacity = spare_capacity; - spare_group = group; - } + if (spare_cap > max_spare_cap) + max_spare_cap = spare_cap; } /* Adjust by relative CPU capacity of the group */ @@ -6223,172 +6058,45 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, if (local_group) { this_load = avg_load; - } else if (avg_load < min_load) { - min_load = avg_load; - idlest = group; - } - } while (group = group->next, group != sd->groups); - - if (fit_group && (sd->flags & SD_ASYM_CPUCAPACITY)) - return fit_group; - - if (spare_group && !(sd->flags & SD_ASYM_CPUCAPACITY)) - return spare_group; - - if (!idlest || 100*this_load < imbalance*min_load) - return NULL; - return idlest; -} - -#ifdef CONFIG_HISI_EAS_SCHED -static unsigned long cpu_spare_capacity(int cpu, unsigned long util) -{ - unsigned long spare_capacity; - spare_capacity = capacity_of(cpu) - util; - spare_capacity = clamp(spare_capacity, 0UL, capacity_of(cpu)); - - return spare_capacity; -} - -static int -find_spare_boost_cpu(struct cpumask *group_cpus, struct task_struct *p) -{ - int spare_boost_cpu = -1; - unsigned long max_spare_capacity = 0; - unsigned long spare_capacity; - int i; - int spare_idle_cpu = -1; - unsigned long max_idle_cap = 0; - unsigned long wake_util; - - for_each_cpu_and(i, group_cpus, tsk_cpus_allowed(p)) { - /* If the CPU's utilizaiton is over 60%, - * then we don't consider the cpu as spare one. - */ - wake_util = cpu_util_wake(i, p); - if (!__task_fits(p, i, wake_util)) - continue; - - spare_capacity = cpu_spare_capacity(i, wake_util); - if (idle_cpu(i)) { - if (spare_idle_cpu != i && spare_capacity > max_idle_cap) { - spare_idle_cpu = i; - max_idle_cap = spare_capacity; - } + this_spare = max_spare_cap; } else { - if (spare_capacity > max_spare_capacity) { - max_spare_capacity = spare_capacity; - spare_boost_cpu = i; + if (avg_load < min_load) { + min_load = avg_load; + idlest = group; } - } - } - - spare_boost_cpu = (spare_idle_cpu != -1) ? spare_idle_cpu : spare_boost_cpu; - - return spare_boost_cpu; -} - -static int select_boost_cpu(struct task_struct *p, int spare_cpu, int boost_cpu) -{ - unsigned long cap_boost_cpu, cap_spare_cpu; - - cap_boost_cpu = cpu_spare_capacity(boost_cpu, cpu_util_wake(boost_cpu, p)); - cap_spare_cpu = cpu_spare_capacity(spare_cpu, cpu_util_wake(spare_cpu, p)); - - /* select the cpu with max spare cap */ - if (cap_boost_cpu < cap_spare_cpu) - boost_cpu = spare_cpu; - - return boost_cpu; -} - -/* - * find_boost_cpu - find the idlest cpu among the fast_cpus. - */ -static int -find_boost_cpu(struct cpumask *group_cpus, struct task_struct *p, int this_cpu) -{ - unsigned long load, min_load = ULONG_MAX; - unsigned int min_exit_latency = UINT_MAX; - u64 latest_idle_timestamp = 0; - int least_loaded_cpu = this_cpu; - int shallowest_idle_cpu = -1; - int i; - - /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, group_cpus, tsk_cpus_allowed(p)) { - if (!cpumask_test_cpu(i, cpu_online_mask)) - continue; - if (idle_cpu(i)) { - struct rq *rq = cpu_rq(i); - struct cpuidle_state *idle = idle_get_state(rq); - if (idle && idle->exit_latency < min_exit_latency) { - /* - * We give priority to a CPU whose idle state - * has the smallest exit latency irrespective - * of any idle timestamp. 
- */ - min_exit_latency = idle->exit_latency; - latest_idle_timestamp = rq->idle_stamp; - shallowest_idle_cpu = i; - } else if ((!idle || idle->exit_latency == min_exit_latency) && - rq->idle_stamp > latest_idle_timestamp) { - /* - * If equal or no active idle state, then - * the most recently idled CPU might have - * a warmer cache. - */ - latest_idle_timestamp = rq->idle_stamp; - shallowest_idle_cpu = i; - } - } else if (shallowest_idle_cpu == -1) { - load = weighted_cpuload(i); - if (load < min_load || (load == min_load && i == this_cpu)) { - min_load = load; - least_loaded_cpu = i; + if (most_spare < max_spare_cap) { + most_spare = max_spare_cap; + most_spare_sg = group; } } - } - - return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; -} - -static int -find_global_boost_cpu(struct task_struct *p) -{ - struct cpumask fast_cpus; - struct cpumask spare_cpus; - int boost_cpu = -1; - int spare_cpu = -1; - - hisi_get_fast_cpus(&fast_cpus); - - if (cpumask_empty(&fast_cpus) || !cpumask_intersects(tsk_cpus_allowed(p), &fast_cpus) - || !cpumask_intersects(&fast_cpus, cpu_online_mask)) - return -1; - - boost_cpu = find_boost_cpu(&fast_cpus, p, cpumask_first(&fast_cpus)); - if (boost_cpu != -1) { - if (idle_cpu(boost_cpu)) - return boost_cpu; - - /* Enable spare boost cpu feature */ - /* If util of boost_cpu is over 90%, check if any spare cpu is available.*/ - if ((capacity_of(boost_cpu) * 1024) < (cpu_util_wake(boost_cpu, p) * 1138)) { - cpumask_xor(&spare_cpus, &fast_cpus, cpu_online_mask); - spare_cpu = find_spare_boost_cpu(&spare_cpus, p); + } while (group = group->next, group != sd->groups); - /* if spare_cpu available, select max spare one . */ - if (spare_cpu != -1) - boost_cpu= select_boost_cpu(p, spare_cpu, boost_cpu); + /* + * The cross-over point between using spare capacity or least load + * is too conservative for high utilization tasks on partially + * utilized systems if we require spare_capacity > task_util(p), + * so we allow for some task stuffing by using + * spare_capacity > task_util(p)/2. + * + * Spare capacity can't be used for fork because the utilization has + * not been set yet, we must first select a rq to compute the initial + * utilization. + */ + if (sd_flag & SD_BALANCE_FORK) + goto skip_spare; - } - } + if (this_spare > task_util(p) / 2 && + imbalance*this_spare > 100*most_spare) + return NULL; + else if (most_spare > task_util(p) / 2) + return most_spare_sg; - return boost_cpu; +skip_spare: + if (!idlest || 100*this_load < imbalance*min_load) + return NULL; + return idlest; } -#endif /* * find_idlest_group_cpu - find the idlest cpu among the cpus in group. 
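The find_idlest_group() hunk above only trusts spare capacity when it exceeds half of the task's utilization, and lets the local group win through the imbalance_pct margin before falling back to the least-loaded group. The userspace sketch below (not part of the patch) condenses that decision order; all inputs are assumed sample values, not live scheduler state.

#include <stdio.h>

enum pick { PICK_LOCAL, PICK_MOST_SPARE, PICK_IDLEST };

static enum pick pick_group(unsigned long this_spare, unsigned long most_spare,
			    unsigned long this_load, unsigned long min_load,
			    unsigned long task_util, unsigned long imbalance)
{
	if (this_spare > task_util / 2 &&
	    imbalance * this_spare > 100 * most_spare)
		return PICK_LOCAL;		/* local group has enough room */

	if (most_spare > task_util / 2)
		return PICK_MOST_SPARE;		/* remote group with most spare capacity */

	if (100 * this_load < imbalance * min_load)
		return PICK_LOCAL;		/* remote load not clearly lower */

	return PICK_IDLEST;			/* fall back to the least-loaded group */
}

int main(void)
{
	/* imbalance = 100 + (imbalance_pct - 100) / 2, e.g. 112 for pct = 125 */
	printf("%d\n", pick_group(600, 200, 900, 800, 400, 112));
	printf("%d\n", pick_group(100, 500, 900, 800, 400, 112));
	return 0;
}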
@@ -6403,9 +6111,13 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this int shallowest_idle_cpu = -1; int i; + /* Check if we have any choice: */ + if (group->group_weight == 1) + return cpumask_first(sched_group_cpus(group)); + /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { - if (task_fits_spare(p, i)) { + if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { @@ -6417,8 +6129,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this min_exit_latency = idle->exit_latency; latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; - } else if (idle_cpu(i) && - (!idle || idle->exit_latency == min_exit_latency) && + } else if ((!idle || idle->exit_latency == min_exit_latency) && rq->idle_stamp > latest_idle_timestamp) { /* * If equal or no active idle state, then @@ -6427,13 +6138,6 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this */ latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; - } else if (shallowest_idle_cpu == -1) { - /* - * If we haven't found an idle CPU yet - * pick a non-idle one that can fit the task as - * fallback. - */ - shallowest_idle_cpu = i; } } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(i); @@ -6548,7 +6252,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) sg = sd->groups; do { int i; - if (!cpumask_intersects(sched_group_cpus(sg), tsk_cpus_allowed(p))) goto next; @@ -6635,31 +6338,27 @@ static int cpu_util_wake(int cpu, struct task_struct *p) return (util >= capacity) ? capacity : util; } -static int start_cpu(bool prefer_idle) +static int start_cpu(bool boosted) { struct root_domain *rd = cpu_rq(smp_processor_id())->rd; - return prefer_idle ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; + return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; } static inline int find_best_target(struct task_struct *p, int *backup_cpu, - bool prefer_idle) + bool boosted, bool prefer_idle) { - unsigned long high_cpu_util = SCHED_CAPACITY_SCALE; - unsigned long task_util_boosted = boosted_task_util(p); + unsigned long best_idle_min_cap_orig = ULONG_MAX; + unsigned long min_util = boosted_task_util(p); unsigned long target_capacity = ULONG_MAX; unsigned long min_wake_util = ULONG_MAX; unsigned long target_max_spare_cap = 0; unsigned long best_active_util = ULONG_MAX; - unsigned long target_idle_max_spare_cap = 0; - unsigned long target_max_free_util = 0; int best_idle_cstate = INT_MAX; - bool low_util_mode = true; struct sched_domain *sd; struct sched_group *sg; int best_active_cpu = -1; int best_idle_cpu = -1; - int low_util_cpu = -1; int target_cpu = -1; int cpu, i; @@ -6668,8 +6367,8 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts); schedstat_inc(this_rq(), eas_stats.fbt_attempts); - /* Find start CPU based on prefer_idle flag*/ - cpu = start_cpu(prefer_idle); + /* Find start CPU based on boost value */ + cpu = start_cpu(boosted); if (cpu < 0) { schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu); schedstat_inc(this_rq(), eas_stats.fbt_no_cpu); @@ -6684,59 +6383,34 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, return -1; } - /* - * Consider a CPU highly utilized when it's utilization is bigger than - * 1/4 of the maximum capacity. 
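The find_idlest_group_cpu() hunk above prefers, among idle CPUs, the one whose idle state has the smallest exit latency, breaking ties by the most recent idle timestamp, and only falls back to the least-loaded CPU when nothing is idle. The userspace sketch below (not part of the patch) reproduces that preference; the cpu_info array and its values are assumptions.

#include <stdio.h>
#include <limits.h>

struct cpu_info {
	int idle;			/* 1 if the CPU is idle */
	unsigned int exit_latency;	/* idle-state exit latency */
	unsigned long long idle_stamp;	/* when the CPU went idle */
	unsigned long load;		/* weighted load if busy */
};

static int find_shallowest_idle(const struct cpu_info *cpu, int nr)
{
	unsigned int min_exit_latency = UINT_MAX;
	unsigned long long latest_idle_stamp = 0;
	unsigned long min_load = ULONG_MAX;
	int best_idle = -1, least_loaded = 0, i;

	for (i = 0; i < nr; i++) {
		if (cpu[i].idle) {
			if (cpu[i].exit_latency < min_exit_latency ||
			    (cpu[i].exit_latency == min_exit_latency &&
			     cpu[i].idle_stamp > latest_idle_stamp)) {
				min_exit_latency = cpu[i].exit_latency;
				latest_idle_stamp = cpu[i].idle_stamp;
				best_idle = i;
			}
		} else if (best_idle == -1 && cpu[i].load < min_load) {
			min_load = cpu[i].load;
			least_loaded = i;
		}
	}

	return best_idle != -1 ? best_idle : least_loaded;
}

int main(void)
{
	struct cpu_info cpus[] = {
		{ 0, 0, 0, 350 },	/* busy */
		{ 1, 400, 1000, 0 },	/* idle, deep state */
		{ 1, 100, 900, 0 },	/* idle, shallow state */
	};

	printf("picked cpu %d\n", find_shallowest_idle(cpus, 3));
	return 0;
}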
- */ - high_cpu_util = SCHED_CAPACITY_SCALE >> 2; - /* Scan CPUs in all SDs */ sg = sd->groups; do { for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { unsigned long capacity_curr = capacity_curr_of(i); unsigned long capacity_orig = capacity_orig_of(i); - unsigned long wake_util, new_util, min_capped_util; + unsigned long wake_util, new_util; if (!cpu_online(i)) continue; + if (walt_cpu_high_irqload(i)) + continue; + /* * p's blocked utilization is still accounted for on prev_cpu * so prev_cpu will receive a negative bias due to the double * accounting. However, the blocked utilization may be zero. */ wake_util = cpu_util_wake(i, p); - - /* - * Keep track of overall system utilization. - * System is considered in low-utilization if the - * utilization of each (online) CPU is below a - */ - if (wake_util + task_util_boosted >= high_cpu_util) - low_util_mode = false; - - /* Skip high IRQ loaded CPUs */ - if (walt_cpu_high_irqload(i)) - continue; + new_util = wake_util + task_util(p); /* * Ensure minimum capacity to grant the required boost. * The target CPU can be already at a capacity level higher * than the one required to boost the task. */ - new_util = wake_util + task_util(p); - new_util = max(task_util_boosted, new_util); - - /* - * Include minimum capacity constraint: - * new_util contains the required utilization including - * boost. min_capped_util also takes into account a - * minimum capacity cap imposed on the CPU by external - * actors. - */ - min_capped_util = max(new_util, capacity_min_of(i)); - + new_util = max(min_util, new_util); if (new_util > capacity_orig) continue; @@ -6769,57 +6443,23 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, * tasks. */ if (prefer_idle) { + /* * Case A.1: IDLE CPU - * - * This heuristics will return: - * - the first IDLE CPU we find, if the system - * is !low_util, i.e. there is one CPU whith - * at least high_cpu_util utilization. - * - the most energy_efficient IDLE CPU, if the - * system is low_util, i.e. all the CPUs - * have less then high_cpu_util utilization. + * Return the first IDLE CPU we find. */ if (idle_cpu(i)) { + schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); + schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - /* - * Keep track of first IDLE CPU and - * return that one if the system - * is not int low_util mode - */ - if (target_cpu == -1) { - target_capacity = capacity_orig; - target_cpu = i; - if (!low_util_mode) - break; - continue; - } - - /* - * Unconditionally favour first IDLE - * CPU encountered on highly utilized - * systems. - */ - if (!low_util_mode) - break; - - /* Favor CPUs with smaller capacity */ - if (capacity_orig >= target_capacity) - continue; + trace_sched_find_best_target(p, + prefer_idle, min_util, + cpu, best_idle_cpu, + best_active_cpu, i); - target_capacity = capacity_orig; - low_util_cpu = i; - continue; + return i; } - /* - * Restrict search on idle CPUs if we already - * found at least one. - */ - if (target_cpu >= 0) { - best_active_cpu = -1; - continue; - } /* * Case A.2: Target ACTIVE CPU * Favor CPUs with max spare capacity. @@ -6848,7 +6488,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, best_active_util = new_util; best_active_cpu = i; continue; - } + } /* * Enforce EAS mode @@ -6863,13 +6503,6 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, (capacity_orig * SCHED_CAPACITY_SCALE)) continue; - /* - * Favor CPUs with smaller capacity for Non latency - * sensitive tasks. 
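The find_best_target() loop above keeps a candidate CPU only if the task's utilization, raised to at least its boosted value, still fits under the CPU's original capacity once the task is added back on top of cpu_util_wake(). The userspace sketch below (not part of the patch) shows that filter; the capacities and utilizations are assumed sample values.

#include <stdio.h>

static int cpu_fits(unsigned long wake_util, unsigned long task_util,
		    unsigned long boosted_util, unsigned long capacity_orig)
{
	unsigned long new_util = wake_util + task_util;

	/* Ensure at least the capacity required to grant the boost */
	if (new_util < boosted_util)
		new_util = boosted_util;

	return new_util <= capacity_orig;
}

int main(void)
{
	/* little CPU (cap 460) vs big CPU (cap 1024), task util 250, boosted 400 */
	printf("little: %d\n", cpu_fits(300, 250, 400, 460));
	printf("big:    %d\n", cpu_fits(300, 250, 400, 1024));
	return 0;
}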
- */ - if (capacity_orig > target_capacity) - continue; - /* * Case B) Non latency sensitive tasks on IDLE CPUs. * @@ -6897,11 +6530,8 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, if (idle_cpu(i)) { int idle_idx = idle_get_state_idx(cpu_rq(i)); - /* Favor CPUs that won't end up running at a - * high OPP. - */ - if ((capacity_orig - min_capped_util) < - target_idle_max_spare_cap) + /* Select idle CPU with lower cap_orig */ + if (capacity_orig > best_idle_min_cap_orig) continue; /* @@ -6915,9 +6545,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, continue; /* Keep track of best idle CPU */ - target_capacity = capacity_orig; - target_idle_max_spare_cap = capacity_orig - - min_capped_util; + best_idle_min_cap_orig = capacity_orig; best_idle_cstate = idle_idx; best_idle_cpu = i; continue; @@ -6939,72 +6567,55 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, * that CPU at an higher OPP. * * Thus, this case keep track of the CPU with the - * smallest maximum capacity, highest spare maximum - * capacity and highest free cpu utility. + * smallest maximum capacity and highest spare maximum + * capacity. */ - /* Favor CPUs with maximum spare capacity */ - if ((capacity_orig - min_capped_util) < - target_max_spare_cap) + /* Favor CPUs with smaller capacity */ + if (capacity_orig > target_capacity) continue; - /* Favor CPUs with maximum free utilization */ - if ((capacity_orig - cpu_util(i)) < target_max_free_util) + /* Favor CPUs with maximum spare capacity */ + if ((capacity_orig - new_util) < target_max_spare_cap) continue; - target_max_spare_cap = capacity_orig - min_capped_util; + target_max_spare_cap = capacity_orig - new_util; target_capacity = capacity_orig; - target_max_free_util = capacity_orig - cpu_util(i); target_cpu = i; - } + } } while (sg = sg->next, sg != sd->groups); /* - * For latency sensitive tasks, case A in the previous loop, we pick - * the best ACTIVE CPU only if we was not able to find a target IDLE - * CPU. - * The target IDLE CPU is selected depending on CPUs utilization. - * In !low_util_mode we always pick the first IDLE candidate - * encountered. Otherwise, for low utilized systems, the most energy - * efficient IDLE CPU is preferred. - */ - if (prefer_idle && !low_util_mode) { - if (target_cpu == -1) - target_cpu = best_active_cpu; - schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle); - schedstat_inc(this_rq(), eas_stats.fbt_pref_idle); - goto done; - } - - if (prefer_idle && low_util_mode) { - if (low_util_cpu != -1) - target_cpu = low_util_cpu; - schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle_lum); - schedstat_inc(this_rq(), eas_stats.fbt_pref_idle_lum); - goto done; - } - - /* - * For non latency sensitive tasks, cases B and C in the previous - * loop, we pick the best IDLE CPU only if we was not able to find a - * target ACTIVE CPU. - * Otherwise, the best IDLE CPU becomes our backup choice. + * For non latency sensitive tasks, cases B and C in the previous loop, + * we pick the best IDLE CPU only if we was not able to find a target + * ACTIVE CPU. + * + * Policies priorities: + * + * - prefer_idle tasks: + * + * a) IDLE CPU available, we return immediately + * b) ACTIVE CPU where task fits and has the bigger maximum spare + * capacity (i.e. target_cpu) + * c) ACTIVE CPU with less contention due to other tasks + * (i.e. 
best_active_cpu) + * + * - NON prefer_idle tasks: + * + * a) ACTIVE CPU: target_cpu + * b) IDLE CPU: best_idle_cpu */ - if (target_cpu == -1) { - target_cpu = best_idle_cpu; - schedstat_inc(p, se.statistics.nr_wakeups_fbt_best_idle); - schedstat_inc(this_rq(), eas_stats.fbt_best_idle); - goto done; - } - *backup_cpu = best_idle_cpu; - schedstat_inc(p, se.statistics.nr_wakeups_fbt_best_active); - schedstat_inc(this_rq(), eas_stats.fbt_best_active); - -done: + if (target_cpu == -1) + target_cpu = prefer_idle + ? best_active_cpu + : best_idle_cpu; + else + *backup_cpu = prefer_idle + ? best_active_cpu + : best_idle_cpu; - trace_sched_find_best_target(p, prefer_idle, task_util_boosted, cpu, - low_util_mode, low_util_cpu, + trace_sched_find_best_target(p, prefer_idle, min_util, cpu, best_idle_cpu, best_active_cpu, target_cpu); @@ -7024,34 +6635,25 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) { long min_cap, max_cap; -#ifdef CONFIG_CGROUP_SCHEDTUNE - bool boosted = schedtune_task_boost(p) > 0; - bool prefer_idle = schedtune_prefer_idle(p) > 0; -#else - bool boosted = 0; - bool prefer_idle = 0; -#endif min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu)); max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val; - /* Bring task utilization in sync with prev_cpu */ - if (!boosted && !prefer_idle) - sync_entity_load_avg(&p->se); - /* Minimum capacity is close to max, no need to abort wake_affine */ if (max_cap - min_cap < max_cap >> 3) return 0; + /* Bring task utilization in sync with prev_cpu */ + sync_entity_load_avg(&p->se); + return min_cap * 1024 < task_util(p) * capacity_margin; } static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync) { + struct sched_domain *sd; + int target_cpu = prev_cpu, tmp_target, tmp_backup; bool boosted, prefer_idle; - int target_cpu; - int backup_cpu; - int next_cpu; schedstat_inc(p, se.statistics.nr_wakeups_secb_attempts); schedstat_inc(this_rq(), eas_stats.secb_attempts); @@ -7066,6 +6668,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync } } + rcu_read_lock(); #ifdef CONFIG_CGROUP_SCHEDTUNE boosted = schedtune_task_boost(p) > 0; prefer_idle = schedtune_prefer_idle(p) > 0; @@ -7076,41 +6679,32 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync sync_entity_load_avg(&p->se); + sd = rcu_dereference(per_cpu(sd_ea, prev_cpu)); /* Find a cpu with sufficient capacity */ - next_cpu = find_best_target(p, &backup_cpu, prefer_idle); - if (next_cpu == -1) { - target_cpu = prev_cpu; - return target_cpu; - } + tmp_target = find_best_target(p, &tmp_backup, boosted, prefer_idle); - /* Unconditionally prefer IDLE CPUs for boosted/prefer_idle tasks */ - if ((boosted || prefer_idle) && idle_cpu(next_cpu)) { - schedstat_inc(p, se.statistics.nr_wakeups_secb_idle_bt); - schedstat_inc(this_rq(), eas_stats.secb_idle_bt); - target_cpu = next_cpu; - return target_cpu; + if (!sd) + goto unlock; + if (tmp_target >= 0) { + target_cpu = tmp_target; + if ((boosted || prefer_idle) && idle_cpu(target_cpu)) { + schedstat_inc(p, se.statistics.nr_wakeups_secb_idle_bt); + schedstat_inc(this_rq(), eas_stats.secb_idle_bt); + goto unlock; + } } - target_cpu = prev_cpu; - if (next_cpu != prev_cpu) { + if (target_cpu != prev_cpu) { int delta = 0; struct energy_env eenv = { - .p = p, .util_delta = task_util(p), - /* Task's previous CPU candidate */ - .cpu[EAS_CPU_PRV] = { - .cpu_id = prev_cpu, - }, - /* Main 
alternative CPU candidate */ - .cpu[EAS_CPU_NXT] = { - .cpu_id = next_cpu, - }, - /* Backup alternative CPU candidate */ - .cpu[EAS_CPU_BKP] = { - .cpu_id = backup_cpu, - }, + .src_cpu = prev_cpu, + .dst_cpu = target_cpu, + .task = p, + .trg_cpu = target_cpu, }; + #ifdef CONFIG_SCHED_WALT if (!walt_disabled && sysctl_sched_use_walt_cpu_util && p->state == TASK_WAKING) @@ -7120,27 +6714,35 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync if (__cpu_overutilized(prev_cpu, delta)) { schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap); schedstat_inc(this_rq(), eas_stats.secb_insuff_cap); - target_cpu = next_cpu; - return target_cpu; + goto unlock; } - /* Check if EAS_CPU_NXT is a more energy efficient CPU */ - if (select_energy_cpu_idx(&eenv) != EAS_CPU_PRV) { - schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav); - schedstat_inc(this_rq(), eas_stats.secb_nrg_sav); - target_cpu = eenv.cpu[eenv.next_idx].cpu_id; - return target_cpu; + if (energy_diff(&eenv) >= 0) { + /* No energy saving for target_cpu, try backup */ + target_cpu = tmp_backup; + eenv.dst_cpu = target_cpu; + eenv.trg_cpu = target_cpu; + if (tmp_backup < 0 || + tmp_backup == prev_cpu || + energy_diff(&eenv) >= 0) { + schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); + schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); + target_cpu = prev_cpu; + goto unlock; + } } - schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); - schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); - target_cpu = prev_cpu; - return target_cpu; + schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav); + schedstat_inc(this_rq(), eas_stats.secb_nrg_sav); + goto unlock; } schedstat_inc(p, se.statistics.nr_wakeups_secb_count); schedstat_inc(this_rq(), eas_stats.secb_count); +unlock: + rcu_read_unlock(); + return target_cpu; } @@ -7166,9 +6768,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int want_affine = 0; int sync = wake_flags & WF_SYNC; - if (p->nr_cpus_allowed == 1) - return prev_cpu; - if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p, sibling_count_hint) && @@ -7206,16 +6805,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = cpu; } -#ifdef CONFIG_HISI_EAS_SCHED - if (boot_boost || (global_boost_enabled_flag && (schedtune_task_boost(p) > 0))) { - int boost_cpu = find_global_boost_cpu(p); - if (boost_cpu != -1) { - rcu_read_unlock(); - return boost_cpu; - } - } -#endif - if (sd && !(sd_flag & SD_BALANCE_FORK)) { /* * We're going to need the task's util for capacity_spare_wake @@ -7225,7 +6814,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f sync_entity_load_avg(&p->se); } - if (!sd) { if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? 
*/ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); @@ -7233,9 +6821,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } else { new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); } - - -unlock: rcu_read_unlock(); return new_cpu; @@ -7493,12 +7078,14 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) } while (cfs_rq); p = task_of(se); + #ifdef CONFIG_HW_VIP_THREAD /* * * pick vip or temp vip thread */ pick_vip_thread(rq, &p, &se); #endif + /* * Since we haven't yet done put_prev_entity and if the selected task * is a different task than we started out with, try and touch the @@ -7909,15 +7496,11 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* * We do not migrate tasks that are: - * 1) energy_aware is enabled and small task is not migrated to higher - * capacity CPU - * 2) throttled_lb_pair, or - * 3) cannot be migrated to this CPU due to cpus_allowed, or - * 4) running (obviously), or - * 5) are cache-hot on their current CPU. + * 1) throttled_lb_pair, or + * 2) cannot be migrated to this CPU due to cpus_allowed, or + * 3) running (obviously), or + * 4) are cache-hot on their current CPU. */ - - if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; @@ -8028,55 +7611,6 @@ static struct task_struct *detach_one_task(struct lb_env *env) static const unsigned int sched_nr_migrate_break = 32; -/* must hold runqueue lock for queue se is currently on */ -static struct task_struct *hisi_get_heaviest_task( - struct task_struct *p, int cpu) -{ - int num_tasks = 5; - struct sched_entity *se = &p->se; - unsigned long int max_util = task_util(p), max_preferred_util= 0, util; - struct task_struct *tsk, *max_preferred_tsk = NULL, *max_util_task = p; - - /* The currently running task is not on the runqueue */ - se = __pick_first_entity(cfs_rq_of(se)); - - while (num_tasks && se) { - if (!entity_is_task(se)) { - se = __pick_next_entity(se); - num_tasks--; - continue; - } - - tsk = task_of(se); - util = boosted_task_util(tsk); -#ifdef CONFIG_CGROUP_SCHEDTUNE - bool boosted = schedtune_task_boost(tsk) > 0; - bool prefer_idle = schedtune_prefer_idle(tsk) > 0; -#else - bool boosted = 0; - bool prefer_idle = 0; -#endif - - if (cpumask_test_cpu(cpu, tsk_cpus_allowed(tsk))) { - if (boosted || prefer_idle) { - if (util > max_preferred_util) { - max_preferred_util = util;; - max_preferred_tsk = tsk; - } - } else { - if (util > max_util) { - max_util = util; - max_util_task = tsk; - } - } - } - - se = __pick_next_entity(se); - num_tasks--; - } - - return max_preferred_tsk ? max_preferred_tsk : max_util_task; -} /* * detach_tasks() -- tries to detach up to imbalance weighted load from * busiest_rq, as part of a balancing operation within domain "sd". 
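The select_energy_cpu_brute() hunk above takes the candidate from find_best_target() only when energy_diff() reports a saving versus prev_cpu, then tries the backup CPU, and finally stays on prev_cpu. The userspace sketch below (not part of the patch) condenses that fallback order; the stand-in energy_delta() values are made up for illustration.

#include <stdio.h>

/* Stand-in for energy_diff(): negative means the move saves energy */
static int energy_delta(int from_cpu, int to_cpu)
{
	/* assumed sample deltas: moving 0->2 costs, 0->1 saves */
	if (from_cpu == 0 && to_cpu == 2)
		return 35;
	if (from_cpu == 0 && to_cpu == 1)
		return -20;
	return 0;
}

static int pick_energy_cpu(int prev_cpu, int target_cpu, int backup_cpu)
{
	if (target_cpu == prev_cpu)
		return prev_cpu;

	if (energy_delta(prev_cpu, target_cpu) < 0)
		return target_cpu;		/* main candidate saves energy */

	if (backup_cpu >= 0 && backup_cpu != prev_cpu &&
	    energy_delta(prev_cpu, backup_cpu) < 0)
		return backup_cpu;		/* backup candidate saves energy */

	return prev_cpu;			/* no saving: stay put */
}

int main(void)
{
	printf("picked cpu %d\n", pick_energy_cpu(0, 2, 1));
	return 0;
}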
@@ -8117,25 +7651,6 @@ static int detach_tasks(struct lb_env *env) break; } -#ifdef CONFIG_HISI_EAS_SCHED - if (energy_aware() && - (capacity_orig_of(env->dst_cpu) > capacity_orig_of(env->src_cpu))) { - p = hisi_get_heaviest_task(p, env->dst_cpu); - -#ifdef CONFIG_CGROUP_SCHEDTUNE - bool boosted = schedtune_task_boost(p) > 0; - bool prefer_idle = schedtune_prefer_idle(p) > 0; -#else - bool boosted = 0; - bool prefer_idle = 0; -#endif - if (!boosted && !prefer_idle && - task_util(p) * 100 < capacity_orig_of(env->src_cpu) * up_migration_util_filter) - goto next; - - } -#endif - if (!can_migrate_task(p, env)) goto next; @@ -8358,7 +7873,6 @@ struct sd_lb_stats { struct sched_group *local; /* Local group in this sd */ unsigned long total_load; /* Total load of all groups in sd */ unsigned long total_capacity; /* Total capacity of all groups in sd */ - unsigned long total_util; /* Total util of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ @@ -8378,7 +7892,6 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .local = NULL, .total_load = 0UL, .total_capacity = 0UL, - .total_util = 0UL, .busiest_stat = { .avg_load = 0UL, .sum_nr_running = 0, @@ -8468,9 +7981,6 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) cpu_rq(cpu)->cpu_capacity_orig = capacity; - capacity *= arch_scale_max_freq_capacity(sd, cpu); - capacity >>= SCHED_CAPACITY_SHIFT; - mcc = &cpu_rq(cpu)->rd->max_cpu_capacity; raw_spin_lock_irqsave(&mcc->lock, flags); @@ -8676,7 +8186,8 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) static inline bool group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref) { - return sg->sgc->max_capacity < ref->sgc->max_capacity; + return sg->sgc->max_capacity + capacity_margin - SCHED_LOAD_SCALE < + ref->sgc->max_capacity; } static inline enum @@ -8740,7 +8251,7 @@ static inline void update_cpu_stats_if_tickless(struct rq *rq) { } static inline void update_sg_lb_stats(struct lb_env *env, struct sched_group *group, int load_idx, int local_group, struct sg_lb_stats *sgs, - bool *overload, bool *overutilized, bool *misfit_task) + bool *overload, bool *overutilized) { unsigned long load; int i, nr_running; @@ -8778,23 +8289,11 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* * No need to call idle_cpu() if nr_running is not 0 */ - if (!nr_running && idle_cpu(i)) { + if (!nr_running && idle_cpu(i)) sgs->idle_cpus++; - /* update idle CPU blocked load */ - if (cpu_util(i)) - update_blocked_averages(i); - } - - if (cpu_overutilized(i) && !idle_cpu(i)) { + if (cpu_overutilized(i)) { *overutilized = true; - /* - * If the cpu is overutilized and if there is only one - * current task in cfs runqueue, it is potentially a misfit - * task. 
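The update_sg_lb_stats() hunk above flags a CPU through cpu_overutilized(), which compares capacity against utilization scaled by capacity_margin; with the default of 1280 (~20% margin) a CPU is over-utilized once it runs above roughly 80% of its capacity. The userspace sketch below (not part of the patch) shows that threshold with assumed sample values.

#include <stdio.h>

static const unsigned long capacity_margin = 1280;	/* ~20% margin */

static int cpu_overutilized_sketch(unsigned long capacity, unsigned long util)
{
	/* same comparison as capacity_of(cpu) * 1024 < cpu_util(cpu) * capacity_margin */
	return capacity * 1024 < util * capacity_margin;
}

int main(void)
{
	printf("%d\n", cpu_overutilized_sketch(1024, 800));	/* ~78%: not yet */
	printf("%d\n", cpu_overutilized_sketch(1024, 850));	/* ~83%: overutilized */
	return 0;
}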
- */ - if (rq->cfs.h_nr_running == 1) - *misfit_task = true; if (!sgs->group_misfit_task && rq->misfit_task) sgs->group_misfit_task = capacity_of(i); } @@ -8924,11 +8423,11 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) */ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { - struct sched_domain *child = env->sd->child, *sd; + struct sched_domain *child = env->sd->child; struct sched_group *sg = env->sd->groups; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; - bool overload = false, overutilized = false, misfit_task = false; + bool overload = false, overutilized = false; if (child && child->flags & SD_PREFER_SIBLING) prefer_sibling = 1; @@ -8950,8 +8449,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } update_sg_lb_stats(env, sg, load_idx, local_group, sgs, - &overload, &overutilized, - &misfit_task); + &overload, &overutilized); if (local_group) goto next_group; @@ -8991,7 +8489,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* Now, start updating sd_lb_stats */ sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; - sds->total_util += sgs->group_util; sg = sg->next; } while (sg != env->sd->groups); @@ -9005,48 +8502,19 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload; - } - if (overutilized) - set_sd_overutilized(env->sd); - else - clear_sd_overutilized(env->sd); - - /* - * If there is a misfit task in one cpu in this sched_domain - * it is likely that the imbalance cannot be sorted out among - * the cpu's in this sched_domain. In this case set the - * overutilized flag at the parent sched_domain. - */ - if (misfit_task) { - - sd = env->sd->parent; - /* - * In case of a misfit task, load balance at the parent - * sched domain level will make sense only if the the cpus - * have a different capacity. 
If cpus at a domain level have - * the same capacity, the misfit task cannot be well - * accomodated in any of the cpus and there in no point in - * trying a load balance at this level - */ - while (sd) { - if (sd->flags & SD_ASYM_CPUCAPACITY) { - set_sd_overutilized(sd); - break; - } - sd = sd->parent; + /* Update over-utilization (tipping point, U >= 0) indicator */ + if (env->dst_rq->rd->overutilized != overutilized) { + env->dst_rq->rd->overutilized = overutilized; + trace_sched_overutilized(overutilized); + } + } else { + if (!env->dst_rq->rd->overutilized && overutilized) { + env->dst_rq->rd->overutilized = true; + trace_sched_overutilized(true); } } - /* If the domain util is greater that domain capacity, load balancing - * needs to be done at the next sched domain level as well - */ -#ifdef CONFIG_HISI_EAS_SCHED - if (sds->total_capacity * 1024 < sds->total_util * sd_capacity_margin) -#else - if (sds->total_capacity * 1024 < sds->total_util * capacity_margin) -#endif - set_sd_overutilized(env->sd->parent); } /** @@ -9111,12 +8579,6 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) local = &sds->local_stat; busiest = &sds->busiest_stat; - if (busiest->sum_nr_running >= busiest->group_weight && - local->sum_nr_running < local->group_weight) { - env->imbalance = busiest->load_per_task; - return; - } - if (!local->sum_nr_running) local->load_per_task = cpu_avg_load_per_task(env->dst_cpu); else if (busiest->load_per_task > local->load_per_task) @@ -9295,10 +8757,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) */ update_sd_lb_stats(env, &sds); - if (energy_aware() && ((env->sd->flags & SD_ASYM_CPUCAPACITY) || env->idle == CPU_NOT_IDLE)) { - if (!is_sd_overutilized(env->sd)) - goto out_balanced; - } + if (energy_aware() && !env->dst_rq->rd->overutilized) + goto out_balanced; local = &sds.local_stat; busiest = &sds.busiest_stat; @@ -9358,22 +8818,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env) * significant if the diff is greater than 1 otherwise we * might end up to just move the imbalance on another group */ -#ifdef CONFIG_HISI_EAS_SCHED if ((busiest->group_type != group_overloaded) && (local->idle_cpus <= (busiest->idle_cpus + 1)) && !group_smaller_cpu_capacity(sds.busiest, sds.local)) goto out_balanced; -#else - /* busiest->group_type is group_other, if the CPU is in the same frequency doamin - * then the load balance will be aborted. - * - * CPU 4 have a lot of threads but the CPU util is low, the group_type will be assiged - * to group_other. NOHZ idle balance will be needed to spread tasks out. 
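The find_busiest_group() condition kept above relies on group_smaller_cpu_capacity(), which this patch changes to a margin-adjusted comparison: a group only counts as smaller when its maximum capacity falls short of the reference by more than capacity_margin - SCHED_LOAD_SCALE. The userspace sketch below (not part of the patch) shows the effect with the document's default values; the capacities are sample numbers.

#include <stdio.h>

#define SCHED_LOAD_SCALE	1024UL
static const unsigned long capacity_margin = 1280;	/* ~20% margin */

static int group_smaller_capacity(unsigned long sg_cap, unsigned long ref_cap)
{
	return sg_cap + capacity_margin - SCHED_LOAD_SCALE < ref_cap;
}

int main(void)
{
	printf("%d\n", group_smaller_capacity(900, 1024));	/* within the margin */
	printf("%d\n", group_smaller_capacity(460, 1024));	/* clearly smaller */
	return 0;
}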
- */ - if ((local->idle_cpus <= (busiest->idle_cpus + 1)) && - busiest->sum_nr_running <= busiest->group_weight) - goto out_balanced; -#endif } else { /* * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use @@ -9506,14 +8954,11 @@ static int need_active_balance(struct lb_env *env) return 1; } - if ((capacity_orig_of(env->src_cpu) < capacity_orig_of(env->dst_cpu)) && - env->src_rq->cfs.h_nr_running == 1 && - cpu_overutilized(env->src_cpu)) { - - if (idle_cpu(env->dst_cpu)) - return 1; - - if (!idle_cpu(env->dst_cpu) && !cpu_overutilized(env->dst_cpu)) + if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) && + ((capacity_orig_of(env->src_cpu) < capacity_orig_of(env->dst_cpu))) && + env->src_rq->cfs.h_nr_running == 1 && + cpu_overutilized(env->src_cpu) && + !cpu_overutilized(env->dst_cpu)) { return 1; } @@ -10023,9 +9468,8 @@ static int active_load_balance_cpu_stop(void *data) update_rq_clock(busiest_rq); p = detach_one_task(&env); - if (p) { + if (p) schedstat_inc(sd, alb_pushed); - } else schedstat_inc(sd, alb_failed); } @@ -10064,109 +9508,9 @@ static inline int on_null_domain(struct rq *rq) * needed, they will kick the idle load balancer, which then does idle * load balancing for all the idle CPUs. */ - -#ifdef CONFIG_HISI_EAS_SCHED -/* - * Reset balance_interval at all sched_domain levels of given cpu, so that it - * honors kick. - */ -static inline void reset_balance_interval(int cpu) -{ - struct sched_domain *sd; - - if (cpu >= nr_cpu_ids) - return; - - rcu_read_lock(); - for_each_domain(cpu, sd) - sd->balance_interval = 0; - rcu_read_unlock(); -} - -#define NOHZ_KICK_ANY 0 -#define NOHZ_KICK_RESTRICT 1 -#define NOHZ_KICK_BOOST 2 - -static inline int nohz_kick_type(int call_cpu, struct sched_domain *sd) -{ - int type = NOHZ_KICK_ANY; - int i; - - if (hisi_test_fast_cpu(call_cpu)) - return NOHZ_KICK_ANY; - - if (energy_aware() && cpu_rq(call_cpu)->misfit_task) { - type = NOHZ_KICK_ANY; - } else if (!is_sd_overutilized(sd) && !cpu_overutilized(call_cpu)) { - type = NOHZ_KICK_RESTRICT; - } else { - for_each_cpu(i, sched_domain_span(sd)) { - - if (cpu_util(i) * sd_capacity_margin < capacity_orig_of(i) * 1024) { - /* Change the kick type to limit to CPUs that - * are of equal or lower capacity. 
- */ - type = NOHZ_KICK_RESTRICT; - break; - } - } - } - - return type; -} - -static inline int hisi_find_new_ilb(void) -{ - struct sched_domain *sd; - int call_cpu = smp_processor_id(); - int type = NOHZ_KICK_ANY; - int ilb = nr_cpu_ids; - bool ilb_found = false; - - rcu_read_lock(); - - sd = rcu_dereference_check_sched_domain(cpu_rq(call_cpu)->sd); - if (!sd) { - rcu_read_unlock(); - return nr_cpu_ids; - } - - type = nohz_kick_type(call_cpu, sd); - - for_each_domain(call_cpu, sd) { - for_each_cpu_and(ilb, nohz.idle_cpus_mask, sched_domain_span(sd)) { - if (idle_cpu(ilb)) { - bool is_bigger_cpu = capacity_orig_of(ilb) > capacity_orig_of(call_cpu); - - if ((type == NOHZ_KICK_ANY) || - (type == NOHZ_KICK_BOOST && is_bigger_cpu) || - (type == NOHZ_KICK_RESTRICT && !is_bigger_cpu)) { - ilb_found = true; - break; - } - - } - } - - if (ilb_found) - break; - } - - rcu_read_unlock(); - - reset_balance_interval(ilb); - - return ilb; -} -#endif - static inline int find_new_ilb(void) { -#ifdef CONFIG_HISI_EAS_SCHED - int ilb = hisi_find_new_ilb(); -#else int ilb = cpumask_first(nohz.idle_cpus_mask); -#endif if (ilb < nr_cpu_ids && idle_cpu(ilb)) return ilb; @@ -10322,11 +9666,6 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) rcu_read_lock(); for_each_domain(cpu, sd) { - if (energy_aware() && ((sd->flags & SD_ASYM_CPUCAPACITY) || idle == CPU_NOT_IDLE)) { - if (!is_sd_overutilized(sd)) - continue; - } - /* * Decay the newidle max times here because this is a regular * visit to all the domains. Decay ~1% per second. @@ -10511,20 +9850,11 @@ static inline bool nohz_kick_needed(struct rq *rq) if (likely(!atomic_read(&nohz.nr_cpus))) return false; -#ifdef CONFIG_HISI_EAS_SCHED - if (rq->nr_running >= 2) - return true; -#endif - if (time_before(now, nohz.next_balance)) return false; if (rq->nr_running >= 2 && - (!energy_aware() || cpu_overutilized(cpu))) - return true; - - /* Do idle load balance if there have misfit task */ - if (energy_aware() && rq->misfit_task) + (!energy_aware() || cpu_overutilized(cpu))) return true; /* Do idle load balance if there have misfit task */ @@ -10672,7 +10002,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) { struct cfs_rq *cfs_rq; struct sched_entity *se = &curr->se; - struct sched_domain *sd; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); @@ -10683,12 +10012,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) task_tick_numa(rq, curr); #ifdef CONFIG_SMP - rcu_read_lock(); - sd = rcu_dereference(rq->sd); - if (!is_sd_overutilized(sd) && - cpu_overutilized(task_cpu(curr))) - set_sd_overutilized(sd); - rcu_read_unlock(); + if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) { + rq->rd->overutilized = true; + trace_sched_overutilized(true); + } rq->misfit_task = !task_fits_max(curr, rq->cpu); #endif @@ -11227,154 +10554,3 @@ __init void init_sched_fair_class(void) #endif /* SMP */ } - -#ifdef CONFIG_HISI_EAS_SCHED -#define EAS_DATA_SYSFS_MAX 5 - -struct eas_global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); - int *value; - int (*to_sysfs)(int); - int (*from_sysfs)(int); - ssize_t (*to_sysfs_text)(char *buf, int buf_size); -}; - -struct eas_data_struct { - int multiplier; /* used to scale the time delta */ - struct attribute_group attr_group; - struct attribute *attributes[EAS_DATA_SYSFS_MAX + 1]; - struct 
eas_global_attr attr[EAS_DATA_SYSFS_MAX]; -} eas_data; - -static ssize_t eas_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct eas_global_attr *eas_attr = - container_of(attr, struct eas_global_attr, attr); - int temp; - - if (eas_attr->to_sysfs_text != NULL) - return eas_attr->to_sysfs_text(buf, PAGE_SIZE); - - temp = *(eas_attr->value); - if (eas_attr->to_sysfs != NULL) - temp = eas_attr->to_sysfs(temp); - - return (ssize_t)sprintf(buf, "%d\n", temp); -} - -static ssize_t eas_store(struct kobject *a, struct attribute *attr, - const char *buf, size_t count) -{ - int temp; - ssize_t ret = count; - struct eas_global_attr *eas_attr = - container_of(attr, struct eas_global_attr, attr); - char *str = vmalloc(count + 1); - - if (str == NULL) - return -ENOMEM; - - memcpy(str, buf, count); - str[count] = 0; - if (sscanf(str, "%d", &temp) < 1) - ret = -EINVAL; - else { - if (eas_attr->from_sysfs != NULL) - temp = eas_attr->from_sysfs(temp); - if (temp < 0) - ret = -EINVAL; - else - *(eas_attr->value) = temp; - } - - /* trace the name and value of the attribute */ - trace_eas_attr_store(attr->name, temp); - vfree(str); - return ret; -} - -static void eas_attr_add( - const char *name, - int *value, - int (*to_sysfs)(int), - int (*from_sysfs)(int), - ssize_t (*to_sysfs_text)(char *, int), - umode_t mode) -{ - int i = 0; - - while (eas_data.attributes[i] != NULL) { - i++; - if (i >= EAS_DATA_SYSFS_MAX) - return; - } - if (mode) - eas_data.attr[i].attr.mode = mode; - else - eas_data.attr[i].attr.mode = 0644; - eas_data.attr[i].show = eas_show; - eas_data.attr[i].store = eas_store; - eas_data.attr[i].attr.name = name; - eas_data.attr[i].value = value; - eas_data.attr[i].to_sysfs = to_sysfs; - eas_data.attr[i].from_sysfs = from_sysfs; - eas_data.attr[i].to_sysfs_text = to_sysfs_text; - eas_data.attributes[i] = &eas_data.attr[i].attr; - eas_data.attributes[i + 1] = NULL; -} - -static int eas_attr_init(void) -{ - int ret; - - memset(&eas_data, 0, sizeof(eas_data)); - - eas_attr_add("boost", - &global_boost_enabled_flag, - NULL, - NULL, - NULL, - 0644); - - eas_attr_add("up_migration_util_filter", - &up_migration_util_filter, - NULL, - NULL, - NULL, - 0644); - - eas_attr_add("sd_capacity_margin", - &sd_capacity_margin, - NULL, - NULL, - NULL, - 0644); - - eas_attr_add("capacity_margin", - &capacity_margin, - NULL, - NULL, - NULL, - 0644); - - eas_attr_add("boot_boost", - &boot_boost, - NULL, - NULL, - NULL, - 0644); - - eas_data.attr_group.name = "eas"; - eas_data.attr_group.attrs = eas_data.attributes; - ret = sysfs_create_group(kernel_kobj, - &eas_data.attr_group); - - return 0; -} -late_initcall(eas_attr_init); -#endif diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 788c0b9ad..03863fe67 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -73,30 +73,9 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true) * Energy aware scheduling. Use platform energy model to guide scheduling * decisions optimizing for energy efficiency. */ +#ifdef CONFIG_DEFAULT_USE_ENERGY_AWARE SCHED_FEAT(ENERGY_AWARE, true) -/* - * Minimum capacity capping. Keep track of minimum capacity factor when - * minimum frequency available to a policy is modified. - * If enabled, this can be used to inform the scheduler about capacity - * restrictions. - */ -SCHED_FEAT(MIN_CAPACITY_CAPPING, true) - -/* - * Enforce the priority of candidates selected by find_best_target() - * ON: If the target CPU saves any energy, use that. - * OFF: Use whichever of target or backup saves most. 
- */ -SCHED_FEAT(FBT_STRICT_ORDER, false) - -/* - * Apply schedtune boost hold to tasks of all sched classes. - * If enabled, schedtune will hold the boost applied to a CPU - * for 50ms regardless of task activation - if the task is - * still running 50ms later, the boost hold expires and schedtune - * boost will expire immediately the task stops. - * If disabled, this behaviour will only apply to tasks of the - * RT class. - */ -SCHED_FEAT(SCHEDTUNE_BOOST_HOLD_ALL, true) +#else +SCHED_FEAT(ENERGY_AWARE, false) +#endif diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 8ec471cb8..dfeedd9fa 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -168,7 +168,7 @@ static inline int calc_load_write_idx(void) * If the folding window started, make sure we start writing in the * next idle-delta. */ - if (!time_before(jiffies, READ_ONCE(calc_load_update))) + if (!time_before(jiffies, calc_load_update)) idx++; return idx & 1; @@ -203,7 +203,7 @@ void calc_load_exit_idle(void) /* * If we're still before the pending sample window, we're done. */ - this_rq->calc_load_update = READ_ONCE(calc_load_update); + this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -307,15 +307,14 @@ calc_load_n(unsigned long load, unsigned long exp, */ static void calc_global_nohz(void) { - unsigned long sample_window; long delta, active, n; - sample_window = READ_ONCE(calc_load_update); - if (!time_before(jiffies, sample_window + 10)) { + if (!time_before(jiffies, calc_load_update + 10)) { + /* * Catch-up, fold however many we are behind still */ - delta = jiffies - sample_window - 10; + delta = jiffies - calc_load_update - 10; n = 1 + (delta / LOAD_FREQ); active = atomic_long_read(&calc_load_tasks); @@ -325,7 +324,7 @@ static void calc_global_nohz(void) avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ); + calc_load_update += n * LOAD_FREQ; } /* @@ -353,11 +352,9 @@ static inline void calc_global_nohz(void) { } */ void calc_global_load(unsigned long ticks) { - unsigned long sample_window; long active, delta; - sample_window = READ_ONCE(calc_load_update); - if (time_before(jiffies, sample_window + 10)) + if (time_before(jiffies, calc_load_update + 10)) return; /* @@ -374,7 +371,7 @@ void calc_global_load(unsigned long ticks) avenrun[1] = calc_load(avenrun[1], EXP_5, active); avenrun[2] = calc_load(avenrun[2], EXP_15, active); - WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); + calc_load_update += LOAD_FREQ; /* * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. 
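The loadavg.c hunk above reverts the READ_ONCE()/WRITE_ONCE() accesses around calc_load_update while keeping the same fixed-point avenrun update every LOAD_FREQ interval. The userspace sketch below (not part of the patch) is modelled on the kernel's calc_load() helper to show how a 1-minute average decays toward the active task count; the sequence of samples is an assumption.

#include <stdio.h>

#define FSHIFT	11			/* bits of fractional precision */
#define FIXED_1	(1 << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1	1884			/* 1/exp(5s/1min) in fixed point */

static unsigned long calc_load_sketch(unsigned long load, unsigned long exp,
				      unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;	/* round up while ramping */

	return newload / FIXED_1;
}

int main(void)
{
	unsigned long avenrun = 0;
	int i;

	/* Ten 5-second ticks with two runnable tasks (2.0 == 2 * FIXED_1) */
	for (i = 0; i < 10; i++)
		avenrun = calc_load_sketch(avenrun, EXP_1, 2 * FIXED_1);

	printf("1-min load: %lu.%02lu\n", avenrun >> FSHIFT,
	       ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}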
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 41926fd70..ff2623b69 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1368,10 +1368,6 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; -#ifdef CONFIG_SMP - schedtune_enqueue_task(p, cpu_of(rq)); -#endif - if (flags & ENQUEUE_WAKEUP) rt_se->timeout = 0; @@ -1413,10 +1409,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; -#ifdef CONFIG_SMP - schedtune_dequeue_task(p, cpu_of(rq)); -#endif - update_curr_rt(rq); dequeue_rt_entity(rt_se); walt_dec_cumulative_runnable_avg(rq, p); @@ -1868,7 +1860,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(lowest_rq->cpu, tsk_cpus_allowed(task)) || task_running(rq, task) || - !rt_task(task) || !task_on_rq_queued(task))) { double_unlock_balance(rq, lowest_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index faf51786c..4c34fb034 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -748,8 +748,10 @@ struct rq { unsigned int ttwu_count; unsigned int ttwu_local; +#ifdef CONFIG_SMP struct eas_stats eas_stats; #endif +#endif #ifdef CONFIG_SMP struct llist_head wake_list; @@ -1047,7 +1049,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * per-task data have been completed by this moment. */ smp_wmb(); +#ifdef CONFIG_THREAD_INFO_IN_TASK + p->cpu = cpu; +#else task_thread_info(p)->cpu = cpu; +#endif p->wake_cpu = cpu; #endif } @@ -1251,7 +1257,6 @@ static const u32 prio_to_wmult[40] = { #define DEQUEUE_SLEEP 0x01 #define DEQUEUE_SAVE 0x02 -#define DEQUEUE_IDLE 0x80 /* The last dequeue before IDLE */ #define RETRY_TASK ((void *)-1UL) diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 046a3deb2..6d74a7c77 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -12,6 +12,7 @@ */ #define SCHEDSTAT_VERSION 15 +#ifdef CONFIG_SMP static inline void show_easstat(struct seq_file *seq, struct eas_stats *stats) { /* eas-specific runqueue stats */ @@ -24,15 +25,14 @@ static inline void show_easstat(struct seq_file *seq, struct eas_stats *stats) stats->secb_insuff_cap, stats->secb_no_nrg_sav, stats->secb_nrg_sav, stats->secb_count); - seq_printf(seq, "%llu %llu %llu %llu %llu %llu %llu %llu ", + seq_printf(seq, "%llu %llu %llu %llu %llu ", stats->fbt_attempts, stats->fbt_no_cpu, stats->fbt_no_sd, - stats->fbt_pref_idle, stats->fbt_pref_idle_lum, - stats->fbt_best_active, stats->fbt_best_idle, - stats->fbt_count); + stats->fbt_pref_idle, stats->fbt_count); seq_printf(seq, "%llu %llu\n", stats->cas_attempts, stats->cas_count); } +#endif static int show_schedstat(struct seq_file *seq, void *v) { @@ -61,8 +61,9 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "\n"); - show_easstat(seq, &rq->eas_stats); #ifdef CONFIG_SMP + show_easstat(seq, &rq->eas_stats); + /* domain-specific stats */ rcu_read_lock(); for_each_domain(cpu, sd) { diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c deleted file mode 100644 index 9c2da06a8..000000000 --- a/kernel/sched/swait.c +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include - -void __init_swait_queue_head(struct swait_queue_head *q, const char *name, - struct lock_class_key *key) -{ - raw_spin_lock_init(&q->lock); - lockdep_set_class_and_name(&q->lock, key, name); - INIT_LIST_HEAD(&q->task_list); -} -EXPORT_SYMBOL(__init_swait_queue_head); - -/* - * The thing about the wake_up_state() return 
value; I think we can ignore it. - * - * If for some reason it would return 0, that means the previously waiting - * task is already running, so it will observe condition true (or has already). - */ -void swake_up_locked(struct swait_queue_head *q) -{ - struct swait_queue *curr; - - if (list_empty(&q->task_list)) - return; - - curr = list_first_entry(&q->task_list, typeof(*curr), task_list); - wake_up_process(curr->task); - list_del_init(&curr->task_list); -} -EXPORT_SYMBOL(swake_up_locked); - -void swake_up(struct swait_queue_head *q) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&q->lock, flags); - swake_up_locked(q); - raw_spin_unlock_irqrestore(&q->lock, flags); -} -EXPORT_SYMBOL(swake_up); - -/* - * Does not allow usage from IRQ disabled, since we must be able to - * release IRQs to guarantee bounded hold time. - */ -void swake_up_all(struct swait_queue_head *q) -{ - struct swait_queue *curr; - LIST_HEAD(tmp); - - raw_spin_lock_irq(&q->lock); - list_splice_init(&q->task_list, &tmp); - while (!list_empty(&tmp)) { - curr = list_first_entry(&tmp, typeof(*curr), task_list); - - wake_up_state(curr->task, TASK_NORMAL); - list_del_init(&curr->task_list); - - if (list_empty(&tmp)) - break; - - raw_spin_unlock_irq(&q->lock); - raw_spin_lock_irq(&q->lock); - } - raw_spin_unlock_irq(&q->lock); -} -EXPORT_SYMBOL(swake_up_all); - -void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) -{ - wait->task = current; - if (list_empty(&wait->task_list)) - list_add(&wait->task_list, &q->task_list); -} - -void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&q->lock, flags); - __prepare_to_swait(q, wait); - set_current_state(state); - raw_spin_unlock_irqrestore(&q->lock, flags); -} -EXPORT_SYMBOL(prepare_to_swait); - -long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state) -{ - if (signal_pending_state(state, current)) - return -ERESTARTSYS; - - prepare_to_swait(q, wait, state); - - return 0; -} -EXPORT_SYMBOL(prepare_to_swait_event); - -void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait) -{ - __set_current_state(TASK_RUNNING); - if (!list_empty(&wait->task_list)) - list_del_init(&wait->task_list); -} - -void finish_swait(struct swait_queue_head *q, struct swait_queue *wait) -{ - unsigned long flags; - - __set_current_state(TASK_RUNNING); - - if (!list_empty_careful(&wait->task_list)) { - raw_spin_lock_irqsave(&q->lock, flags); - list_del_init(&wait->task_list); - raw_spin_unlock_irqrestore(&q->lock, flags); - } -} -EXPORT_SYMBOL(finish_swait); diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 4b270bd7e..ba4cd9f23 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -17,17 +17,8 @@ bool schedtune_initialized = false; unsigned int sysctl_sched_cfs_boost __read_mostly; -/* We hold schedtune boost in effect for at least this long */ -#define SCHEDTUNE_BOOST_HOLD_NS 50000000ULL - extern struct reciprocal_value schedtune_spc_rdiv; -struct target_nrg schedtune_target_nrg; - -#ifdef CONFIG_DYNAMIC_STUNE_BOOST -static DEFINE_MUTEX(stune_boost_mutex); -static struct schedtune *getSchedtune(char *st_name); -static int dynamic_boost_write(struct schedtune *st, int boost); -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ +extern struct target_nrg schedtune_target_nrg; /* Performance Boost region (B) threshold params */ static int perf_boost_idx; @@ -117,64 +108,6 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta, /* * EAS 
scheduler tunables for task groups. - * - * When CGroup support is enabled, we have to synchronize two different - * paths: - * - slow path: where CGroups are created/updated/removed - * - fast path: where tasks in a CGroups are accounted - * - * The slow path tracks (a limited number of) CGroups and maps each on a - * "boost_group" index. The fastpath accounts tasks currently RUNNABLE on each - * "boost_group". - * - * Once a new CGroup is created, a boost group idx is assigned and the - * corresponding "boost_group" marked as valid on each CPU. - * Once a CGroup is release, the corresponding "boost_group" is marked as - * invalid on each CPU. The CPU boost value (boost_max) is aggregated by - * considering only valid boost_groups with a non null tasks counter. - * - * .:: Locking strategy - * - * The fast path uses a spin lock for each CPU boost_group which protects the - * tasks counter. - * - * The "valid" and "boost" values of each CPU boost_group is instead - * protected by the RCU lock provided by the CGroups callbacks. Thus, only the - * slow path can access and modify the boost_group attribtues of each CPU. - * The fast path will catch up the most updated values at the next scheduling - * event (i.e. enqueue/dequeue). - * - * | - * SLOW PATH | FAST PATH - * CGroup add/update/remove | Scheduler enqueue/dequeue events - * | - * | - * | DEFINE_PER_CPU(struct boost_groups) - * | +--------------+----+---+----+----+ - * | | idle | | | | | - * | | boost_max | | | | | - * | +---->lock | | | | | - * struct schedtune allocated_groups | | | group[ ] | | | | | - * +------------------------------+ +-------+ | | +--+---------+-+----+---+----+----+ - * | idx | | | | | | valid | - * | boots / prefer_idle | | | | | | boost | - * | perf_{boost/constraints}_idx | <---------+(*) | | | | tasks | <------------+ - * | css | +-------+ | | +---------+ | - * +-+----------------------------+ | | | | | | | - * ^ | | | | | | | - * | +-------+ | | +---------+ | - * | | | | | | | | - * | | | | | | | | - * | +-------+ | | +---------+ | - * | zmalloc | | | | | | | - * | | | | | | | | - * | +-------+ | | +---------+ | - * + BOOSTGROUPS_COUNT | | BOOSTGROUPS_COUNT | - * schedtune_boostgroup_init() | + | - * | schedtune_{en,de}queue_task() | - * | + - * | schedtune_tasks_update() - * | */ /* SchdTune tunables for a group of tasks */ @@ -198,13 +131,6 @@ struct schedtune { * towards idle CPUs */ int prefer_idle; -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - /* - * This tracks the default boost value and is used to restore - * the value when Dynamic SchedTune Boost is reset. 
- */ - int boost_default; -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -237,9 +163,6 @@ root_schedtune = { .perf_boost_idx = 0, .perf_constrain_idx = 0, .prefer_idle = 0, -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - .boost_default = 0, -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ }; int @@ -251,14 +174,13 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta, int perf_constrain_idx; /* Optimal (O) region */ - if (nrg_delta <= 0 && cap_delta >= 0) { + if (nrg_delta < 0 && cap_delta > 0) { trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0); return INT_MAX; } /* Suboptimal (S) region */ - if ((nrg_delta >= 0 && cap_delta < 0) || - (nrg_delta > 0 && cap_delta <= 0)) { + if (nrg_delta > 0 && cap_delta < 0) { trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5); return -INT_MAX; } @@ -285,7 +207,7 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta, * implementation especially for the computation of the per-CPU boost * value */ -#define BOOSTGROUPS_COUNT 8 +#define BOOSTGROUPS_COUNT 5 /* Array of configured boostgroups */ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { @@ -303,17 +225,13 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { */ struct boost_groups { /* Maximum boost value for all RUNNABLE tasks on a CPU */ + bool idle; int boost_max; - u64 boost_ts; struct { - /* True when this boost group maps an actual cgroup */ - bool valid; /* The boost for tasks on that boost group */ int boost; /* Count of RUNNABLE tasks on that boost group */ unsigned tasks; - /* Timestamp of boost activation */ - u64 ts; } group[BOOSTGROUPS_COUNT]; /* CPU's boost group locking */ raw_spinlock_t lock; @@ -322,57 +240,35 @@ struct boost_groups { /* Boost groups affecting each CPU in the system */ DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); -static inline bool schedtune_boost_timeout(u64 now, u64 ts) -{ - return ((now - ts) > SCHEDTUNE_BOOST_HOLD_NS); -} - -static inline bool -schedtune_boost_group_active(int idx, struct boost_groups* bg, u64 now) -{ - if (bg->group[idx].tasks) - return true; - - return !schedtune_boost_timeout(now, bg->group[idx].ts); -} - static void -schedtune_cpu_update(int cpu, u64 now) +schedtune_cpu_update(int cpu) { struct boost_groups *bg; - u64 boost_ts = now; - int boost_max = INT_MIN; + int boost_max; int idx; bg = &per_cpu(cpu_boost_groups, cpu); - for (idx = 0; idx < BOOSTGROUPS_COUNT; ++idx) { - - /* Ignore non boostgroups not mapping a cgroup */ - if (!bg->group[idx].valid) - continue; + /* The root boost group is always active */ + boost_max = bg->group[0].boost; + for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { /* * A boost group affects a CPU only if it has * RUNNABLE tasks on that CPU or it has hold * in effect from a previous task. */ - if (!schedtune_boost_group_active(idx, bg, now)) - continue; - - /* this boost group is active */ - if (boost_max > bg->group[idx].boost) + if (bg->group[idx].tasks == 0) continue; - boost_max = bg->group[idx].boost; - boost_ts = bg->group[idx].ts; + boost_max = max(boost_max, bg->group[idx].boost); } - /* If there are no active boost groups on the CPU, set no boost */ - if (boost_max == INT_MIN) - boost_max = 0; + /* Ensures boost_max is non-negative when all cgroup boost values + * are neagtive. 
Avoids under-accounting of cpu capacity which may cause + * task stacking and frequency spikes.*/ + boost_max = max(boost_max, 0); bg->boost_max = boost_max; - bg->boost_ts = boost_ts; } static int @@ -382,15 +278,11 @@ schedtune_boostgroup_update(int idx, int boost) int cur_boost_max; int old_boost; int cpu; - u64 now; /* Update per CPU boost groups */ for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); - /* CGroups are never associated to non active cgroups */ - BUG_ON(!bg->group[idx].valid); - /* * Keep track of current boost values to compute the per CPU * maximum only when it has been affected by the new value of @@ -402,14 +294,9 @@ schedtune_boostgroup_update(int idx, int boost) /* Update the boost value of this boost group */ bg->group[idx].boost = boost; - now = sched_clock_cpu(cpu); - /* - * Check if this update increase current max. - */ - if (boost > cur_boost_max && - schedtune_boost_group_active(idx, bg, now)) { + /* Check if this update increase current max */ + if (boost > cur_boost_max && bg->group[idx].tasks) { bg->boost_max = boost; - bg->boost_ts = bg->group[idx].ts; trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max); continue; @@ -417,7 +304,7 @@ schedtune_boostgroup_update(int idx, int boost) /* Check if this update has decreased current max */ if (cur_boost_max == old_boost && old_boost > boost) { - schedtune_cpu_update(cpu, now); + schedtune_cpu_update(cpu); trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max); continue; } @@ -431,38 +318,21 @@ schedtune_boostgroup_update(int idx, int boost) #define ENQUEUE_TASK 1 #define DEQUEUE_TASK -1 -static inline bool -schedtune_update_timestamp(struct task_struct *p) -{ - if (sched_feat(SCHEDTUNE_BOOST_HOLD_ALL)) - return true; - - return task_has_rt_policy(p); -} - static inline void schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) { struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); int tasks = bg->group[idx].tasks + task_count; - u64 now; /* Update boosted tasks count while avoiding to make it negative */ bg->group[idx].tasks = max(0, tasks); - /* Update timeout on enqueue */ - if (task_count > 0) { - now = sched_clock_cpu(cpu); - if (schedtune_update_timestamp(p)) - bg->group[idx].ts = now; - - /* Boost group activation or deactivation on that RQ */ - if (bg->group[idx].tasks == 1) - schedtune_cpu_update(cpu, now); - } trace_sched_tune_tasks_update(p, cpu, tasks, idx, - bg->group[idx].boost, bg->boost_max, - bg->group[idx].ts); + bg->group[idx].boost, bg->boost_max); + + /* Boost group activation or deactivation on that RQ */ + if (tasks == 1 || tasks == 0) + schedtune_cpu_update(cpu); } /* @@ -504,12 +374,6 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu) raw_spin_unlock_irqrestore(&bg->lock, irq_flags); } -int schedtune_allow_attach(struct cgroup_taskset *tset) -{ - /* We always allows tasks to be moved between existing CGroups */ - return 0; -} - int schedtune_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; @@ -521,7 +385,6 @@ int schedtune_can_attach(struct cgroup_taskset *tset) int src_bg; /* Source boost group index */ int dst_bg; /* Destination boost group index */ int tasks; - u64 now; if (!unlikely(schedtune_initialized)) return 0; @@ -567,19 +430,18 @@ int schedtune_can_attach(struct cgroup_taskset *tset) * current boost group. 
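The simplified schedtune_cpu_update() above reduces to a clamped max over the boost groups that currently have runnable tasks. A minimal standalone sketch of that aggregation, using hypothetical demo types:

/* Hypothetical standalone types; mirrors the shape, not the kernel structs. */
struct bg_entry {
	int boost;		/* boost value configured for the group */
	unsigned int tasks;	/* RUNNABLE tasks from that group on this CPU */
};

static int aggregate_boost(const struct bg_entry *grp, int ngroups)
{
	int boost_max = grp[0].boost;	/* root group always counts */
	int idx;

	for (idx = 1; idx < ngroups; idx++) {
		if (grp[idx].tasks == 0)
			continue;	/* inactive group: ignored */
		if (grp[idx].boost > boost_max)
			boost_max = grp[idx].boost;
	}
	/* clamp so negative per-group boosts never shrink reported capacity */
	return boost_max > 0 ? boost_max : 0;
}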
*/ - now = sched_clock_cpu(cpu); - /* Move task from src to dst boost group */ tasks = bg->group[src_bg].tasks - 1; bg->group[src_bg].tasks = max(0, tasks); bg->group[dst_bg].tasks += 1; - bg->group[dst_bg].ts = now; - - /* update next time someone asks */ - bg->boost_ts = now - SCHEDTUNE_BOOST_HOLD_NS; raw_spin_unlock(&bg->lock); unlock_rq_of(rq, task, &irq_flags); + + /* Update CPU boost group */ + if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1) + schedtune_cpu_update(task_cpu(task)); + } return 0; @@ -660,14 +522,8 @@ void schedtune_exit_task(struct task_struct *tsk) int schedtune_cpu_boost(int cpu) { struct boost_groups *bg; - u64 now; bg = &per_cpu(cpu_boost_groups, cpu); - now = sched_clock_cpu(cpu); - - /* check to see if we have a hold in effect */ - if (schedtune_boost_timeout(now, bg->boost_ts)) - schedtune_cpu_update(cpu, now); return bg->boost_max; } @@ -742,7 +598,7 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, if (boost < -100 || boost > 100) return -EINVAL; - boost_pct = (boost > 0) ? boost : -boost; + boost_pct = boost; /* * Update threshold params for Performance Boost (B) @@ -755,9 +611,6 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, st->perf_constrain_idx = threshold_idx; st->boost = boost; -#ifdef CONFIG_DYNAMIC_STUNE_BOOST - st->boost_default = boost; -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ if (css == &root_schedtune.css) { sysctl_sched_cfs_boost = boost; perf_boost_idx = threshold_idx; @@ -767,15 +620,7 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, /* Update CPU boost */ schedtune_boostgroup_update(st->idx, st->boost); - /* trace stune_name and value */ - trace_sched_tune_boost(css->cgroup->kn->name, boost); - - trace_sched_tune_config(st->boost, - threshold_gains[st->perf_boost_idx].nrg_gain, - threshold_gains[st->perf_boost_idx].cap_gain, - threshold_gains[st->perf_constrain_idx].nrg_gain, - threshold_gains[st->perf_constrain_idx].cap_gain); - + trace_sched_tune_config(st->boost); return 0; } @@ -793,23 +638,23 @@ static struct cftype files[] = { { } /* terminate */ }; -static void -schedtune_boostgroup_init(struct schedtune *st, int idx) +static int +schedtune_boostgroup_init(struct schedtune *st) { struct boost_groups *bg; int cpu; - /* Initialize per CPUs boost group support */ + /* Keep track of allocated boost groups */ + allocated_group[st->idx] = st; + + /* Initialize the per CPU boost groups */ for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); - bg->group[idx].boost = 0; - bg->group[idx].valid = true; - bg->group[idx].ts = 0; + bg->group[st->idx].boost = 0; + bg->group[st->idx].tasks = 0; } - /* Keep track of allocated boost groups */ - allocated_group[idx] = st; - st->idx = idx; + return 0; } static struct cgroup_subsys_state * @@ -842,10 +687,14 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) goto out; /* Initialize per CPUs boost group support */ - schedtune_boostgroup_init(st, idx); + st->idx = idx; + if (schedtune_boostgroup_init(st)) + goto release; return &st->css; +release: + kfree(st); out: return ERR_PTR(-ENOMEM); } @@ -853,15 +702,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) static void schedtune_boostgroup_release(struct schedtune *st) { - struct boost_groups *bg; - int cpu; - - /* Reset per CPUs boost group support */ - for_each_possible_cpu(cpu) { - bg = &per_cpu(cpu_boost_groups, cpu); - bg->group[st->idx].valid = false; - bg->group[st->idx].boost = 0; - } + /* Reset this boost group */ + 
schedtune_boostgroup_update(st->idx, 0); /* Keep track of allocated boost groups */ allocated_group[st->idx] = NULL; @@ -872,7 +714,6 @@ schedtune_css_free(struct cgroup_subsys_state *css) { struct schedtune *st = css_st(css); - /* Release per CPUs boost group support */ schedtune_boostgroup_release(st); kfree(st); } @@ -880,7 +721,6 @@ schedtune_css_free(struct cgroup_subsys_state *css) struct cgroup_subsys schedtune_cgrp_subsys = { .css_alloc = schedtune_css_alloc, .css_free = schedtune_css_free, - .allow_attach = schedtune_allow_attach, .can_attach = schedtune_can_attach, .cancel_attach = schedtune_cancel_attach, .legacy_cftypes = files, @@ -897,7 +737,6 @@ schedtune_init_cgroups(void) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); memset(bg, 0, sizeof(struct boost_groups)); - bg->group[0].valid = true; raw_spin_lock_init(&bg->lock); } @@ -907,77 +746,6 @@ schedtune_init_cgroups(void) schedtune_initialized = true; } -#ifdef CONFIG_DYNAMIC_STUNE_BOOST -static struct schedtune *getSchedtune(char *st_name) -{ - int idx; - - for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { - char name_buf[NAME_MAX + 1]; - struct schedtune *st = allocated_group[idx]; - - if (!st) { - pr_warn("SCHEDTUNE: Could not find %s\n", st_name); - break; - } - - cgroup_name(st->css.cgroup, name_buf, sizeof(name_buf)); - if (strncmp(name_buf, st_name, strlen(st_name)) == 0) - return st; - } - - return NULL; -} - -static int dynamic_boost_write(struct schedtune *st, int boost) -{ - int ret; - /* Backup boost_default */ - int boost_default_backup = st->boost_default; - - ret = boost_write(&st->css, NULL, boost); - - /* Restore boost_default */ - st->boost_default = boost_default_backup; - - return ret; -} - -int do_stune_boost(char *st_name, int boost) -{ - int ret = 0; - struct schedtune *st = getSchedtune(st_name); - - if (!st) - return -EINVAL; - - mutex_lock(&stune_boost_mutex); - - /* Boost if new value is greater than current */ - if (boost > st->boost) - ret = dynamic_boost_write(st, boost); - - mutex_unlock(&stune_boost_mutex); - - return ret; -} - -int reset_stune_boost(char *st_name) -{ - int ret = 0; - struct schedtune *st = getSchedtune(st_name); - - if (!st) - return -EINVAL; - - mutex_lock(&stune_boost_mutex); - ret = dynamic_boost_write(st, st->boost_default); - mutex_unlock(&stune_boost_mutex); - - return ret; -} -#endif /* CONFIG_DYNAMIC_STUNE_BOOST */ - #else /* CONFIG_CGROUP_SCHEDTUNE */ int @@ -1116,11 +884,10 @@ schedtune_add_cluster_nrg( * Assume we have EM data only at the CPU and * the upper CLUSTER level */ - if (sd2->parent) - BUG_ON(!cpumask_equal( - sched_group_cpus(sg), - sched_group_cpus(sd2->parent->groups) - )); + BUG_ON(!cpumask_equal( + sched_group_cpus(sg), + sched_group_cpus(sd2->parent->groups) + )); break; } } diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index bb3922ff3..8d25ffbe4 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -55,7 +55,7 @@ __read_mostly unsigned int walt_ravg_window = static unsigned int sync_cpu; static ktime_t ktime_last; -static __read_mostly bool walt_ktime_suspended; +static bool walt_ktime_suspended; static unsigned int task_load(struct task_struct *p) { @@ -104,8 +104,10 @@ walt_dec_cumulative_runnable_avg(struct rq *rq, static void fixup_cumulative_runnable_avg(struct rq *rq, - struct task_struct *p, s64 task_load_delta) + struct task_struct *p, u64 new_task_load) { + s64 task_load_delta = (s64)new_task_load - task_load(p); + rq->cumulative_runnable_avg += task_load_delta; if ((s64)rq->cumulative_runnable_avg < 0) 
panic("cra less than zero: tld: %lld, task_load(p) = %u\n", @@ -215,7 +217,6 @@ update_window_start(struct rq *rq, u64 wallclock) nr_windows = div64_u64(delta, walt_ravg_window); rq->window_start += (u64)nr_windows * (u64)walt_ravg_window; - cpufreq_update_util(rq, 0); rq->cum_window_demand = rq->cumulative_runnable_avg; } diff --git a/kernel/signal.c b/kernel/signal.c index cf485a9d8..47833c269 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -49,15 +49,6 @@ #include #endif -#ifdef CONFIG_BOOST_KILL -extern void hisi_get_fast_cpus(struct cpumask *cpumask); - -/* Add apportunity to config enable/disable boost - * killing action - */ -unsigned int sysctl_boost_killing; -#endif - /* * SLAB caches for signal bits. */ @@ -894,11 +885,6 @@ static void complete_signal(int sig, struct task_struct *p, int group) { struct signal_struct *signal = p->signal; struct task_struct *t; -/*lint -save -e504*/ -#ifdef CONFIG_BOOST_KILL - cpumask_t new_mask = CPU_MASK_ALL; -#endif -/*lint -restore*/ /* * Now find a thread we can wake up to take the signal off the queue. @@ -955,15 +941,6 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { -#ifdef CONFIG_BOOST_KILL - if (sysctl_boost_killing) { - if (can_nice(t, -20)) - set_user_nice(t, -20); - hisi_get_fast_cpus(&new_mask); - cpumask_copy(&t->cpus_allowed, &new_mask); - t->nr_cpus_allowed = cpumask_weight(&new_mask); - } -#endif task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bd09be3e2..19e978405 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -334,15 +334,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#ifdef CONFIG_BOOST_KILL - { - .procname = "boost_killing", - .data = &sysctl_boost_killing, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_HW_VIP_THREAD { .procname = "vip_min_sched_delay_granularity", diff --git a/lib/Makefile b/lib/Makefile index 69356970c..308534d0d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -72,8 +72,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -GCOV_PROFILE_hweight.o := n -CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/hweight.c b/lib/hweight.c index 9a5c1f221..43273a7d8 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,6 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned int __sw_hweight32(unsigned int w) { #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER @@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w) #endif } EXPORT_SYMBOL(__sw_hweight32); +#endif unsigned int __sw_hweight16(unsigned int w) { @@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w) } EXPORT_SYMBOL(__sw_hweight8); +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 @@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w) #endif } EXPORT_SYMBOL(__sw_hweight64); +#endif diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 1ef4cc344..5c3916d09 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -14,25 +14,33 @@ * * A very rough approximation to the sqrt() function. 
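The int_sqrt() rewrite below is a variant of the classic bit-by-bit (digit-by-digit) integer square root. For reference, a self-contained sketch of that method, not the kernel's exact code:

#include <limits.h>

/* Illustrative bit-by-bit integer square root; 'place' walks the even bit
 * positions from high to low, and each step decides whether that bit
 * belongs in the root. */
static unsigned long isqrt_demo(unsigned long x)
{
	unsigned long root = 0;
	unsigned long place = 1UL << (sizeof(unsigned long) * CHAR_BIT - 2);

	while (place > x)		/* highest power of four <= x */
		place >>= 2;

	while (place != 0) {
		if (x >= root + place) {
			x -= root + place;	/* this bit is part of the root */
			root = (root >> 1) + place;
		} else {
			root >>= 1;
		}
		place >>= 2;
	}
	return root;			/* floor(sqrt(x)) */
}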
*/ -unsigned long int_sqrt(unsigned long x) +inline unsigned long int_sqrt(unsigned long x) { - unsigned long b, m, y = 0; + register unsigned long tmp; + register unsigned long place; + register unsigned long root = 0; if (x <= 1) return x; - m = 1UL << (BITS_PER_LONG - 2); - while (m != 0) { - b = y + m; - y >>= 1; + place = 1UL << (BITS_PER_LONG - 2); - if (x >= b) { - x -= b; - y += m; + do{ + place >>= 2; + }while(place > x); + + do { + tmp = root + place; + root >>= 1; + + if (x >= tmp) + { + x -= tmp; + root += place; } - m >>= 2; - } + place >>= 2; + }while (place != 0); - return y; + return root; } EXPORT_SYMBOL(int_sqrt); diff --git a/mm/filemap.c b/mm/filemap.c index c8f86dbef..5ea1e45ac 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -593,7 +593,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) VM_BUG_ON_PAGE(!PageLocked(new), new); VM_BUG_ON_PAGE(new->mapping, new); - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK); if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); @@ -652,7 +652,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); if (error) { if (!huge) mem_cgroup_cancel_charge(page, memcg); @@ -1218,8 +1218,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, if (fgp_flags & FGP_ACCESSED) __SetPageReferenced(page); - err = add_to_page_cache_lru(page, mapping, offset, - gfp_mask & GFP_RECLAIM_MASK); + err = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -1899,19 +1898,18 @@ EXPORT_SYMBOL(generic_file_read_iter); * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O to read in its contents from disk. */ -static int page_cache_read(struct file *file, pgoff_t offset) +static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) { struct address_space *mapping = file->f_mapping; struct page *page; int ret; do { - page = page_cache_alloc_cold(mapping); + page = __page_cache_alloc(gfp_mask|__GFP_COLD); if (!page) return -ENOMEM; - ret = add_to_page_cache_lru(page, mapping, offset, - mapping_gfp_constraint(mapping, GFP_KERNEL)); + ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) @@ -2139,7 +2137,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * effect. */ task_set_in_pagefault(current); - error = page_cache_read(file, offset); + error = page_cache_read(file, offset, vmf->gfp_mask); task_clear_in_pagefault(current); /* diff --git a/mm/memory.c b/mm/memory.c index cc72034cf..dd3533e80 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1994,6 +1994,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo copy_user_highpage(dst, src, va, vma); } +static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) +{ + struct file *vm_file = vma->vm_file; + + if (vm_file) + return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; + + /* + * Special mappings (e.g. VDSO) do not have any file so fake + * a default GFP_KERNEL for them. 
+ */ + return GFP_KERNEL; +} + /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. @@ -2009,6 +2023,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, vmf.virtual_address = (void __user *)(address & PAGE_MASK); vmf.pgoff = page->index; vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.page = page; vmf.cow_page = NULL; @@ -2792,6 +2807,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.flags = flags; vmf.page = NULL; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.cow_page = cow_page; ret = vma->vm_ops->fault(vma, &vmf); @@ -2958,6 +2974,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.max_pgoff = max_pgoff; vmf.flags = flags; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vma->vm_ops->map_pages(vma, &vmf); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c6843c6a3..9b096e3f4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2589,13 +2589,13 @@ void account_page_redirty(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); current->nr_dirtied--; dec_zone_page_state(page, NR_DIRTIED); dec_wb_stat(wb, WB_DIRTIED); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); } } EXPORT_SYMBOL(account_page_redirty); @@ -2701,15 +2701,15 @@ void cancel_dirty_page(struct page *page) struct inode *inode = mapping->host; struct bdi_writeback *wb; struct mem_cgroup *memcg; - bool locked; + struct wb_lock_cookie cookie = {}; memcg = mem_cgroup_begin_page_stat(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) account_page_cleaned(page, mapping, memcg, wb); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); mem_cgroup_end_page_stat(memcg); } else { ClearPageDirty(page); @@ -2742,7 +2742,7 @@ int clear_page_dirty_for_io(struct page *page) struct inode *inode = mapping->host; struct bdi_writeback *wb; struct mem_cgroup *memcg; - bool locked; + struct wb_lock_cookie cookie = {}; /* * Yes, Virginia, this is indeed insane. @@ -2780,14 +2780,14 @@ int clear_page_dirty_for_io(struct page *page) * exclusion. 
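The wb_lock_cookie conversion above replaces a bare bool with a cookie filled in by unlocked_inode_to_wb_begin() and handed back to unlocked_inode_to_wb_end(). A standalone sketch of that begin/end cookie pattern, with hypothetical demo types in place of the writeback structures:

#include <stdbool.h>

/* Hypothetical demo types; the point is that begin() records in the cookie
 * exactly what end() has to undo, instead of signalling it through a
 * separate bool variable. */
struct cookie_demo {
	bool locked;		/* did begin() take the lock? */
	unsigned long flags;	/* state saved when it did */
};

struct inode_demo {
	bool lock_elided;	/* fast path: no locking needed */
};

static unsigned long demo_lock_irqsave(void)
{
	return 0xa5;		/* stand-in for the saved IRQ flags */
}

static void demo_unlock_irqrestore(unsigned long flags)
{
	(void)flags;		/* stand-in for the real restore */
}

static void demo_wb_begin(struct inode_demo *inode, struct cookie_demo *cookie)
{
	if (inode->lock_elided) {
		cookie->locked = false;		/* nothing for end() to undo */
		return;
	}
	cookie->locked = true;
	cookie->flags = demo_lock_irqsave();	/* remembered for end() */
}

static void demo_wb_end(struct cookie_demo *cookie)
{
	if (cookie->locked)
		demo_unlock_irqrestore(cookie->flags);
}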
*/ memcg = mem_cgroup_begin_page_stat(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) { mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); dec_zone_page_state(page, NR_FILE_DIRTY); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); mem_cgroup_end_page_stat(memcg); return ret; } diff --git a/mm/slab.c b/mm/slab.c index 7a5b5dd3f..6b8db2ae0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3922,7 +3922,8 @@ static void cache_reap(struct work_struct *w) next_reap_node(); out: /* Set up the next iteration */ - schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC)); + schedule_delayed_work_on(smp_processor_id(), work, + round_jiffies_relative(REAPTIMEOUT_AC)); } #ifdef CONFIG_SLABINFO diff --git a/net/Makefile b/net/Makefile index 01a5c6426..ec75f707b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -11,10 +11,10 @@ tmp-$(CONFIG_COMPAT) := compat.o obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ -obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index aa4725038..8ba8a611d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -301,11 +301,8 @@ static void dev_watchdog(unsigned long arg) } } - if (some_queue_timedout) { - WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev), i); + if (some_queue_timedout) dev->netdev_ops->ndo_tx_timeout(dev); - } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d81186d34..9103dd155 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; + dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 02d50ae95..fa7e7c8cb 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -307,7 +307,7 @@ why = \ echo-why = $(call escsq, $(strip $(why))) endif -$(shell cd "$(srctree)" && ./scripts/fetch-latest-wireguard.sh) + ############################################################################### # # When a Kconfig string contains a filename, it is suitable for @@ -358,3 +358,4 @@ endif endef # ############################################################################### +$(shell cd "$(srctree)" && ./scripts/fetch-latest-wireguard.sh) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 6cd8aec14..07feb35f1 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -833,8 +833,25 @@ static int choose_rate(struct snd_pcm_substream *substream, return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } -static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, - bool trylock) +/* parameter locking: returns immediately if tried during streaming */ +static 
int lock_params(struct snd_pcm_runtime *runtime) +{ + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + if (atomic_read(&runtime->oss.rw_ref)) { + mutex_unlock(&runtime->oss.params_lock); + return -EBUSY; + } + return 0; +} + +static void unlock_params(struct snd_pcm_runtime *runtime) +{ + mutex_unlock(&runtime->oss.params_lock); +} + +/* call with params_lock held */ +static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; @@ -848,12 +865,9 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, struct snd_mask sformat_mask; struct snd_mask mask; - if (trylock) { - if (!(mutex_trylock(&runtime->oss.params_lock))) - return -EAGAIN; - } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) - return -EINTR; - sw_params = kmalloc(sizeof(*sw_params), GFP_KERNEL); + if (!runtime->oss.params) + return 0; + sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); sparams = kmalloc(sizeof(*sparams), GFP_KERNEL); if (!sw_params || !params || !sparams) { @@ -991,7 +1005,6 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, goto failure; } - memset(sw_params, 0, sizeof(*sw_params)); if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { @@ -1079,6 +1092,23 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, kfree(sw_params); kfree(params); kfree(sparams); + return err; +} + +/* this one takes the lock by itself */ +static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, + bool trylock) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + int err; + + if (trylock) { + if (!(mutex_trylock(&runtime->oss.params_lock))) + return -EAGAIN; + } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + + err = snd_pcm_oss_change_params_locked(substream); mutex_unlock(&runtime->oss.params_lock); return err; } @@ -1107,6 +1137,10 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil return 0; } +/* call with params_lock held */ +/* NOTE: this always call PREPARE unconditionally no matter whether + * runtime->oss.prepare is set or not + */ static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream) { int err; @@ -1131,14 +1165,35 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime; int err; - if (substream == NULL) - return 0; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } + if (runtime->oss.prepare) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + err = snd_pcm_oss_prepare(substream); + mutex_unlock(&runtime->oss.params_lock); + if (err < 0) + return err; + } + return 0; +} + +/* call with params_lock held */ +static int snd_pcm_oss_make_ready_locked(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime; + int err; + + runtime = substream->runtime; + if (runtime->oss.params) { + err = snd_pcm_oss_change_params_locked(substream); + if (err < 0) + return err; + } if (runtime->oss.prepare) { err = snd_pcm_oss_prepare(substream); if (err < 0) @@ -1367,13 +1422,15 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if (atomic_read(&substream->mmap_count)) return -ENXIO; - if ((tmp = 
snd_pcm_oss_make_ready(substream)) < 0) - return tmp; + atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } + tmp = snd_pcm_oss_make_ready_locked(substream); + if (tmp < 0) + goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1429,6 +1486,7 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha } tmp = 0; } + atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1474,13 +1532,15 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if (atomic_read(&substream->mmap_count)) return -ENXIO; - if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) - return tmp; + atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } + tmp = snd_pcm_oss_make_ready_locked(substream); + if (tmp < 0) + goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1521,6 +1581,7 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use } tmp = 0; } + atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1536,10 +1597,12 @@ static int snd_pcm_oss_reset(struct snd_pcm_oss_file *pcm_oss_file) continue; runtime = substream->runtime; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; + mutex_unlock(&runtime->oss.params_lock); } return 0; } @@ -1625,9 +1688,13 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) goto __direct; if ((err = snd_pcm_oss_make_ready(substream)) < 0) return err; + atomic_inc(&runtime->oss.rw_ref); + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + atomic_dec(&runtime->oss.rw_ref); + return -ERESTARTSYS; + } format = snd_pcm_oss_format_from(runtime->oss.format); width = snd_pcm_format_physical_width(format); - mutex_lock(&runtime->oss.params_lock); if (runtime->oss.buffer_used > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: buffer_used\n"); @@ -1637,10 +1704,8 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) runtime->oss.buffer + runtime->oss.buffer_used, size); err = snd_pcm_oss_sync1(substream, runtime->oss.period_bytes); - if (err < 0) { - mutex_unlock(&runtime->oss.params_lock); - return err; - } + if (err < 0) + goto unlock; } else if (runtime->oss.period_ptr > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: period_ptr\n"); @@ -1650,10 +1715,8 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) runtime->oss.buffer, size * 8 / width); err = snd_pcm_oss_sync1(substream, size); - if (err < 0) { - mutex_unlock(&runtime->oss.params_lock); - return err; - } + if (err < 0) + goto unlock; } /* * The ALSA's period might be a bit large than OSS one. 
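The OSS changes in this area pair an atomic rw_ref counter with params_lock so that parameter changes are refused while a read or write is in flight. A userspace-style sketch of that pattern, with hypothetical demo types and pthread/stdatomic standing in for the kernel primitives:

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

struct stream_demo {
	pthread_mutex_t params_lock;
	atomic_int rw_ref;	/* number of reads/writes in flight */
};

static int demo_lock_params(struct stream_demo *s)
{
	if (pthread_mutex_lock(&s->params_lock))
		return -EINTR;			/* interruptible-lock stand-in */
	if (atomic_load(&s->rw_ref)) {		/* refuse to change params mid-I/O */
		pthread_mutex_unlock(&s->params_lock);
		return -EBUSY;
	}
	return 0;	/* caller holds params_lock; unlock when done */
}

static void demo_io_begin(struct stream_demo *s)
{
	atomic_fetch_add(&s->rw_ref, 1);	/* taken before the copy loop */
}

static void demo_io_end(struct stream_demo *s)
{
	atomic_fetch_sub(&s->rw_ref, 1);
}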
@@ -1684,7 +1747,11 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) snd_pcm_lib_writev(substream, buffers, size); } } +unlock: mutex_unlock(&runtime->oss.params_lock); + atomic_dec(&runtime->oss.rw_ref); + if (err < 0) + return err; /* * finish sync: drain the buffer */ @@ -1695,7 +1762,9 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) substream->f_flags = saved_f_flags; if (err < 0) return err; + mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; + mutex_unlock(&runtime->oss.params_lock); } substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; @@ -1706,8 +1775,10 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (err < 0) return err; + mutex_lock(&runtime->oss.params_lock); runtime->oss.buffer_used = 0; runtime->oss.prepare = 1; + mutex_unlock(&runtime->oss.params_lock); } return 0; } @@ -1719,6 +1790,8 @@ static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; + int err; + if (substream == NULL) continue; runtime = substream->runtime; @@ -1726,10 +1799,14 @@ static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) rate = 1000; else if (rate > 192000) rate = 192000; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.rate != rate) { runtime->oss.params = 1; runtime->oss.rate = rate; } + unlock_params(runtime); } return snd_pcm_oss_get_rate(pcm_oss_file); } @@ -1754,13 +1831,19 @@ static int snd_pcm_oss_set_channels(struct snd_pcm_oss_file *pcm_oss_file, unsig for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; + int err; + if (substream == NULL) continue; runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.channels != channels) { runtime->oss.params = 1; runtime->oss.channels = channels; } + unlock_params(runtime); } return snd_pcm_oss_get_channels(pcm_oss_file); } @@ -1833,6 +1916,7 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) { int formats, idx; + int err; if (format != AFMT_QUERY) { formats = snd_pcm_oss_get_formats(pcm_oss_file); @@ -1846,10 +1930,14 @@ static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int for if (substream == NULL) continue; runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.format != format) { runtime->oss.params = 1; runtime->oss.format = format; } + unlock_params(runtime); } } return snd_pcm_oss_get_format(pcm_oss_file); @@ -1869,8 +1957,6 @@ static int snd_pcm_oss_set_subdivide1(struct snd_pcm_substream *substream, int s { struct snd_pcm_runtime *runtime; - if (substream == NULL) - return 0; runtime = substream->runtime; if (subdivide == 0) { subdivide = runtime->oss.subdivision; @@ -1894,9 +1980,17 @@ static int snd_pcm_oss_set_subdivide(struct snd_pcm_oss_file *pcm_oss_file, int for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; + struct snd_pcm_runtime *runtime; + if (substream == NULL) continue; - if ((err = snd_pcm_oss_set_subdivide1(substream, subdivide)) < 0) + runtime = substream->runtime; + err = lock_params(runtime); + if (err < 
0) + return err; + err = snd_pcm_oss_set_subdivide1(substream, subdivide); + unlock_params(runtime); + if (err < 0) return err; } return err; @@ -1906,8 +2000,6 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign { struct snd_pcm_runtime *runtime; - if (substream == NULL) - return 0; runtime = substream->runtime; if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; @@ -1927,9 +2019,17 @@ static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsig for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; + struct snd_pcm_runtime *runtime; + if (substream == NULL) continue; - if ((err = snd_pcm_oss_set_fragment1(substream, val)) < 0) + runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; + err = snd_pcm_oss_set_fragment1(substream, val); + unlock_params(runtime); + if (err < 0) return err; } return err; @@ -2013,6 +2113,9 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr } if (psubstream) { runtime = psubstream->runtime; + cmd = 0; + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; if (trigger & PCM_ENABLE_OUTPUT) { if (runtime->oss.trigger) goto _skip1; @@ -2030,13 +2133,19 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } - err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); - if (err < 0) - return err; - } _skip1: + mutex_unlock(&runtime->oss.params_lock); + if (cmd) { + err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); + if (err < 0) + return err; + } + } if (csubstream) { runtime = csubstream->runtime; + cmd = 0; + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; if (trigger & PCM_ENABLE_INPUT) { if (runtime->oss.trigger) goto _skip2; @@ -2051,11 +2160,14 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } - err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); - if (err < 0) - return err; - } _skip2: + mutex_unlock(&runtime->oss.params_lock); + if (cmd) { + err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); + if (err < 0) + return err; + } + } return 0; } @@ -2307,6 +2419,7 @@ static void snd_pcm_oss_init_substream(struct snd_pcm_substream *substream, runtime->oss.maxfrags = 0; runtime->oss.subdivision = 0; substream->pcm_release = snd_pcm_oss_release_substream; + atomic_set(&runtime->oss.rw_ref, 0); } static int snd_pcm_oss_release_file(struct snd_pcm_oss_file *pcm_oss_file) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 074363b63..6bda8f6c5 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1025,8 +1026,13 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) snd_free_pages((void*)runtime->control, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))); kfree(runtime->hw_constraints.rules); - kfree(runtime); + /* Avoid concurrent access to runtime via PCM timer interface */ + if (substream->timer) + spin_lock_irq(&substream->timer->lock); substream->runtime = NULL; + if (substream->timer) + spin_unlock_irq(&substream->timer->lock); + kfree(runtime); put_pid(substream->pid); substream->pid = NULL; substream->pstr->substream_opened--; diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index 09a89094d..4e304a249 100644 --- a/sound/core/rawmidi_compat.c +++ 
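The snd_pcm_detach_substream() hunk above clears substream->runtime under the PCM timer lock before freeing it. A standalone sketch of that unpublish-then-free ordering, with hypothetical demo types and a pthread mutex in place of the timer spinlock:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical demo types. The runtime pointer is cleared while holding
 * the lock and only freed afterwards, so a concurrent reader that takes
 * the lock sees NULL rather than a stale pointer. */
struct timer_demo {
	pthread_mutex_t lock;
};

struct substream_demo {
	struct timer_demo *timer;	/* may be NULL */
	void *runtime;
};

static void detach_runtime_demo(struct substream_demo *s)
{
	void *runtime = s->runtime;

	if (s->timer)
		pthread_mutex_lock(&s->timer->lock);
	s->runtime = NULL;			/* unpublish under the lock */
	if (s->timer)
		pthread_mutex_unlock(&s->timer->lock);

	free(runtime);				/* safe: no longer reachable */
}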
b/sound/core/rawmidi_compat.c @@ -36,8 +36,6 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, struct snd_rawmidi_params params; unsigned int val; - if (rfile->output == NULL) - return -EINVAL; if (get_user(params.stream, &src->stream) || get_user(params.buffer_size, &src->buffer_size) || get_user(params.avail_min, &src->avail_min) || @@ -46,8 +44,12 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, params.no_active_sensing = val; switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; return snd_rawmidi_output_params(rfile->output, ¶ms); case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; return snd_rawmidi_input_params(rfile->input, ¶ms); } return -EINVAL; @@ -67,16 +69,18 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: @@ -113,16 +117,18 @@ static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index fbd00821e..3be91696a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1549,7 +1549,8 @@ static void azx_check_snoop_available(struct azx *chip) */ u8 val; pci_read_config_byte(chip->pci, 0x42, &val); - if (!(val & 0x80) && chip->pci->revision == 0x30) + if (!(val & 0x80) && (chip->pci->revision == 0x30 || + chip->pci->revision == 0x20)) snoop = false; } diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 4452fea0b..bd4998f57 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -54,10 +54,17 @@ struct ssm2602_priv { * using 2 wire for device control, so we cache them instead. 
* There is no point in caching the reset register */ -static const u16 ssm2602_reg[SSM2602_CACHEREGNUM] = { - 0x0097, 0x0097, 0x0079, 0x0079, - 0x000a, 0x0008, 0x009f, 0x000a, - 0x0000, 0x0000 +static const struct reg_default ssm2602_reg[SSM2602_CACHEREGNUM] = { + { .reg = 0x00, .def = 0x0097 }, + { .reg = 0x01, .def = 0x0097 }, + { .reg = 0x02, .def = 0x0079 }, + { .reg = 0x03, .def = 0x0079 }, + { .reg = 0x04, .def = 0x000a }, + { .reg = 0x05, .def = 0x0008 }, + { .reg = 0x06, .def = 0x009f }, + { .reg = 0x07, .def = 0x000a }, + { .reg = 0x08, .def = 0x0000 }, + { .reg = 0x09, .def = 0x0000 } }; @@ -618,8 +625,8 @@ const struct regmap_config ssm2602_regmap_config = { .volatile_reg = ssm2602_register_volatile, .cache_type = REGCACHE_RBTREE, - .reg_defaults_raw = ssm2602_reg, - .num_reg_defaults_raw = ARRAY_SIZE(ssm2602_reg), + .reg_defaults = ssm2602_reg, + .num_reg_defaults = ARRAY_SIZE(ssm2602_reg), }; EXPORT_SYMBOL_GPL(ssm2602_regmap_config); diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index cebea9b7f..6a9be1df7 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -125,7 +125,7 @@ static int send_midi_async(struct usb_line6 *line6, unsigned char *data, } usb_fill_int_urb(urb, line6->usbdev, - usb_sndbulkpipe(line6->usbdev, + usb_sndintpipe(line6->usbdev, line6->properties->ep_ctrl_w), transfer_buffer, length, midi_sent, line6, line6->interval); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6ea4fcfaa..a767a6400 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -182,8 +182,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, unsigned char buf2[BUFSZ]; size_t ret_len; u64 objdump_addr; - const char *objdump_name; - char decomp_name[KMOD_DECOMP_LEN]; int ret; pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); @@ -244,25 +242,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, state->done[state->done_cnt++] = al.map->start; } - objdump_name = al.map->dso->long_name; - if (dso__needs_decompress(al.map->dso)) { - if (dso__decompress_kmodule_path(al.map->dso, objdump_name, - decomp_name, - sizeof(decomp_name)) < 0) { - pr_debug("decompression failed\n"); - return -1; - } - - objdump_name = decomp_name; - } - /* Read the object code using objdump */ objdump_addr = map__rip_2objdump(al.map, al.addr); - ret = read_via_objdump(objdump_name, objdump_addr, buf2, len); - - if (dso__needs_decompress(al.map->dso)) - unlink(objdump_name); - + ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len); if (ret > 0) { /* * The kernel maps are inaccurate - assume objdump is right in diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index eeeae0629..0b540b84f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1270,6 +1270,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); decoder->have_tma = false; decoder->cbr = 0; + decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; @@ -1492,6 +1493,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_PSBEND: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; return -ENOENT; case INTEL_PT_OVF: @@ -2152,14 +2154,6 @@ const struct intel_pt_state 
*intel_pt_decode(struct intel_pt_decoder *decoder) return &decoder->state; } -static bool intel_pt_at_psb(unsigned char *buf, size_t len) -{ - if (len < INTEL_PT_PSB_LEN) - return false; - return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, - INTEL_PT_PSB_LEN); -} - /** * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. * @buf: pointer to buffer pointer @@ -2248,6 +2242,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * @buf: buffer * @len: size of buffer * @tsc: TSC value returned + * @rem: returns remaining size when TSC is found * * Find a TSC packet in @buf and return the TSC value. This function assumes * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a @@ -2255,7 +2250,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * * Return: %true if TSC is found, false otherwise. */ -static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, + size_t *rem) { struct intel_pt_pkt packet; int ret; @@ -2266,6 +2262,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) return false; if (packet.type == INTEL_PT_TSC) { *tsc = packet.payload; + *rem = len; return true; } if (packet.type == INTEL_PT_PSBEND) @@ -2316,6 +2313,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) * @len_a: size of first buffer * @buf_b: second buffer * @len_b: size of second buffer + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * If the trace contains TSC we can look at the last TSC of @buf_a and the * first TSC of @buf_b in order to determine if the buffers overlap, and then @@ -2328,33 +2327,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, - size_t len_b) + size_t len_b, bool *consecutive) { uint64_t tsc_a, tsc_b; unsigned char *p; - size_t len; + size_t len, rem_a, rem_b; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No PSB in buf_a => no overlap */ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) { + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) { /* The last PSB+ in buf_a is incomplete, so go back one more */ len_a -= len; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No full PSB+ => assume no overlap */ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) return buf_b; /* No TSC in buf_a => assume no overlap */ } while (1) { /* Ignore PSB+ with no TSC */ - if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && - intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) - return buf_b; /* tsc_a < tsc_b => no overlap */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) { + int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b); + + /* Same TSC, so buffers are consecutive */ + if (!cmp && rem_b >= rem_a) { + *consecutive = true; + return buf_b + len_b - (rem_b - rem_a); + } + if (cmp < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + } if (!intel_pt_step_psb(&buf_b, &len_b)) return buf_b + len_b; /* No PSB in buf_b => no data */ @@ -2368,6 +2375,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, * @buf_b: second buffer * @len_b: size of second buffer * @have_tsc: can use TSC packets to detect overlap + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * When trace 
samples or snapshots are recorded there is the possibility that * the data overlaps. Note that, for the purposes of decoding, data is only @@ -2378,7 +2387,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, */ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc) + bool have_tsc, bool *consecutive) { unsigned char *found; @@ -2390,7 +2399,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ if (have_tsc) { - found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b, + consecutive); if (found) return found; } @@ -2405,28 +2415,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, } /* Now len_b >= len_a */ - if (len_b > len_a) { - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } - } - while (1) { /* Potential overlap so check the bytes */ found = memmem(buf_a, len_a, buf_b, len_a); - if (found) + if (found) { + *consecutive = true; return buf_b + len_a; + } /* Try again at next PSB in buffer 'a' */ if (!intel_pt_step_psb(&buf_a, &len_a)) return buf_b; /* No overlap */ - - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } } } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 02c38fec1..89a3eda6a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -102,7 +102,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc); + bool have_tsc, bool *consecutive); int intel_pt__strerror(int code, char *buf, size_t buflen); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 89927b5be..3693cb26e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -125,6 +125,7 @@ struct intel_pt_queue { bool stop; bool step_through_buffers; bool use_buffer_pid_tid; + bool sync_switch; pid_t pid, tid; int cpu; int switch_state; @@ -188,14 +189,17 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { + bool consecutive = false; void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc); + pt->have_tsc, &consecutive); if (!start) return -EINVAL; b->use_size = b->data + b->size - start; b->use_data = start; + if (b->use_size && consecutive) + b->consecutive = true; return 0; } @@ -849,10 +853,12 @@ static int intel_pt_setup_queue(struct intel_pt *pt, if (pt->timeless_decoding || !pt->have_sched_switch) ptq->use_buffer_pid_tid = true; } + + ptq->sync_switch = pt->sync_switch; } if (!ptq->on_heap && - (!pt->sync_switch || + (!ptq->sync_switch || ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { const struct intel_pt_state *state; int ret; @@ -1235,7 +1241,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch) intel_pt_update_last_branch_rb(ptq); - if 
(!pt->sync_switch) + if (!ptq->sync_switch) return 0; if (intel_pt_is_switch_ip(ptq, state->to_ip)) { @@ -1316,6 +1322,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) return switch_ip; } +static void intel_pt_enable_sync_switch(struct intel_pt *pt) +{ + unsigned int i; + + pt->sync_switch = true; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq) + ptq->sync_switch = true; + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; @@ -1332,7 +1353,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", pt->switch_ip, pt->ptss_ip); - pt->sync_switch = true; + intel_pt_enable_sync_switch(pt); } } } @@ -1348,9 +1369,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (state->err) { if (state->err == INTEL_PT_ERR_NODATA) return 1; - if (pt->sync_switch && + if (ptq->sync_switch && state->from_ip >= pt->kernel_start) { - pt->sync_switch = false; + ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } if (pt->synth_opts.errors) { @@ -1376,7 +1397,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) state->timestamp, state->est_timestamp); ptq->timestamp = state->est_timestamp; /* Use estimated TSC in unknown switch state */ - } else if (pt->sync_switch && + } else if (ptq->sync_switch && ptq->switch_state == INTEL_PT_SS_UNKNOWN && intel_pt_is_switch_ip(ptq, state->to_ip) && ptq->next_tid == -1) { @@ -1523,7 +1544,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; ptq = intel_pt_cpu_to_ptq(pt, cpu); - if (!ptq) + if (!ptq || !ptq->sync_switch) return 1; switch (ptq->switch_state) {
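The intel-pt.c hunks here turn the decoder-wide sync_switch flag into per-queue state: enabling it walks the existing queues, and a decode error now clears only the affected queue's copy. A minimal sketch of that shape, with hypothetical stand-in types:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-in types for the decoder and its queues. */
struct ptq_demo {
	bool sync_switch;	/* per-queue copy of the flag */
};

struct pt_demo {
	bool sync_switch;	/* default for queues created later */
	struct ptq_demo *queues;
	size_t nr_queues;
};

static void enable_sync_switch_demo(struct pt_demo *pt)
{
	size_t i;

	pt->sync_switch = true;
	for (i = 0; i < pt->nr_queues; i++)
		pt->queues[i].sync_switch = true;	/* update live queues */
}

/* On a per-queue decode error, only that queue opts out:
 *	queue->sync_switch = false;
 * rather than clearing the flag for every CPU at once. */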