diff options
author | Mike Pagano <mpagano@gentoo.org> | 2014-07-15 08:22:59 -0400 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2014-07-15 08:22:59 -0400 |
commit | 3fe9f8aab7f5e1262afd9d1f45be1e3d0afe8ce9 (patch) | |
tree | 1a16db79dd24207eac96d76219ef57bbf3ba76a5 | |
parent | Zero copy for infiniband psm userspace driver. ACPI: Disable Windows 8 compat... (diff) | |
download | linux-patches-3fe9f8aab7f5e1262afd9d1f45be1e3d0afe8ce9.tar.gz linux-patches-3fe9f8aab7f5e1262afd9d1f45be1e3d0afe8ce9.tar.bz2 linux-patches-3fe9f8aab7f5e1262afd9d1f45be1e3d0afe8ce9.zip |
Kernel patch enables gcc optimizations for additional CPUs. 3.16-1
-rw-r--r-- | 0000_README | 4 | ||||
-rw-r--r-- | 5000_enable-additional-cpu-optimizations-for-gcc.patch | 327 |
2 files changed, 331 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 6276507b..da7da0db 100644 --- a/0000_README +++ b/0000_README @@ -71,3 +71,7 @@ Patch: 4567_distro-Gentoo-Kconfig.patch From: Tom Wijsman <TomWij@gentoo.org> Desc: Add Gentoo Linux support config settings and defaults. +Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch +From: https://github.com/graysky2/kernel_gcc_patch/ +Desc: Kernel patch enables gcc optimizations for additional CPUs. + diff --git a/5000_enable-additional-cpu-optimizations-for-gcc.patch b/5000_enable-additional-cpu-optimizations-for-gcc.patch new file mode 100644 index 00000000..f7ab6f0f --- /dev/null +++ b/5000_enable-additional-cpu-optimizations-for-gcc.patch @@ -0,0 +1,327 @@ +This patch has been tested on and known to work with kernel versions from 3.2 +up to the latest git version (pulled on 12/14/2013). + +This patch will expand the number of microarchitectures to include new +processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family +14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD +Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core +i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th +Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag. + +Small but real speed increases are measurable using a make endpoint comparing +a generic kernel to one built with one of the respective microarchs. 
+ +See the following experimental evidence supporting this statement: +https://github.com/graysky2/kernel_gcc_patch + +REQUIREMENTS +linux version >=3.15 +gcc version <4.9 + +--- +diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h +--- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500 ++++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500 +@@ -15,6 +15,16 @@ + #define MODULE_PROC_FAMILY "586MMX " + #elif defined CONFIG_MCORE2 + #define MODULE_PROC_FAMILY "CORE2 " ++#elif defined CONFIG_MNATIVE ++#define MODULE_PROC_FAMILY "NATIVE " ++#elif defined CONFIG_MCOREI7 ++#define MODULE_PROC_FAMILY "COREI7 " ++#elif defined CONFIG_MCOREI7AVX ++#define MODULE_PROC_FAMILY "COREI7AVX " ++#elif defined CONFIG_MCOREAVXI ++#define MODULE_PROC_FAMILY "COREAVXI " ++#elif defined CONFIG_MCOREAVX2 ++#define MODULE_PROC_FAMILY "COREAVX2 " + #elif defined CONFIG_MATOM + #define MODULE_PROC_FAMILY "ATOM " + #elif defined CONFIG_M686 +@@ -33,6 +43,18 @@ + #define MODULE_PROC_FAMILY "K7 " + #elif defined CONFIG_MK8 + #define MODULE_PROC_FAMILY "K8 " ++#elif defined CONFIG_MK10 ++#define MODULE_PROC_FAMILY "K10 " ++#elif defined CONFIG_MBARCELONA ++#define MODULE_PROC_FAMILY "BARCELONA " ++#elif defined CONFIG_MBOBCAT ++#define MODULE_PROC_FAMILY "BOBCAT " ++#elif defined CONFIG_MBULLDOZER ++#define MODULE_PROC_FAMILY "BULLDOZER " ++#elif defined CONFIG_MPILEDRIVER ++#define MODULE_PROC_FAMILY "PILEDRIVER " ++#elif defined CONFIG_MJAGUAR ++#define MODULE_PROC_FAMILY "JAGUAR " + #elif defined CONFIG_MELAN + #define MODULE_PROC_FAMILY "ELAN " + #elif defined CONFIG_MCRUSOE +diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu +--- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500 ++++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500 +@@ -139,7 +139,7 @@ config MPENTIUM4 + + + config MK6 +- bool "K6/K6-II/K6-III" ++ bool "AMD K6/K6-II/K6-III" + depends on X86_32 + ---help--- + Select this for an AMD K6-family 
processor. Enables use of +@@ -147,7 +147,7 @@ config MK6 + flags to GCC. + + config MK7 +- bool "Athlon/Duron/K7" ++ bool "AMD Athlon/Duron/K7" + depends on X86_32 + ---help--- + Select this for an AMD Athlon K7-family processor. Enables use of +@@ -155,12 +155,55 @@ config MK7 + flags to GCC. + + config MK8 +- bool "Opteron/Athlon64/Hammer/K8" ++ bool "AMD Opteron/Athlon64/Hammer/K8" + ---help--- + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + ++config MK10 ++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" ++ ---help--- ++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, ++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. ++ Enables use of some extended instructions, and passes appropriate ++ optimization flags to GCC. ++ ++config MBARCELONA ++ bool "AMD Barcelona" ++ ---help--- ++ Select this for AMD Barcelona and newer processors. ++ ++ Enables -march=barcelona ++ ++config MBOBCAT ++ bool "AMD Bobcat" ++ ---help--- ++ Select this for AMD Bobcat processors. ++ ++ Enables -march=btver1 ++ ++config MBULLDOZER ++ bool "AMD Bulldozer" ++ ---help--- ++ Select this for AMD Bulldozer processors. ++ ++ Enables -march=bdver1 ++ ++config MPILEDRIVER ++ bool "AMD Piledriver" ++ ---help--- ++ Select this for AMD Piledriver processors. ++ ++ Enables -march=bdver2 ++ ++config MJAGUAR ++ bool "AMD Jaguar" ++ ---help--- ++ Select this for AMD Jaguar processors. ++ ++ Enables -march=btver2 ++ + config MCRUSOE + bool "Crusoe" + depends on X86_32 +@@ -251,8 +294,17 @@ config MPSC + using the cpu family field + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. + ++config MATOM ++ bool "Intel Atom" ++ ---help--- ++ ++ Select this for the Intel Atom platform. Intel Atom CPUs have an ++ in-order pipelining architecture and thus can benefit from ++ accordingly optimized code. 
Use a recent GCC with specific Atom ++ support in order to fully benefit from selecting this option. ++ + config MCORE2 +- bool "Core 2/newer Xeon" ++ bool "Intel Core 2" + ---help--- + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and +@@ -260,14 +312,40 @@ config MCORE2 + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) + +-config MATOM +- bool "Intel Atom" ++ Enables -march=core2 ++ ++config MCOREI7 ++ bool "Intel Core i7" + ---help--- + +- Select this for the Intel Atom platform. Intel Atom CPUs have an +- in-order pipelining architecture and thus can benefit from +- accordingly optimized code. Use a recent GCC with specific Atom +- support in order to fully benefit from selecting this option. ++ Select this for the Intel Nehalem platform. Intel Nehalem processors ++ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors. ++ ++ Enables -march=corei7 ++ ++config MCOREI7AVX ++ bool "Intel Core 2nd Gen AVX" ++ ---help--- ++ ++ Select this for 2nd Gen Core processors including Sandy Bridge. ++ ++ Enables -march=corei7-avx ++ ++config MCOREAVXI ++ bool "Intel Core 3rd Gen AVX" ++ ---help--- ++ ++ Select this for 3rd Gen Core processors including Ivy Bridge. ++ ++ Enables -march=core-avx-i ++ ++config MCOREAVX2 ++ bool "Intel Core AVX2" ++ ---help--- ++ ++ Select this for AVX2 enabled processors including Haswell. ++ ++ Enables -march=core-avx2 + + config GENERIC_CPU + bool "Generic-x86-64" +@@ -276,6 +354,19 @@ config GENERIC_CPU + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + ++config MNATIVE ++ bool "Native optimizations autodetected by GCC" ++ ---help--- ++ ++ GCC 4.2 and above support -march=native, which automatically detects ++ the optimum settings to use based on your processor. -march=native ++ also detects and applies additional settings beyond -march specific ++ to your CPU, (eg. -msse4). Unless you have a specific reason not to ++ (e.g. 
distcc cross-compiling), you should probably be using ++ -march=native rather than anything listed below. ++ ++ Enables -march=native ++ + endchoice + + config X86_GENERIC +@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT + config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || MPSC +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU ++ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU + default "4" if MELAN || M486 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + +@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16 + + config X86_INTEL_USERCOPY + def_bool y +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 ++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 + + config X86_USE_PPRO_CHECKSUM + def_bool y +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE + + config X86_USE_3DNOW + def_bool y +@@ -363,17 +454,17 @@ config X86_P6_NOP + + config X86_TSC + def_bool y +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 
|| MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM) || X86_64 || MNATIVE + + config X86_CMPXCHG64 + def_bool y +- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM ++ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE + + # this should be set for all -march=.. options where the compiler + # generates cmov. + config X86_CMOV + def_bool y +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) ++ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) + + config X86_MINIMUM_CPU_FAMILY + int +diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile +--- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500 ++++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500 +@@ -61,11 +61,26 @@ else + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) + + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) ++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) ++ cflags-$(CONFIG_MK10) += 
$(call cc-option,-march=amdfam10) ++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) ++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) ++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) + + cflags-$(CONFIG_MCORE2) += \ +- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) ++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) ++ cflags-$(CONFIG_MCOREI7) += \ ++ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7)) ++ cflags-$(CONFIG_MCOREI7AVX) += \ ++ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx)) ++ cflags-$(CONFIG_MCOREAVXI) += \ ++ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i)) ++ cflags-$(CONFIG_MCOREAVX2) += \ ++ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) +diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu +--- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500 ++++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500 +@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6 + # Please note, that patches that add -march=athlon-xp and friends are pointless. + # They make zero difference whatsosever to performance at this time. 
+ cflags-$(CONFIG_MK7) += -march=athlon ++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) ++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) ++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) ++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) ++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) ++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) + cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 + cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 + cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc- + cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) + cflags-$(CONFIG_MVIAC7) += -march=i686 + cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) ++cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7) ++cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx) ++cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i) ++cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) |