2 files changed, 119 insertions, 0 deletions
diff --git a/meta/recipes-devtools/qemu/qemu.inc b/meta/recipes-devtools/qemu/qemu.inc
index bbb9038961..6c0edcb706 100644
--- a/meta/recipes-devtools/qemu/qemu.inc
+++ b/meta/recipes-devtools/qemu/qemu.inc
@@ -31,6 +31,7 @@ SRC_URI = "https://download.qemu.org/${BPN}-${PV}.tar.xz \
            file://0001-qemu-Do-not-include-file-if-not-exists.patch \
            file://find_datadir.patch \
            file://usb-fix-setup_len-init.patch \
+           file://0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch \
            "
 UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar"
 
diff --git a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch b/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch
new file mode 100644
index 0000000000..b6312e1543
--- /dev/null
+++ b/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch
@@ -0,0 +1,118 @@
+From b3fcc7d96523ad8e3ea28c09d495ef08529d01ce Mon Sep 17 00:00:00 2001
+From: Victor Kamensky <kamensky@cisco.com>
+Date: Wed, 7 Oct 2020 10:19:42 -0700
+Subject: [PATCH] mips: add 34Kf-64tlb fictitious cpu type like 34Kf but with
+ 64 TLBs
+
+In Yocto Project CI runs it was observed that test run
+of 32 bit mips image takes almost twice longer than 64 bit
+mips image with the same logical load and CI execution
+hits timeout.
+
+See https://bugzilla.yoctoproject.org/show_bug.cgi?id=13992
+
+Yocto project uses 34Kf cpu type to run 32 bit mips image,
+and MIPS64R2-generic cpu type to run 64 bit mips64 image.
+
+Upon qemu behavior differences investigation between mips
+and mips64 two prominent observations came up: under
+logically similar load (same definition and configuration
+of user-land image) in case of mips get_physical_address
+function is called almost twice more often, meaning
+twice more memory accesses involved in this case. Also
+number of tlbwr instruction executed (r4k_helper_tlbwr
+qemu function) almost 16 time bigger in mips case than in
+mips64.
+
+It turns out that 34Kf cpu has 16 TLBs, but in case of
+MIPS64R2-generic it is 64 TLBs. So that explains why
+some many more tlbwr had to be execute by kernel TLB refill
+handler in case of 32 bit misp.
+
+The idea of the fix is to come up with new 34Kf-64tlb fictitious
+cpu type, that would behave exactly as 34Kf but it would
+contain 64 TLBs to reduce TLB trashing. After all, adding
+more TLBs to soft mmu is easy.
+
+Experiment with some significant non-trvial load in Yocto
+environment by running do_testimage load shows that 34Kf-64tlb
+cpu performs 40% or so better than original 34Kf cpu wrt test
+execution real time.
+
+It is not ideal to have cpu type that does not exist in the
+wild but given performance gains it seems to be justified.
+
+Signed-off-by: Victor Kamensky <kamensky@cisco.com>
+---
+ target/mips/translate_init.inc.c | 55 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 55 insertions(+)
+
+diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
+index 637caccd89..b73ab48231 100644
+--- a/target/mips/translate_init.inc.c
++++ b/target/mips/translate_init.inc.c
+@@ -297,6 +297,61 @@ const mips_def_t mips_defs[] =
+         .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
+         .mmu_type = MMU_TYPE_R4000,
+     },
++    /*
++     * Verbatim copy of "34Kf" cpu, only bumped up number of TLB entries
++     * from 16 to 64 (see CP0_Config0 value at CP0C1_MMU bits) to improve
++     * performance by reducing number of TLB refill exceptions and
++     * eliminating need to run all corresponding TLB refill handling
++     * instructions.
++     */
++    {
++        .name = "34Kf-64tlb",
++        .CP0_PRid = 0x00019500,
++        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
++                       (MMU_TYPE_R4000 << CP0C0_MT),
++        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) |
++                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
++                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
++                       (1 << CP0C1_CA),
++        .CP0_Config2 = MIPS_CONFIG2,
++        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_VInt) | (1 << CP0C3_MT) |
++                       (1 << CP0C3_DSPP),
++        .CP0_LLAddr_rw_bitmask = 0,
++        .CP0_LLAddr_shift = 0,
++        .SYNCI_Step = 32,
++        .CCRes = 2,
++        .CP0_Status_rw_bitmask = 0x3778FF1F,
++        .CP0_TCStatus_rw_bitmask = (0 << CP0TCSt_TCU3) | (0 << CP0TCSt_TCU2) |
++                    (1 << CP0TCSt_TCU1) | (1 << CP0TCSt_TCU0) |
++                    (0 << CP0TCSt_TMX) | (1 << CP0TCSt_DT) |
++                    (1 << CP0TCSt_DA) | (1 << CP0TCSt_A) |
++                    (0x3 << CP0TCSt_TKSU) | (1 << CP0TCSt_IXMT) |
++                    (0xff << CP0TCSt_TASID),
++        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
++                    (1 << FCR0_D) | (1 << FCR0_S) | (0x95 << FCR0_PRID),
++        .CP1_fcr31 = 0,
++        .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
++        .CP0_SRSCtl = (0xf << CP0SRSCtl_HSS),
++        .CP0_SRSConf0_rw_bitmask = 0x3fffffff,
++        .CP0_SRSConf0 = (1U << CP0SRSC0_M) | (0x3fe << CP0SRSC0_SRS3) |
++                    (0x3fe << CP0SRSC0_SRS2) | (0x3fe << CP0SRSC0_SRS1),
++        .CP0_SRSConf1_rw_bitmask = 0x3fffffff,
++        .CP0_SRSConf1 = (1U << CP0SRSC1_M) | (0x3fe << CP0SRSC1_SRS6) |
++                    (0x3fe << CP0SRSC1_SRS5) | (0x3fe << CP0SRSC1_SRS4),
++        .CP0_SRSConf2_rw_bitmask = 0x3fffffff,
++        .CP0_SRSConf2 = (1U << CP0SRSC2_M) | (0x3fe << CP0SRSC2_SRS9) |
++                    (0x3fe << CP0SRSC2_SRS8) | (0x3fe << CP0SRSC2_SRS7),
++        .CP0_SRSConf3_rw_bitmask = 0x3fffffff,
++        .CP0_SRSConf3 = (1U << CP0SRSC3_M) | (0x3fe << CP0SRSC3_SRS12) |
++                    (0x3fe << CP0SRSC3_SRS11) | (0x3fe << CP0SRSC3_SRS10),
++        .CP0_SRSConf4_rw_bitmask = 0x3fffffff,
++        .CP0_SRSConf4 = (0x3fe << CP0SRSC4_SRS15) |
++                    (0x3fe << CP0SRSC4_SRS14) | (0x3fe << CP0SRSC4_SRS13),
++        .SEGBITS = 32,
++        .PABITS = 32,
++        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
++        .mmu_type = MMU_TYPE_R4000,
++    },
+     {
+         .name = "74Kf",
+         .CP0_PRid = 0x00019700,
+-- 
+2.14.5
+