From 15ca68a993d10767c37793e6a0a780b0a7e395dd Mon Sep 17 00:00:00 2001
From: Noam Camus
Date: Sun, 7 Sep 2014 22:52:33 +0300
Subject: ARC: Make vmalloc size configurable

On ARC, the lower 2G of address space is translated and used for
 - user vaddr space            (regions 0 to 5)
 - unused kernel-user gutter   (region 6)
 - kernel vaddr space          (region 7)

where each region simply represents 256MB of address space.

The 256MB of kernel vaddr space is used to implement vmalloc and
modules. So far this was enough, but not on EZChip systems with 4K
CPUs (given that the per-cpu mechanism uses vmalloc for allocating
chunks). So allow VMALLOC_SIZE to be configured, by expanding down
into the unused kernel-user gutter region, whose default 256M was
excessive anyway.

Also use _BITUL() to fix a build error: PGDIR_SIZE cannot use "1UL"
since it is referenced from assembly code in mm/tlbex.S.

[ed. note: a small user-space sketch of the resulting address-map
arithmetic follows the next changelog below.]

Signed-off-by: Noam Camus
[vgupta: rewrote changelog, debugged bootup crash due to int vs. hex]
Acked-by: Vineet Gupta
---
 arch/arc/include/asm/processor.h | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'arch/arc/include/asm/processor.h')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 1d694c1ef6d6..d0a9211ec769 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -97,7 +97,7 @@ extern unsigned int get_wchan(struct task_struct *p);
 #endif /* !__ASSEMBLY__ */
 
 /*
- * System Memory Map on ARC
+ * Default System Memory Map on ARC
  *
  * ---------------------------- (lower 2G, Translated) -------------------------
  * 0x0000_0000		0x5FFF_FFFF	(user vaddr: TASK_SIZE)
@@ -109,18 +109,17 @@ extern unsigned int get_wchan(struct task_struct *p);
  * 0xC000_0000		0xFFFF_FFFF	(peripheral uncached space)
  * -----------------------------------------------------------------------------
  */
-#define VMALLOC_START	0x70000000
-/*
- * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter
- * See asm/highmem.h for details
- */
-#define VMALLOC_SIZE	(PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4)
-#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+#define TASK_SIZE	0x60000000
 
-#define USER_KERNEL_GUTTER	0x10000000
+#define VMALLOC_START	(PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
+
+/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
+#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
-#define TASK_SIZE	(VMALLOC_START - USER_KERNEL_GUTTER)
+#define USER_KERNEL_GUTTER	(VMALLOC_START - TASK_SIZE)
 
 #define STACK_TOP	TASK_SIZE
 #define STACK_TOP_MAX	STACK_TOP
 
-- 
cgit v1.2.3


From 8bcf2c48f32e22f923b69f779c95b1348308d5b1 Mon Sep 17 00:00:00 2001
From: Noam Camus
Date: Sun, 6 Dec 2015 15:40:55 +0200
Subject: ARC: [plat-eznps] Use dedicated user stack top

NPS uses a special mapping right below TASK_SIZE. Hence we need to
lower STACK_TOP so that the user stack won't overlap that NPS special
mapping.
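[ed. note: the sketch below is an editor's illustration, not part of
either patch. It recomputes, in plain user-space C, the address-map
arithmetic that the first patch above introduces. PAGE_OFFSET
(0x8000_0000, per the memory map above), PGDIR_SIZE (2MB, matching the
"PMD align (21bit)" note in the next patch) and a
CONFIG_ARC_KVADDR_SIZE default of 256 (MB) are assumed values here,
not taken from this log.]

#include <stdio.h>

#define PAGE_OFFSET		0x80000000UL	/* start of untranslated space */
#define PGDIR_SIZE		0x00200000UL	/* assumed: 2MB per pgdir entry */
#define CONFIG_ARC_KVADDR_SIZE	256UL		/* in MB; assumed Kconfig default */

#define TASK_SIZE		0x60000000UL
#define VMALLOC_START		(PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter */
#define VMALLOC_SIZE		((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
#define VMALLOC_END		(VMALLOC_START + VMALLOC_SIZE)
#define USER_KERNEL_GUTTER	(VMALLOC_START - TASK_SIZE)

int main(void)
{
	/* With the 256MB default this prints VMALLOC_START = 0x70000000,
	 * i.e. the same value that used to be hardwired; growing the
	 * Kconfig knob expands vmalloc down into the gutter. */
	printf("VMALLOC_START      = 0x%08lx\n", VMALLOC_START);
	printf("VMALLOC_END        = 0x%08lx\n", VMALLOC_END);
	printf("VMALLOC_SIZE       = %lu MB\n", VMALLOC_SIZE >> 20);
	printf("USER_KERNEL_GUTTER = %lu MB\n", USER_KERNEL_GUTTER >> 20);
	return 0;
}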
Signed-off-by: Noam Camus
Acked-by: Vineet Gupta
---
 arch/arc/include/asm/processor.h | 18 ++++++++++++++++++
 arch/arc/mm/tlb.c                |  6 ++++++
 2 files changed, 24 insertions(+)

(limited to 'arch/arc/include/asm/processor.h')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d0a9211ec769..194a09fce198 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -121,7 +121,25 @@ extern unsigned int get_wchan(struct task_struct *p);
 
 #define USER_KERNEL_GUTTER	(VMALLOC_START - TASK_SIZE)
 
+#ifdef CONFIG_ARC_PLAT_EZNPS
+/* NPS architecture defines special window of 129M in user address space for
+ * special memory areas, when accessing this window the MMU do not use TLB.
+ * Instead MMU direct the access to:
+ * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
+ * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
+ *
+ * CMEM - is the fastest memory we got and its size is 16K.
+ * FMT - is used to map either to internal/external memory.
+ * Internal memory is the second fast memory and its size is 16M
+ * External memory is the biggest memory (16G) and also the slowest.
+ *
+ * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000.
+ */
+#define STACK_TOP	0x57e00000
+#else
 #define STACK_TOP	TASK_SIZE
+#endif
+
 #define STACK_TOP_MAX	STACK_TOP
 
 /* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 745a9aeb2d96..ec868a9081a1 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -819,6 +819,12 @@ void arc_mmu_init(void)
 	 */
 	BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));
 
+	/*
+	 * stack top size sanity check,
+	 * Can't be done in processor.h due to header include depenedencies
+	 */
+	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
+
 	/* For efficiency sake, kernel is compile time built for a MMU ver
 	 * This must match the hardware it is running on.
 	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
-- 
cgit v1.2.3


From 46c3e6b8768643d9bc7325324d17e37781b7bbf8 Mon Sep 17 00:00:00 2001
From: Tal Zilcer
Date: Mon, 9 Mar 2015 16:58:39 +0200
Subject: ARC: [plat-eznps] Use dedicated cpu_relax()

Since the CTOP is SMT hardware multi-threaded, we need to hint the HW
that now would be a very good time to do a hardware thread context
switch. This is done by issuing the schd.rw instruction (binary coded
here so as not to require a specific revision of GCC to build the
kernel). schd.rw means the thread becomes eligible for execution by
the thread scheduler only after all its pending read/write
transactions have completed.

cpu_relax_lowlatency() is implemented with barrier(), since with the
above cpu_relax() semantics it may take a while until a yielding CPU
gets scheduled back.
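[ed. note: a minimal sketch, using only standard C and GCC inline asm,
of why cpu_relax() must be at least a compiler barrier; the function
and variable names are illustrative, not from the patch.]

/* Non-NPS cpu_relax() is just barrier(): the "memory" clobber forces
 * 'flag' to be re-read from memory on every iteration. Without it the
 * compiler could legally hoist the load out of the loop and spin
 * forever on a stale register copy, since 'flag' is deliberately not
 * declared volatile (see the comment in the hunk below). */
#define cpu_relax()	__asm__ __volatile__ ("" : : : "memory")

static int flag;	/* set to 1 by another CPU / hardware thread */

void wait_for_flag(void)
{
	while (!flag)
		cpu_relax();
}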
Signed-off-by: Noam Camus
Cc: Peter Zijlstra
Acked-by: Vineet Gupta
---
 arch/arc/include/asm/processor.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'arch/arc/include/asm/processor.h')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 194a09fce198..f9048994b22f 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -57,9 +57,19 @@ struct task_struct;
  * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
  * get optimised away by gcc
  */
-#define cpu_relax()	__asm__ __volatile__ ("" : : : "memory")
+#ifndef CONFIG_EZNPS_MTM_EXT
 
-#define cpu_relax_lowlatency() cpu_relax()
+#define cpu_relax()		barrier()
+#define cpu_relax_lowlatency()	cpu_relax()
+
+#else
+
+#define cpu_relax()	\
+	__asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
+
+#define cpu_relax_lowlatency()	barrier()
+
+#endif
 
 #define copy_segments(tsk, mm)      do { } while (0)
 #define release_segments(mm)        do { } while (0)
-- 
cgit v1.2.3


From 2547476a5e4061f6addb88d5fc837d3a950f54c4 Mon Sep 17 00:00:00 2001
From: Andrea Gelmini
Date: Sat, 21 May 2016 13:45:35 +0200
Subject: Fix typos

Signed-off-by: Andrea Gelmini
Signed-off-by: Vineet Gupta
---
 arch/arc/Makefile                    | 2 +-
 arch/arc/include/asm/entry-compact.h | 4 ++--
 arch/arc/include/asm/mmu_context.h   | 2 +-
 arch/arc/include/asm/pgtable.h       | 2 +-
 arch/arc/include/asm/processor.h     | 2 +-
 arch/arc/include/asm/smp.h           | 2 +-
 arch/arc/include/asm/thread_info.h   | 2 +-
 arch/arc/include/asm/uaccess.h       | 2 +-
 arch/arc/include/uapi/asm/swab.h     | 2 +-
 arch/arc/kernel/perf_event.c         | 2 +-
 arch/arc/kernel/setup.c              | 2 +-
 arch/arc/kernel/signal.c             | 2 +-
 arch/arc/kernel/troubleshoot.c       | 2 +-
 arch/arc/mm/cache.c                  | 6 +++---
 arch/arc/mm/dma.c                    | 2 +-
 15 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'arch/arc/include/asm/processor.h')

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 02fabef2891c..d4df6be66d58 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -127,7 +127,7 @@ libs-y += arch/arc/lib/ $(LIBGCC)
 
 boot		:= arch/arc/boot
 
-#default target for make without any arguements.
+#default target for make without any arguments.
 KBUILD_IMAGE	:= bootpImage
 
 all:	$(KBUILD_IMAGE)
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index e0e1faf03c50..14c310f2e0b1 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -76,8 +76,8 @@
  * We need to be a bit more cautious here. What if a kernel bug in
  * L1 ISR, caused SP to go whaco (some small value which looks like
  * USER stk) and then we take L2 ISR.
- * Above brlo alone would treat it as a valid L1-L2 sceanrio
- * instead of shouting alound
+ * Above brlo alone would treat it as a valid L1-L2 scenario
+ * instead of shouting around
  * The only feasible way is to make sure this L2 happened in
  * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in
  * L1 ISR before it switches stack
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 1fd467ef658f..b0b87f2447f5 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -83,7 +83,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 		local_flush_tlb_all();
 
 		/*
-		 * Above checke for rollover of 8 bit ASID in 32 bit container.
+		 * Above check for rollover of 8 bit ASID in 32 bit container.
 		 * If the container itself wrapped around, set it to a non zero
 		 * "generation" to distinguish from no context
 		 */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 034bbdc0ff61..858f98ef7f1b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -47,7 +47,7 @@
  * Page Tables are purely for Linux VM's consumption and the bits below are
  * suited to that (uniqueness). Hence some are not implemented in the TLB and
  * some have different value in TLB.
- * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible becoz they live in
+ * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
  *      seperate PD0 and PD1, which combined forms a translation entry)
  *      while for PTE perspective, they are 8 and 9 respectively
  * with MMU v3: Most bits (except SHARED) represent the exact hardware pos
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index f9048994b22f..16b630fbeb6a 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -78,7 +78,7 @@ struct task_struct;
 #define KSTK_ESP(tsk)   (task_pt_regs(tsk)->sp)
 
 /*
- * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode.
+ * Where about of Task's sp, fp, blink when it was last seen in kernel mode.
  * Look in process.c for details of kernel stack layout
 */
 #define TSK_K_ESP(tsk)		(tsk->thread.ksp)
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 991380438d6b..89fdd1b0a76e 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void)
 * (1) These insn were introduced only in 4.10 release. So for older released
 *     support needed.
 *
- * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be
+ * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be
 *     gaurantted by the platform (not something which core handles).
 *     Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ
 *     disabling for atomicity.
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 3af67455659a..2d79e527fa50 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -103,7 +103,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
 
 /*
  * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
- * SYSCALL_TRACE is anways seperately/unconditionally tested right after a
+ * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a
  * syscall, so all that reamins to be tested is _TIF_WORK_MASK
 */
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index d1da6032b715..a78d5670884f 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -32,7 +32,7 @@
 #define __kernel_ok		(segment_eq(get_fs(), KERNEL_DS))
 
 /*
- * Algorthmically, for __user_ok() we want do:
+ * Algorithmically, for __user_ok() we want do:
 *     (start < TASK_SIZE) && (start+len < TASK_SIZE)
 * where TASK_SIZE could either be retrieved from thread_info->addr_limit or
 * emitted directly in code.
diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h
index 095599a73195..71f3918b0fc3 100644
--- a/arch/arc/include/uapi/asm/swab.h
+++ b/arch/arc/include/uapi/asm/swab.h
@@ -74,7 +74,7 @@
 	__tmp ^ __in;						\
 })
 
-#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bwap instruction */
+#elif (ARC_BSWAP_TYPE == 2)	/* Custom single cycle bswap instruction */
 
 #define __arch_swab32(x)	\
 ({	\
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 6fd48021324b..08f03d9b5b3e 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -108,7 +108,7 @@ static void arc_perf_event_update(struct perf_event *event,
 	int64_t delta = new_raw_count - prev_raw_count;
 
 	/*
-	 * We don't afaraid of hwc->prev_count changing beneath our feet
+	 * We aren't afraid of hwc->prev_count changing beneath our feet
 	 * because there's no way for us to re-enter this function anytime.
 	 */
 	local64_set(&hwc->prev_count, new_raw_count);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index f63b8bfefb0c..2ee7a4d758a8 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -392,7 +392,7 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * If we are here, it is established that @uboot_arg didn't
 	 * point to DT blob. Instead if u-boot says it is cmdline,
-	 * Appent to embedded DT cmdline.
+	 * append to embedded DT cmdline.
 	 * setup_machine_fdt() would have populated @boot_command_line
 	 */
 	if (uboot_tag == 1) {
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 004b7f0bc76c..6cb3736b6b83 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -34,7 +34,7 @@
 *  -ViXS were still seeing crashes when using insmod to load drivers.
 *   It turned out that the code to change Execute permssions for TLB entries
 *   of user was not guarded for interrupts (mod_tlb_permission)
- *   This was cauing TLB entries to be overwritten on unrelated indexes
+ *   This was causing TLB entries to be overwritten on unrelated indexes
 *
 * Vineetg: July 15th 2008: Bug #94183
 *  -Exception happens in Delay slot of a JMP, and before user space resumes,
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index a6f91e88ce36..934150e7ac48 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -276,7 +276,7 @@ static int tlb_stats_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-/* called on user read(): display the couters */
+/* called on user read(): display the counters */
 static ssize_t tlb_stats_output(struct file *file,	/* file descriptor */
 				char __user *user_buf,	/* user buffer */
 				size_t len,		/* length of buffer */
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 9e5eddbb856f..5a294b2c3cb3 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -215,7 +215,7 @@ slc_chk:
 * ------------------
 * This ver of MMU supports variable page sizes (1k-16k): although Linux will
 * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggrevate aliasing
+ * However from hardware perspective, smaller page sizes aggravate aliasing
 *  meaning more vaddr bits needed to disambiguate the cache-line-op ;
 *  the existing scheme of piggybacking won't work for certain configurations.
 *  Two new registers IC_PTAG and DC_PTAG inttoduced.
@@ -302,7 +302,7 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 
 /*
 * This is technically for MMU v4, using the MMU v3 programming model
- * Special work for HS38 aliasing I-cache configuratino with PAE40
+ * Special work for HS38 aliasing I-cache configuration with PAE40
 *   - upper 8 bits of paddr need to be written into PTAG_HI
 *   - (and needs to be written before the lower 32 bits)
 * Note that PTAG_HI is hoisted outside the line loop
@@ -936,7 +936,7 @@ void arc_cache_init(void)
 			      ic->ver, CONFIG_ARC_MMU_VER);
 
 		/*
-		 * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG
+		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
 		 * pair to provide vaddr/paddr respectively, just as in MMU v3
 		 */
 		if (is_isa_arcv2() && ic->alias)
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 8c8e36fa5659..73d7e4c75b7d 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -10,7 +10,7 @@
 * DMA Coherent API Notes
 *
 * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
- * implemented by accessintg it using a kernel virtual address, with
+ * implemented by accessing it using a kernel virtual address, with
 * Cache bit off in the TLB entry.
 *
 * The default DMA address == Phy address which is 0x8000_0000 based.
-- 
cgit v1.2.3