Patches contributed by Eötvös Loránd University


commit 28e7d82d6325734c7ecd1f223b892701f7a8c171
Merge: c99dbbe9f8f6 cef30b3a84e1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 19 17:12:20 2009 +0100

    Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/travis/linux-2.6-cpus4096-for-ingo into cpus4096

commit 7890ba8c87604ca4c2c73f7de846bf5305d743e4
Merge: 99937d6455ce b2b062b81633
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 19 12:36:09 2009 +0100

    Merge branch 'stackprotector' into core/percpu

commit 4092762aebfe55c1f8e31440b80a053c2dbe519b
Merge: 745b1626dd71 1de9e8e70f5a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 18 20:15:05 2009 +0100

    Merge branch 'tracing/ftrace'; commit 'v2.6.29-rc2' into tracing/core

commit 5662a2f8e7313f78d6b17ab383f3e4f04971c335
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 18 19:37:21 2009 +0100

    x86, rdc321x: remove/move leftover files
    
    Impact: cleanup
    
    Move/remove leftover RDC321x files. Now that it's not a subarch anymore,
    arch/x86/mach-rdc321x and arch/x86/include/asm/mach-rdc321x/ are not
    needed.
    
    One include file was still in use: rdc321x_defs.h; move that to the
    generic x86 asm header directory.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h
deleted file mode 100644
index c210ab5788b0..000000000000
--- a/arch/x86/include/asm/mach-rdc321x/gpio.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ASM_X86_MACH_RDC321X_GPIO_H
-#define _ASM_X86_MACH_RDC321X_GPIO_H
-
-#include <linux/kernel.h>
-
-extern int rdc_gpio_get_value(unsigned gpio);
-extern void rdc_gpio_set_value(unsigned gpio, int value);
-extern int rdc_gpio_direction_input(unsigned gpio);
-extern int rdc_gpio_direction_output(unsigned gpio, int value);
-extern int rdc_gpio_request(unsigned gpio, const char *label);
-extern void rdc_gpio_free(unsigned gpio);
-extern void __init rdc321x_gpio_setup(void);
-
-/* Wrappers for the arch-neutral GPIO API */
-
-static inline int gpio_request(unsigned gpio, const char *label)
-{
-	return rdc_gpio_request(gpio, label);
-}
-
-static inline void gpio_free(unsigned gpio)
-{
-	might_sleep();
-	rdc_gpio_free(gpio);
-}
-
-static inline int gpio_direction_input(unsigned gpio)
-{
-	return rdc_gpio_direction_input(gpio);
-}
-
-static inline int gpio_direction_output(unsigned gpio, int value)
-{
-	return rdc_gpio_direction_output(gpio, value);
-}
-
-static inline int gpio_get_value(unsigned gpio)
-{
-	return rdc_gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned gpio, int value)
-{
-	rdc_gpio_set_value(gpio, value);
-}
-
-static inline int gpio_to_irq(unsigned gpio)
-{
-	return gpio;
-}
-
-static inline int irq_to_gpio(unsigned irq)
-{
-	return irq;
-}
-
-/* For cansleep */
-#include <asm-generic/gpio.h>
-
-#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */
diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h
similarity index 100%
rename from arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h
rename to arch/x86/include/asm/rdc321x_defs.h
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
deleted file mode 100644
index 8325b4ca431c..000000000000
--- a/arch/x86/mach-rdc321x/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the RDC321x specific parts of the kernel
-#
-obj-$(CONFIG_X86_RDC321X)        := gpio.o platform.o
-
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
deleted file mode 100644
index 247f33d3a407..000000000000
--- a/arch/x86/mach-rdc321x/gpio.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- *  GPIO support for RDC SoC R3210/R8610
- *
- *  Copyright (C) 2007, Florian Fainelli <florian@openwrt.org>
- *  Copyright (C) 2008, Volker Weiss <dev@tintuc.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/types.h>
-#include <linux/module.h>
-
-#include <asm/gpio.h>
-#include <asm/mach-rdc321x/rdc321x_defs.h>
-
-
-/* spin lock to protect our private copy of GPIO data register plus
-   the access to PCI conf registers. */
-static DEFINE_SPINLOCK(gpio_lock);
-
-/* copy of GPIO data registers */
-static u32 gpio_data_reg1;
-static u32 gpio_data_reg2;
-
-static u32 gpio_request_data[2];
-
-
-static inline void rdc321x_conf_write(unsigned addr, u32 value)
-{
-	outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
-	outl(value, RDC3210_CFGREG_DATA);
-}
-
-static inline void rdc321x_conf_or(unsigned addr, u32 value)
-{
-	outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
-	value |= inl(RDC3210_CFGREG_DATA);
-	outl(value, RDC3210_CFGREG_DATA);
-}
-
-static inline u32 rdc321x_conf_read(unsigned addr)
-{
-	outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
-
-	return inl(RDC3210_CFGREG_DATA);
-}
-
-/* configure pin as GPIO */
-static void rdc321x_configure_gpio(unsigned gpio)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	rdc321x_conf_or(gpio < 32
-		? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2,
-		1 << (gpio & 0x1f));
-	spin_unlock_irqrestore(&gpio_lock, flags);
-}
-
-/* initially setup the 2 copies of the gpio data registers.
-   This function must be called by the platform setup code. */
-void __init rdc321x_gpio_setup()
-{
-	/* this might not be, what others (BIOS, bootloader, etc.)
-	   wrote to these registers before, but it's a good guess. Still
-	   better than just using 0xffffffff. */
-
-	gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1);
-	gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2);
-}
-
-/* determine, if gpio number is valid */
-static inline int rdc321x_is_gpio(unsigned gpio)
-{
-	return gpio <= RDC321X_MAX_GPIO;
-}
-
-/* request GPIO */
-int rdc_gpio_request(unsigned gpio, const char *label)
-{
-	unsigned long flags;
-
-	if (!rdc321x_is_gpio(gpio))
-		return -EINVAL;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f)))
-		goto inuse;
-	gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f));
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	return 0;
-inuse:
-	spin_unlock_irqrestore(&gpio_lock, flags);
-	return -EINVAL;
-}
-EXPORT_SYMBOL(rdc_gpio_request);
-
-/* release previously-claimed GPIO */
-void rdc_gpio_free(unsigned gpio)
-{
-	unsigned long flags;
-
-	if (!rdc321x_is_gpio(gpio))
-		return;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f));
-	spin_unlock_irqrestore(&gpio_lock, flags);
-}
-EXPORT_SYMBOL(rdc_gpio_free);
-
-/* read GPIO pin */
-int rdc_gpio_get_value(unsigned gpio)
-{
-	u32 reg;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	reg = rdc321x_conf_read(gpio < 32
-		? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2);
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	return (1 << (gpio & 0x1f)) & reg ? 1 : 0;
-}
-EXPORT_SYMBOL(rdc_gpio_get_value);
-
-/* set GPIO pin to value */
-void rdc_gpio_set_value(unsigned gpio, int value)
-{
-	unsigned long flags;
-	u32 reg;
-
-	reg = 1 << (gpio & 0x1f);
-	if (gpio < 32) {
-		spin_lock_irqsave(&gpio_lock, flags);
-		if (value)
-			gpio_data_reg1 |= reg;
-		else
-			gpio_data_reg1 &= ~reg;
-		rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1);
-		spin_unlock_irqrestore(&gpio_lock, flags);
-	} else {
-		spin_lock_irqsave(&gpio_lock, flags);
-		if (value)
-			gpio_data_reg2 |= reg;
-		else
-			gpio_data_reg2 &= ~reg;
-		rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2);
-		spin_unlock_irqrestore(&gpio_lock, flags);
-	}
-}
-EXPORT_SYMBOL(rdc_gpio_set_value);
-
-/* configure GPIO pin as input */
-int rdc_gpio_direction_input(unsigned gpio)
-{
-	if (!rdc321x_is_gpio(gpio))
-		return -EINVAL;
-
-	rdc321x_configure_gpio(gpio);
-
-	return 0;
-}
-EXPORT_SYMBOL(rdc_gpio_direction_input);
-
-/* configure GPIO pin as output and set value */
-int rdc_gpio_direction_output(unsigned gpio, int value)
-{
-	if (!rdc321x_is_gpio(gpio))
-		return -EINVAL;
-
-	gpio_set_value(gpio, value);
-	rdc321x_configure_gpio(gpio);
-
-	return 0;
-}
-EXPORT_SYMBOL(rdc_gpio_direction_output);
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
deleted file mode 100644
index 4f4e50c3ad3b..000000000000
--- a/arch/x86/mach-rdc321x/platform.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  Generic RDC321x platform devices
- *
- *  Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version 2
- *  of the License, or (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the
- *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- *  Boston, MA  02110-1301, USA.
- *
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/leds.h>
-
-#include <asm/gpio.h>
-
-/* LEDS */
-static struct gpio_led default_leds[] = {
-	{ .name = "rdc:dmz", .gpio = 1, },
-};
-
-static struct gpio_led_platform_data rdc321x_led_data = {
-	.num_leds = ARRAY_SIZE(default_leds),
-	.leds = default_leds,
-};
-
-static struct platform_device rdc321x_leds = {
-	.name = "leds-gpio",
-	.id = -1,
-	.dev = {
-		.platform_data = &rdc321x_led_data,
-	}
-};
-
-/* Watchdog */
-static struct platform_device rdc321x_wdt = {
-	.name = "rdc321x-wdt",
-	.id = -1,
-	.num_resources = 0,
-};
-
-static struct platform_device *rdc321x_devs[] = {
-	&rdc321x_leds,
-	&rdc321x_wdt
-};
-
-static int __init rdc_board_setup(void)
-{
-	rdc321x_gpio_setup();
-
-	return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
-}
-
-arch_initcall(rdc_board_setup);
diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c
index bf92802f2bbe..36e221beedcd 100644
--- a/drivers/watchdog/rdc321x_wdt.c
+++ b/drivers/watchdog/rdc321x_wdt.c
@@ -37,7 +37,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
-#include <asm/mach-rdc321x/rdc321x_defs.h>
+#include <asm/rdc321x_defs.h>
 
 #define RDC_WDT_MASK	0x80000000 /* Mask */
 #define RDC_WDT_EN	0x00800000 /* Enable bit */

commit b2b062b8163391c42b3219d466ca1ac9742b9c7b
Merge: a9de18eb761f 99937d6455ce
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 18 18:37:14 2009 +0100

    Merge branch 'core/percpu' into stackprotector
    
    Conflicts:
            arch/x86/include/asm/pda.h
            arch/x86/include/asm/system.h
    
    Also, moved include/asm-x86/stackprotector.h to arch/x86/include/asm.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --cc arch/x86/include/asm/pda.h
index 3fea2fdb3302,c31ca048a901..5976cd803e9a
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@@ -9,114 -11,30 +11,28 @@@
  
  /* Per processor datastructure. %gs points to it while the kernel runs */
  struct x8664_pda {
- 	struct task_struct *pcurrent;	/* 0  Current process */
- 	unsigned long data_offset;	/* 8 Per cpu data offset from linker
- 					   address */
- 	unsigned long kernelstack;	/* 16 top of kernel stack for current */
- 	unsigned long oldrsp;		/* 24 user rsp for system call */
- 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
- 	unsigned int cpunumber;		/* 36 Logical CPU number */
+ 	unsigned long unused1;
+ 	unsigned long unused2;
+ 	unsigned long unused3;
+ 	unsigned long unused4;
+ 	int unused5;
+ 	unsigned int unused6;		/* 36 was cpunumber */
 -#ifdef CONFIG_CC_STACKPROTECTOR
  	unsigned long stack_canary;	/* 40 stack canary value */
  					/* gcc-ABI: this canary MUST be at
  					   offset 40!!! */
- 	char *irqstackptr;
- 	short nodenumber;		/* number of current node (32k max) */
 -#endif
  	short in_bootmem;		/* pda lives in bootmem */
- 	unsigned int __softirq_pending;
- 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
- 	short mmu_state;
- 	short isidle;
- 	struct mm_struct *active_mm;
- 	unsigned apic_timer_irqs;
- 	unsigned irq0_irqs;
- 	unsigned irq_resched_count;
- 	unsigned irq_call_count;
- 	unsigned irq_tlb_count;
- 	unsigned irq_thermal_count;
- 	unsigned irq_threshold_count;
- 	unsigned irq_spurious_count;
  } ____cacheline_aligned_in_smp;
  
- extern struct x8664_pda **_cpu_pda;
+ DECLARE_PER_CPU(struct x8664_pda, __pda);
  extern void pda_init(int);
  
- #define cpu_pda(i) (_cpu_pda[i])
+ #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
  
- /*
-  * There is no fast way to get the base address of the PDA, all the accesses
-  * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
-  */
- extern void __bad_pda_field(void) __attribute__((noreturn));
- 
- /*
-  * proxy_pda doesn't actually exist, but tell gcc it is accessed for
-  * all PDA accesses so it gets read/write dependencies right.
-  */
- extern struct x8664_pda _proxy_pda;
- 
- #define pda_offset(field) offsetof(struct x8664_pda, field)
- 
- #define pda_to_op(op, field, val)					\
- do {									\
- 	typedef typeof(_proxy_pda.field) T__;				\
- 	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */	\
- 	switch (sizeof(_proxy_pda.field)) {				\
- 	case 2:								\
- 		asm(op "w %1,%%gs:%c2" :				\
- 		    "+m" (_proxy_pda.field) :				\
- 		    "ri" ((T__)val),					\
- 		    "i"(pda_offset(field)));				\
- 		break;							\
- 	case 4:								\
- 		asm(op "l %1,%%gs:%c2" :				\
- 		    "+m" (_proxy_pda.field) :				\
- 		    "ri" ((T__)val),					\
- 		    "i" (pda_offset(field)));				\
- 		break;							\
- 	case 8:								\
- 		asm(op "q %1,%%gs:%c2":					\
- 		    "+m" (_proxy_pda.field) :				\
- 		    "ri" ((T__)val),					\
- 		    "i"(pda_offset(field)));				\
- 		break;							\
- 	default:							\
- 		__bad_pda_field();					\
- 	}								\
- } while (0)
- 
- #define pda_from_op(op, field)			\
- ({						\
- 	typeof(_proxy_pda.field) ret__;		\
- 	switch (sizeof(_proxy_pda.field)) {	\
- 	case 2:					\
- 		asm(op "w %%gs:%c1,%0" :	\
- 		    "=r" (ret__) :		\
- 		    "i" (pda_offset(field)),	\
- 		    "m" (_proxy_pda.field));	\
- 		break;				\
- 	case 4:					\
- 		asm(op "l %%gs:%c1,%0":		\
- 		    "=r" (ret__):		\
- 		    "i" (pda_offset(field)),	\
- 		    "m" (_proxy_pda.field));	\
- 		break;				\
- 	case 8:					\
- 		asm(op "q %%gs:%c1,%0":		\
- 		    "=r" (ret__) :		\
- 		    "i" (pda_offset(field)),	\
- 		    "m" (_proxy_pda.field));	\
- 		break;				\
- 	default:				\
- 		__bad_pda_field();		\
- 	}					\
- 	ret__;					\
- })
- 
- #define read_pda(field)		pda_from_op("mov", field)
- #define write_pda(field, val)	pda_to_op("mov", field, val)
- #define add_pda(field, val)	pda_to_op("add", field, val)
- #define sub_pda(field, val)	pda_to_op("sub", field, val)
- #define or_pda(field, val)	pda_to_op("or", field, val)
+ #define read_pda(field)		percpu_read(__pda.field)
+ #define write_pda(field, val)	percpu_write(__pda.field, val)
+ #define add_pda(field, val)	percpu_add(__pda.field, val)
+ #define sub_pda(field, val)	percpu_sub(__pda.field, val)
+ #define or_pda(field, val)	percpu_or(__pda.field, val)
  
  /* This is not atomic against other CPUs -- CPU preemption needs to be off */
  #define test_and_clear_bit_pda(bit, field)				\
@@@ -130,8 -42,4 +40,6 @@@
  
  #endif
  
- #define PDA_STACKOFFSET (5*8)
- 
 +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
 +
  #endif /* _ASM_X86_PDA_H */
diff --cc arch/x86/include/asm/stackprotector.h
index 3baf7ad89be1,000000000000..c7f0d10bae7b
mode 100644,000000..100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@@ -1,38 -1,0 +1,39 @@@
 +#ifndef _ASM_STACKPROTECTOR_H
 +#define _ASM_STACKPROTECTOR_H 1
 +
 +#include <asm/tsc.h>
++#include <asm/pda.h>
 +
 +/*
 + * Initialize the stackprotector canary value.
 + *
 + * NOTE: this must only be called from functions that never return,
 + * and it must always be inlined.
 + */
 +static __always_inline void boot_init_stack_canary(void)
 +{
 +	u64 canary;
 +	u64 tsc;
 +
 +	/*
 +	 * If we're the non-boot CPU, nothing set the PDA stack
 +	 * canary up for us - and if we are the boot CPU we have
 +	 * a 0 stack canary. This is a good place for updating
 +	 * it, as we wont ever return from this function (so the
 +	 * invalid canaries already on the stack wont ever
 +	 * trigger).
 +	 *
 +	 * We both use the random pool and the current TSC as a source
 +	 * of randomness. The TSC only matters for very early init,
 +	 * there it already has some randomness on most systems. Later
 +	 * on during the bootup the random pool has true entropy too.
 +	 */
 +	get_random_bytes(&canary, sizeof(canary));
 +	tsc = __native_read_tsc();
 +	canary += tsc + (tsc << 32UL);
 +
 +	current->stack_canary = canary;
 +	write_pda(stack_canary, canary);
 +}
 +
 +#endif
diff --cc arch/x86/include/asm/system.h
index 2f6340a44291,d1dc27dba36d..8cadfe9b1194
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@@ -94,9 -94,7 +94,9 @@@ do {									
  	     "call __switch_to\n\t"					  \
  	     ".globl thread_return\n"					  \
  	     "thread_return:\n\t"					  \
- 	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
+ 	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 +	     "movq %P[task_canary](%%rsi),%%r8\n\t"			  \
 +	     "movq %%r8,%%gs:%P[pda_canary]\n\t"			  \
  	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
  	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
  	     "movq %%rax,%%rdi\n\t" 					  \
@@@ -108,9 -106,7 +108,9 @@@
  	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
  	       [tif_fork] "i" (TIF_FORK),			  	  \
  	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
 -	       [current_task] "m" (per_cpu_var(current_task))		  \
 +	       [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
- 	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \
++	       [current_task] "m" (per_cpu_var(current_task)),		  \
 +	       [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
  	     : "memory", "cc" __EXTRA_CLOBBER)
  #endif
  
diff --cc arch/x86/kernel/process_64.c
index efb0396e19bf,4523ff88a69d..aa89eabf09e0
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@@ -625,14 -619,15 +631,14 @@@ __switch_to(struct task_struct *prev_p
  	/*
  	 * Switch the PDA and FPU contexts.
  	 */
- 	prev->usersp = read_pda(oldrsp);
- 	write_pda(oldrsp, next->usersp);
- 	write_pda(pcurrent, next_p);
+ 	prev->usersp = percpu_read(old_rsp);
+ 	percpu_write(old_rsp, next->usersp);
+ 	percpu_write(current_task, next_p);
  
- 	write_pda(kernelstack,
+ 	percpu_write(kernel_stack,
  		  (unsigned long)task_stack_page(next_p) +
- 		  THREAD_SIZE - PDA_STACKOFFSET);
+ 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
  #ifdef CONFIG_CC_STACKPROTECTOR
 -	write_pda(stack_canary, next_p->stack_canary);
  	/*
  	 * Build time only check to make sure the stack_canary is at
  	 * offset 40 in the pda; this is a gcc ABI requirement
diff --cc init/main.c
index 07da4dea50c3,844209453c02..bfe4fb0c9842
--- a/init/main.c
+++ b/init/main.c
@@@ -561,15 -537,8 +538,14 @@@ asmlinkage void __init start_kernel(voi
  	 * Need to run as early as possible, to initialize the
  	 * lockdep hash:
  	 */
- 	unwind_init();
  	lockdep_init();
  	debug_objects_early_init();
 +
 +	/*
 +	 * Set up the the initial canary ASAP:
 +	 */
 +	boot_init_stack_canary();
 +
  	cgroup_init_early();
  
  	local_irq_disable();
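
The stack canary setup this merge carries over mixes the random pool with the
TSC inside boot_init_stack_canary(). A minimal userspace sketch of just that
mixing step (assumptions: x86 only, rand() standing in for get_random_bytes()
and a raw rdtsc for __native_read_tsc()):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static inline uint64_t rdtsc(void)
    {
            uint32_t lo, hi;

            asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
            return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
            uint64_t canary, tsc;

            srand(time(NULL));
            /* stand-in for get_random_bytes(&canary, sizeof(canary)) */
            canary = ((uint64_t)rand() << 32) ^ (uint64_t)rand();

            /* same mixing as boot_init_stack_canary(): fold the TSC
               into both halves of the random value */
            tsc = rdtsc();
            canary += tsc + (tsc << 32);

            printf("canary = %#018llx\n", (unsigned long long)canary);
            return 0;
    }

Per the comment in the patch, the TSC term matters mainly during very early
init; later in bootup the random pool itself carries true entropy.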

commit af37501c792107c2bde1524bdae38d9a247b841a
Merge: d859e29fe34c 99937d6455ce
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 18 18:15:49 2009 +0100

    Merge branch 'core/percpu' into perfcounters/core
    
    Conflicts:
            arch/x86/include/asm/pda.h
    
    We merge tip/core/percpu into tip/perfcounters/core because of a
    semantic and contextual conflict: the former eliminates the PDA,
    while the latter extends it with the apic_perf_irqs field.
    
    Resolve the conflict by moving the new field to the irq_cpustat
    structure on 64-bit too.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --cc arch/x86/include/asm/hardirq_64.h
index b5a6b5d56704,a65bab20f6ce..42930b279215
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@@ -3,9 -3,23 +3,24 @@@
  
  #include <linux/threads.h>
  #include <linux/irq.h>
- #include <asm/pda.h>
  #include <asm/apic.h>
  
+ typedef struct {
+ 	unsigned int __softirq_pending;
+ 	unsigned int __nmi_count;	/* arch dependent */
+ 	unsigned int apic_timer_irqs;	/* arch dependent */
++	unsigned int apic_perf_irqs;	/* arch dependent */
+ 	unsigned int irq0_irqs;
+ 	unsigned int irq_resched_count;
+ 	unsigned int irq_call_count;
+ 	unsigned int irq_tlb_count;
+ 	unsigned int irq_thermal_count;
+ 	unsigned int irq_spurious_count;
+ 	unsigned int irq_threshold_count;
+ } ____cacheline_aligned irq_cpustat_t;
+ 
+ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+ 
  /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
  #define MAX_HARDIRQS_PER_CPU NR_VECTORS
  

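With the counters moved into a per-cpu irq_cpustat_t, an interrupt path bumps
its own field through the per-cpu accessors instead of through PDA fields. A
hedged sketch of the pattern (the inc_irq_stat() helper name below is
illustrative, not something this merge adds):

    /* bump one field of this CPU's irq_cpustat_t */
    #define inc_irq_stat(member)  percpu_add(irq_stat.member, 1)

    /* e.g. from the performance counter interrupt handler: */
    inc_irq_stat(apic_perf_irqs);
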
commit 99937d6455cea95405ac681c86a857d0fcd530bd
Merge: 74e7904559a1 87b264065880
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 18 17:41:32 2009 +0100

    Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

commit 74296a8ed6aa3c5bf672808ada690de7ba323ecc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Jan 16 17:43:50 2009 +0100

    irq: provide debug_poll_all_shared_irqs() method under CONFIG_DEBUG_SHIRQ
    
    Provide a shared interrupt debug facility under CONFIG_DEBUG_SHIRQ:
    it uses the existing irqpoll facilities to iterate through all
    registered interrupt handlers and call those which can handle shared
    IRQ lines.
    
    This can be handy for suspend/resume debugging: if we call this function
    early during resume, we can trigger crashes in those drivers which have
    incorrect assumptions about when exactly their ISRs will be called
    during suspend/resume.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9127f6b51a39..468e3a25a4a1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -462,6 +462,12 @@ static inline void init_irq_proc(void)
 }
 #endif
 
+#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ)
+extern void debug_poll_all_shared_irqs(void);
+#else
+static inline void debug_poll_all_shared_irqs(void) { }
+#endif
+
 int show_interrupts(struct seq_file *p, void *v);
 
 struct irq_desc;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd364c11e56e..4d568294de3e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
 	return ok;
 }
 
-static void poll_spurious_irqs(unsigned long dummy)
+static void poll_all_shared_irqs(void)
 {
 	struct irq_desc *desc;
 	int i;
@@ -123,11 +123,23 @@ static void poll_spurious_irqs(unsigned long dummy)
 
 		try_one_irq(i, desc);
 	}
+}
+
+static void poll_spurious_irqs(unsigned long dummy)
+{
+	poll_all_shared_irqs();
 
 	mod_timer(&poll_spurious_irq_timer,
 		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
 }
 
+#ifdef CONFIG_DEBUG_SHIRQ
+void debug_poll_all_shared_irqs(void)
+{
+	poll_all_shared_irqs();
+}
+#endif
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
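
One plausible way to use the new hook while chasing a resume bug; the driver
and its callback are hypothetical, only debug_poll_all_shared_irqs() comes
from this patch:

    #include <linux/device.h>
    #include <linux/interrupt.h>

    static int mydrv_resume(struct device *dev)
    {
            /*
             * Fire every registered shared-IRQ handler before touching
             * the hardware: a handler with wrong assumptions about
             * suspend/resume ordering crashes here, right next to the
             * cause, instead of at some unrelated interrupt later.
             */
            debug_poll_all_shared_irqs();

            return 0;
    }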

commit 5a2dd72abdae75ea2960145e0549635ce4e0be96
Merge: efdc64f0c792 7cb36b6ccdca
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Jan 16 17:46:22 2009 +0100

    Merge branch 'linus' into irq/genirq

commit 6dbde3530850d4d8bfc1b6bd4006d92786a2787f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Jan 15 22:15:53 2009 +0900

    percpu: add optimized generic percpu accessors
    
    It is an optimization and a cleanup, and adds the following new
    generic percpu methods:
    
      percpu_read()
      percpu_write()
      percpu_add()
      percpu_sub()
      percpu_and()
      percpu_or()
      percpu_xor()
    
    and implements support for them on x86 (other architectures will fall
    back to a default implementation).
    
    The advantage is that, for example, to read a local percpu variable,
    instead of this sequence:
    
     return __get_cpu_var(var);
    
     ffffffff8102ca2b:      48 8b 14 fd 80 09 74    mov    -0x7e8bf680(,%rdi,8),%rdx
     ffffffff8102ca32:      81
     ffffffff8102ca33:      48 c7 c0 d8 59 00 00    mov    $0x59d8,%rax
     ffffffff8102ca3a:      48 8b 04 10             mov    (%rax,%rdx,1),%rax
    
    we can get a single instruction by using the optimized variants:
    
     return percpu_read(var);
    
     ffffffff8102ca3f:      65 48 8b 05 91 8f fd    mov    %gs:0x7efd8f91(%rip),%rax
    
    I also cleaned up the x86-specific APIs and made the x86 code use
    these new generic percpu primitives.
    
    tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
        * added percpu_and() for completeness's sake
        * made generic percpu ops atomic against preemption
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Tejun Heo <tj@kernel.org>
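
A short usage sketch of the new accessors; the variable and function below
are made up for illustration:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, nr_widget_events);

    static void account_widget_event(void)
    {
            /* each access below is atomic w.r.t. preemption on its own;
               on x86 each compiles to one %gs/%fs-prefixed instruction */
            percpu_add(nr_widget_events, 1);

            if (percpu_read(nr_widget_events) > 1024)
                    percpu_write(nr_widget_events, 0);
    }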

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..0728480f5c56 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,7 +10,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return x86_read_percpu(current_task);
+	return percpu_read(current_task);
 }
 
 #else /* X86_32 */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return x86_read_percpu(irq_regs);
+	return percpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	x86_write_percpu(irq_regs, new_regs);
+	percpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		x86_write_percpu(cpu_tlbstate.active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e3d3a081d798..47f274fe6953 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,11 +45,11 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-#define read_pda(field)		x86_read_percpu(__pda.field)
-#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
-#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
-#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
-#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
+#define read_pda(field)		percpu_read(__pda.field)
+#define write_pda(field, val)	percpu_write(__pda.field, val)
+#define add_pda(field, val)	percpu_add(__pda.field, val)
+#define sub_pda(field, val)	percpu_sub(__pda.field, val)
+#define or_pda(field, val)	percpu_or(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field)				\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do {							\
 	ret__;						\
 })
 
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)				\
@@ -131,6 +128,11 @@ do {							\
 	old__;								\
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 127415402ea1..c7bbbbe65d3f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata;
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..77d546817d94 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
 
-	x86_write_percpu(current_task, next_p);
+	percpu_write(current_task, next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ec53818f4e38..e65449d0f7d9 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  */
 void leave_mm(int cpu)
 {
-	BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
+	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
+	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 		 * BUG();
 		 */
 
-	if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
-		if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
+	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 1a48368acb09..96f15b09a4c5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
 	     VOYAGER_SUS_IN_CONTROL_PORT);
 
 	current_thread_info()->cpu = boot_cpu_id;
-	x86_write_percpu(cpu_number, boot_cpu_id);
+	percpu_write(cpu_number, boot_cpu_id);
 }
 
 /*
@@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
 void __init smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
-	x86_write_percpu(cpu_number, hard_smp_processor_id());
+	percpu_write(cpu_number, hard_smp_processor_id());
 }
 
 static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 312414ef9365..75b94139e1f2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
 
 static void xen_write_cr2(unsigned long cr2)
 {
-	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
+	percpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
 static unsigned long xen_read_cr2(void)
 {
-	return x86_read_percpu(xen_vcpu)->arch.cr2;
+	return percpu_read(xen_vcpu)->arch.cr2;
 }
 
 static unsigned long xen_read_cr2_direct(void)
 {
-	return x86_read_percpu(xen_vcpu_info.arch.cr2);
+	return percpu_read(xen_vcpu_info.arch.cr2);
 }
 
 static void xen_write_cr4(unsigned long cr4)
@@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
 
 static unsigned long xen_read_cr3(void)
 {
-	return x86_read_percpu(xen_cr3);
+	return percpu_read(xen_cr3);
 }
 
 static void set_current_cr3(void *v)
 {
-	x86_write_percpu(xen_current_cr3, (unsigned long)v);
+	percpu_write(xen_current_cr3, (unsigned long)v);
 }
 
 static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	if (kernel) {
-		x86_write_percpu(xen_cr3, cr3);
+		percpu_write(xen_cr3, cr3);
 
 		/* Update xen_current_cr3 once the batch has actually
 		   been submitted. */
@@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
 
 	/* Update while interrupts are disabled, so its atomic with
 	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	percpu_write(xen_cr3, cr3);
 
 	__xen_write_cr3(true, cr3);
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
 	struct vcpu_info *vcpu;
 	unsigned long flags;
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 
 	/* flag has opposite sense of mask */
 	flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
 	preempt_enable_no_resched();
 
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+	percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
 	preempt_enable_no_resched();
 }
 
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
 	   the caller is confused and is trying to re-enable interrupts
 	   on an indeterminate processor. */
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
 
 	/* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..7bc7852cc5c4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
 		load_cr3(swapper_pg_dir);
 		arch_flush_lazy_cpu_mode();
 	}
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
 		xen_mc_flush();
 
 	/* restore flags saved in xen_mc_batch */
-	local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+	local_irq_restore(percpu_read(xen_mc_irq_flags));
 }
 
 /* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 83fa4236477d..3bfd6dd0b47c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void)
 	xen_setup_cpu_clockevents();
 
 	cpu_set(cpu, cpu_online_map);
-	x86_write_percpu(cpu_state, CPU_ONLINE);
+	percpu_write(cpu_state, CPU_ONLINE);
 	wmb();
 
 	/* We can take interrupts now: we're officially "up". */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c672ebd..00f45ff081a6 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void);
 #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
 					__typeof__(type) per_cpu_var(name)
 
+/*
+ * Optional methods for optimized non-lvalue per-cpu variable access.
+ *
+ * @var can be a percpu variable or a field of it and its size should
+ * equal char, int or long.  percpu_read() evaluates to a lvalue and
+ * all others to void.
+ *
+ * These operations are guaranteed to be atomic w.r.t. preemption.
+ * The generic versions use plain get/put_cpu_var().  Archs are
+ * encouraged to implement single-instruction alternatives which don't
+ * require preemption protection.
+ */
+#ifndef percpu_read
+# define percpu_read(var)						\
+  ({									\
+	typeof(per_cpu_var(var)) __tmp_var__;				\
+	__tmp_var__ = get_cpu_var(var);					\
+	put_cpu_var(var);						\
+	__tmp_var__;							\
+  })
+#endif
+
+#define __percpu_generic_to_op(var, val, op)				\
+do {									\
+	get_cpu_var(var) op val;					\
+	put_cpu_var(var);						\
+} while (0)
+
+#ifndef percpu_write
+# define percpu_write(var, val)		__percpu_generic_to_op(var, (val), =)
+#endif
+
+#ifndef percpu_add
+# define percpu_add(var, val)		__percpu_generic_to_op(var, (val), +=)
+#endif
+
+#ifndef percpu_sub
+# define percpu_sub(var, val)		__percpu_generic_to_op(var, (val), -=)
+#endif
+
+#ifndef percpu_and
+# define percpu_and(var, val)		__percpu_generic_to_op(var, (val), &=)
+#endif
+
+#ifndef percpu_or
+# define percpu_or(var, val)		__percpu_generic_to_op(var, (val), |=)
+#endif
+
+#ifndef percpu_xor
+# define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=)
+#endif
+
 #endif /* _ASM_GENERIC_PERCPU_H_ */
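
On an architecture without single-instruction variants, an accessor call such
as percpu_add(var, 1) expands through __percpu_generic_to_op() into roughly:

    do {
            get_cpu_var(var) += 1;  /* get_cpu_var() disables preemption, */
            put_cpu_var(var);       /* put_cpu_var() re-enables it */
    } while (0)

which is what makes even the fallback versions atomic against preemption, as
the comment block above promises.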