diff --git a/Makefile b/Makefile
index 6b63edaf0c01a53b2443c6df1fad8cd520d0cae9..8fe23b78d41bbbc8824890abd61818b35682d48c 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ AS = $(TOOLPREFIX)gas
 LD = $(TOOLPREFIX)ld
 OBJCOPY = $(TOOLPREFIX)objcopy
 OBJDUMP = $(TOOLPREFIX)objdump
-CFLAGS = -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32
+CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32
 CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
 ASFLAGS = -m32
 # FreeBSD ld wants ``elf_i386_fbsd''
@@ -49,8 +49,8 @@ xv6.img: bootblock kernel fs.img
 	dd if=kernel of=xv6.img seek=1 conv=notrunc
 
 bootblock: bootasm.S bootmain.c
-	$(CC) $(CFLAGS) -O -nostdinc -I. -c bootmain.c
-	$(CC) $(CFLAGS) -nostdinc -I. -c bootasm.S
+	$(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c
+	$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S
 	$(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o
 	$(OBJDUMP) -S bootblock.o > bootblock.asm
 	$(OBJCOPY) -S -O binary -j .text bootblock.o bootblock
@@ -93,7 +93,7 @@ _forktest: forktest.o $(ULIB)
 	$(OBJDUMP) -S _forktest > forktest.asm
 
 mkfs: mkfs.c fs.h
-	gcc $(CFLAGS) -Wall -o mkfs mkfs.c
+	gcc -Wall -o mkfs mkfs.c
 
 UPROGS=\
 	_cat\
@@ -139,7 +139,7 @@ bochs : fs.img xv6.img
 	bochs -q
 
 qemu: fs.img xv6.img
-	qemu -parallel stdio -hdb fs.img xv6.img
+	qemu -parallel stdio -smp 2 -hdb fs.img xv6.img
 
 qemutty: fs.img xv6.img
 	qemu -nographic -smp 2 -hdb fs.img xv6.img
diff --git a/main.c b/main.c
index 57af27ccb15227974333ffd3e37f3668a4449ce3..60cd1b37b7ca66c7a83310985fd839a3667cb3e3 100644
--- a/main.c
+++ b/main.c
@@ -5,9 +5,6 @@
 #include "proc.h"
 #include "x86.h"
 
-__thread struct cpu *cpu;
-__thread struct proc *proc;
-
 static void bootothers(void);
 static void mpmain(void) __attribute__((noreturn));
 
@@ -22,6 +19,7 @@ main(void)
   ioapicinit();    // another interrupt controller
   consoleinit();   // I/O devices & their interrupts
   uartinit();      // serial port
+  cprintf("cpus %p cpu %p\n", cpus, cpu);  // NOTE(review): debug dump of per-cpu pointers; looks like a leftover, confirm before merging
   cprintf("\ncpu%d: starting xv6\n\n", cpu->id);
 
   kinit();         // physical memory allocator
diff --git a/proc.c b/proc.c
index e83e0f7ca3bb21e049965e78b7acff6d25e45b00..73cad6d97b9e1ad88bc302fb1ec0f82e9af84daf 100644
--- a/proc.c
+++ b/proc.c
@@ -70,12 +70,11 @@ ksegment(void)
   c = &cpus[cpunum()];
   c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0x100000 + 64*1024-1, 0);
   c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
-  c->gdt[SEG_KCPU] = SEG(STA_W, &c->tlsstruct, 0xffffffff, 0);
+  c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);
   lgdt(c->gdt, sizeof(c->gdt));
   loadfsgs(SEG_KCPU << 3);
   
   // Initialize cpu-local storage.
-  c->tlsstruct = &c->tlsstruct;
   asm volatile("");  // Do not let gcc reorder across this line.
   cpu = c;
   proc = 0;
diff --git a/proc.h b/proc.h
index f20f67ec25cd70a764b6b0aa71e653b30de190e6..192f8ffd1575a82b8618c2699d1dc282aa0c780c 100644
--- a/proc.h
+++ b/proc.h
@@ -59,10 +59,9 @@ struct cpu {
   int ncli;                    // Depth of pushcli nesting.
   int intena;                  // Were interrupts enabled before pushcli?
   
-  // "Thread"-local storage variables
+  // Cpu-local storage, addressed via %gs (cpu at %gs:0, proc at %gs:4); keep adjacent, in this order.
   struct cpu *cpu;
   struct proc *proc;
-  void *tlsstruct;
 };
 
 extern struct cpu cpus[NCPU];
@@ -70,9 +69,11 @@ extern int ncpu;
 
 // Per-CPU variables, holding pointers to the
 // current cpu and to the current process.
-// The __thread prefix tells gcc to refer to them in the segment
-// pointed at by gs; the name __thread derives from the use
-// of the same mechanism to provide per-thread storage in
-// multithreaded user programs.
-extern __thread struct cpu *cpu;       // This cpu.
-extern __thread struct proc *proc;     // Current process on this cpu.
+// The asm labels make gcc emit "%gs:0" for cpu and "%gs:4" for
+// proc.  ksegment bases the %gs segment at the cpu field of the
+// local cpu's struct cpu, so the cpu and proc fields must stay
+// adjacent and in that order (32-bit pointers: offsets 0 and 4).
+// This is similar to how thread-local variables are implemented
+// in thread libraries such as Linux pthreads.
+extern struct cpu *cpu asm("%gs:0");       // This cpu.
+extern struct proc *proc asm("%gs:4");     // Current proc on this cpu.