diff --git a/main.c b/main.c
index 275aa80e0986abb1586d60c86240d87603568842..2108d9570867bc3500a0bf3a9ed71c51da5d70e3 100644
--- a/main.c
+++ b/main.c
@@ -50,7 +50,6 @@ mpmain(void)
   if(cpu() != mp_bcpu())
     lapic_init(cpu());
   setupsegs(0);
-  cpuid(0, 0, 0, 0, 0);  // memory barrier
   cpus[cpu()].booted = 1;
 
   scheduler();
diff --git a/spinlock.c b/spinlock.c
index a1aa37d4cdef95cf52929486bae66f554259b2f8..c00c9787bad852f8e1215f978706e29ad80f2b31 100644
--- a/spinlock.c
+++ b/spinlock.c
@@ -10,6 +10,12 @@
 
 extern int use_console_lock;
 
+// Compiler barrier: keeps gcc from reordering memory accesses across it.
+static inline void gccbarrier(void)
+{
+  asm volatile("" : : : "memory");
+}
+
 void
 initlock(struct spinlock *lock, char *name)
 {
@@ -32,10 +38,6 @@ acquire(struct spinlock *lock)
   while(cmpxchg(0, 1, &lock->locked) == 1)
     ;
 
-  // Serialize instructions: now that lock is acquired, make sure 
-  // we wait for all pending writes from other processors.
-  cpuid(0, 0, 0, 0, 0);  // memory barrier (see Ch 7, IA-32 manual vol 3)
-  
   // Record info about lock acquisition for debugging.
   // The +10 is only so that we can tell the difference
   // between forgetting to initialize lock->cpu
@@ -53,12 +55,10 @@ release(struct spinlock *lock)
 
   lock->pcs[0] = 0;
   lock->cpu = 0xffffffff;
-  
-  // Serialize instructions: before unlocking the lock, make sure
-  // to flush any pending memory writes from this processor.
-  cpuid(0, 0, 0, 0, 0);  // memory barrier (see Ch 7, IA-32 manual vol 3)
 
+  gccbarrier();  // Keep gcc from moving lock->locked = 0 earlier.
   lock->locked = 0;
+
   popcli();
 }
 
diff --git a/x86.h b/x86.h
index a24214d89cfbdad4bba8b2e2f7f55002dec8fe1e..a1c66b5ee42894a948ac6ff4f51e94bbd32091f2 100644
--- a/x86.h
+++ b/x86.h
@@ -96,6 +96,7 @@ write_eflags(uint eflags)
   asm volatile("pushl %0; popfl" : : "r" (eflags));
 }
 
+// XXX: Kill this if not used.
 static inline void
 cpuid(uint info, uint *eaxp, uint *ebxp, uint *ecxp, uint *edxp)
 {