diff --git a/console.c b/console.c
index 86c080e1d2b5183f6e6ee8838d532c6b5cc2550b..5dad05aed670ca9cba6f619a993ad8297377add3 100644
--- a/console.c
+++ b/console.c
@@ -58,7 +58,7 @@ real_cons_putc(int c)
 
   if((ind / 80) >= 24){
     // scroll up
-    memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80));
+    memmove(crt, crt + 80, sizeof(crt[0]) * (23 * 80));
     ind -= 80;
     memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind));
   }
diff --git a/defs.h b/defs.h
index 824b986358e411420d3c51ba5ba81d2356bd53d6..a11cc5757f84cf519d91dcfde909952a3a0abf1f 100644
--- a/defs.h
+++ b/defs.h
@@ -12,7 +12,7 @@ void cons_putc(int);
 struct proc;
 struct jmpbuf;
 void setupsegs(struct proc *);
-struct proc * newproc(void);
+struct proc * copyproc(struct proc*);
 struct spinlock;
 void sleep(void *, struct spinlock *);
 void wakeup(void *);
@@ -32,7 +32,6 @@ void tvinit(void);
 void idtinit(void);
 
 // string.c
-void * memcpy(void *dst, void *src, unsigned n);
 void * memset(void *dst, int c, unsigned n);
 int memcmp(const void *v1, const void *v2, unsigned n);
 void *memmove(void *dst, const void *src, unsigned n);
@@ -92,3 +91,4 @@ void ide_init(void);
 void ide_intr(void);
 void* ide_start_read(uint32_t secno, void *dst, unsigned nsecs);
 int ide_finish_read(void *);
+
diff --git a/main.c b/main.c
index 70e93cf47a9eff5bb465540139a5b09553bb851d..5f2d698a740911927878f34714486a886c7e6981 100644
--- a/main.c
+++ b/main.c
@@ -81,7 +81,7 @@ main()
   // become interruptable
   sti();
 
-  p = newproc();
+  p = copyproc(&proc[0]);
   
   load_icode(p, _binary_usertests_start, (unsigned) _binary_usertests_size);
   //load_icode(p, _binary_userfs_start, (unsigned) _binary_userfs_size);
@@ -122,7 +122,7 @@ load_icode(struct proc *p, uint8_t *binary, unsigned size)
       panic("load_icode: icode wants to be above UTOP");
 
     // Load/clear the segment
-    memcpy(p->mem + ph->p_va, binary + ph->p_offset, ph->p_filesz);
+    memmove(p->mem + ph->p_va, binary + ph->p_offset, ph->p_filesz);
     memset(p->mem + ph->p_va + ph->p_filesz, 0, ph->p_memsz - ph->p_filesz);
   }
 }
diff --git a/proc.c b/proc.c
index 01d8f2f59598f8f9e6f980e68168cdbca3eab9c9..c854576d2512bc1352a5a826f66ec18a962ca6f9 100644
--- a/proc.c
+++ b/proc.c
@@ -13,6 +13,7 @@ struct proc proc[NPROC];
 struct proc *curproc[NCPU];
 int next_pid = 1;
 extern void forkret(void);
+extern void forkret1(struct Trapframe*);
 
 /*
  * set up a process's task state and segment descriptors
@@ -42,84 +43,87 @@ setupsegs(struct proc *p)
   p->gdt_pd.pd_base = (unsigned) p->gdt;
 }
 
-extern void trapret();
+// Scan the process table for a slot whose state is UNUSED.
+// The first such slot is marked EMBRYO and returned;
+// if every slot is in use, return 0.
+struct proc*
+allocproc(void)
+{
+  struct proc *slot;
+
+  for(slot = proc; slot < &proc[NPROC]; slot++){
+    if(slot->state != UNUSED)
+      continue;
+    // Claim the slot before handing it back.
+    slot->state = EMBRYO;
+    return slot;
+  }
+  // No free slot.
+  return 0;
+}
 
-/*
- * internal fork(). does not copy kernel stack; instead,
- * sets up the stack to return as if from system call.
- * caller must set state to RUNNABLE.
- */
+// Create a new process copying p as the parent.
+// Does not copy the kernel stack; instead sets up the new stack to
+// return as if from a system call.  Returns 0 if allocation fails.
+// Caller must arrange for process to run (set state to RUNNABLE).
 struct proc *
-newproc()
+copyproc(struct proc* p)
 {
+  int i;
   struct proc *np;
-  struct proc *op;
-  int fd;
 
+  // Allocate a proc slot and assign pids while holding proc_table_lock.
   acquire(&proc_table_lock);
-
-  for(np = &proc[1]; np < &proc[NPROC]; np++){
-    if(np->state == UNUSED){
-      np->state = EMBRYO;
-      break;
-    }
-  }
-  if(np >= &proc[NPROC]){
+  if((np = allocproc()) == 0){
     release(&proc_table_lock);
     return 0;
   }
-
-  // copy from proc[0] if we're bootstrapping
-  op = curproc[cpu()];
-  if(op == 0)
-    op = &proc[0];
-
   np->pid = next_pid++;
-  np->ppid = op->pid;
-
+  np->ppid = p->pid;
   release(&proc_table_lock);
 
-  np->sz = op->sz;
-  np->mem = kalloc(op->sz);
-  if(np->mem == 0)
+  // Copy process image memory; on failure give the slot back.
+  np->sz = p->sz;
+  np->mem = kalloc(np->sz);
+  if(np->mem == 0){
+    np->state = UNUSED;
     return 0;
-  memcpy(np->mem, op->mem, np->sz);
+  }
+  memmove(np->mem, p->mem, np->sz);
+
+  // Allocate kernel stack; on failure free the image and the slot.
   np->kstack = kalloc(KSTACKSIZE);
   if(np->kstack == 0){
-    kfree(np->mem, op->sz);
+    kfree(np->mem, np->sz);
     np->state = UNUSED;
     return 0;
   }
+  
+  // Initialize segment table.
   setupsegs(np);
+
+  // Place the trapframe at the top of the kernel stack and copy the parent's.
+  np->tf = (struct Trapframe*)(np->kstack + KSTACKSIZE) - 1;
+  *np->tf = *p->tf;
   
-  // set up kernel stack to return to user space
-  np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe));
-  *(np->tf) = *(op->tf);
-  np->tf->tf_regs.reg_eax = 0; // so fork() returns 0 in child
-
-  // Set up new jmpbuf to start executing forkret (see trapasm.S)
-  // with esp pointing at tf.  Forkret will call forkret1 (below) to release
-  // the proc_table_lock and then jump into the usual trap return code.
+  // Clear %eax so that fork system call returns 0 in child.
+  np->tf->tf_regs.reg_eax = 0;
+
+  // Set up new jmpbuf to start executing at forkret (see below), esp at tf.
   memset(&np->jmpbuf, 0, sizeof np->jmpbuf);
-  np->jmpbuf.jb_eip = (unsigned) forkret;
-  np->jmpbuf.jb_esp = (unsigned) np->tf - 4;  // -4 for the %eip that isn't actually there
+  np->jmpbuf.jb_eip = (unsigned)forkret;
+  np->jmpbuf.jb_esp = (unsigned)np->tf;
 
   // Copy file descriptors
-  for(fd = 0; fd < NOFILE; fd++){
-    np->fds[fd] = op->fds[fd];
-    if(np->fds[fd])
-      fd_reference(np->fds[fd]);
+  for(i = 0; i < NOFILE; i++){
+    np->fds[i] = p->fds[i];
+    if(np->fds[i])
+      fd_reference(np->fds[i]);
   }
 
   return np;
 }
 
-void
-forkret1(void)
-{
-  release(&proc_table_lock);
-}
-
 // Per-CPU process scheduler. 
 // Each CPU calls scheduler() after setting itself up.
 // Scheduler never returns.  It loops, doing:
@@ -199,7 +203,7 @@ sched(void)
 
 // Give up the CPU for one scheduling round.
 void
-yield()
+yield(void)
 {
   struct proc *p;
 
@@ -211,6 +215,18 @@ yield()
   release(&proc_table_lock);
 }
 
+// Entry point for a newly created process: its first scheduling by
+// scheduler() longjmps here (copyproc points jmpbuf.jb_eip at forkret).
+void
+forkret(void)
+{
+  struct Trapframe *tf;
+
+  release(&proc_table_lock);  // Still held from scheduler.
+  tf = curproc[cpu()]->tf;
+  forkret1(tf);  // Jumps into assembly, never to return.
+}
+
 // Atomically release lock and sleep on chan.
 // Reacquires lock when reawakened.
 void
diff --git a/string.c b/string.c
index c88e7de252010edfe441b02d9f3e8b1894cccc12..07082e5abad77c4d71328e4b757c7a41295e32bf 100644
--- a/string.c
+++ b/string.c
@@ -1,18 +1,6 @@
 #include "types.h"
 #include "defs.h"
 
-void *
-memcpy(void *dst, void *src, unsigned n)
-{
-  char *d = (char *) dst;
-  char *s = (char *) src;
-
-  while(n-- > 0)
-    *d++ = *s++;
-
-  return dst;
-}
-
 void *
 memset(void *dst, int c, unsigned n)
 {
@@ -69,3 +57,21 @@ strncmp(const char *p, const char *q, unsigned n)
 	else
 		return (int) ((unsigned char) *p - (unsigned char) *q);
 }
+
+// Copy n bytes from src to dst, front to back, and return dst.
+// Memcpy is deprecated and should NOT be called directly: use memmove,
+// which has defined semantics when the two memory ranges overlap.
+// It exists only because gcc compiles some structure assignments into
+// calls to memcpy, so src is const to match the prototype gcc expects.
+void *
+memcpy(void *dst, const void *src, unsigned n)
+{
+  char *d = (char *) dst;
+  const char *s = (const char *) src;
+
+  while(n-- > 0)
+    *d++ = *s++;
+
+  return dst;
+}
+
diff --git a/syscall.c b/syscall.c
index 420a578141ab143b3e415c374dc4f01b755cc173..6ac739f88bef6f18725076c58ea467e853f173d3 100644
--- a/syscall.c
+++ b/syscall.c
@@ -30,7 +30,7 @@ fetchint(struct proc *p, unsigned addr, int *ip)
 
   if(addr > p->sz - 4)
     return -1;
-  memcpy(ip, p->mem + addr, 4);
+  memmove(ip, p->mem + addr, 4);
   return 0;
 }
 
@@ -49,7 +49,7 @@ putint(struct proc *p, unsigned addr, int ip)
 {
   if(addr > p->sz - 4)
     return -1;
-  memcpy(p->mem + addr, &ip, 4);
+  memmove(p->mem + addr, &ip, 4);
   return 0;
 }
 
@@ -150,13 +150,10 @@ sys_fork(void)
 {
   struct proc *np;
 
-  np = newproc();
-  if(np){
-    np->state = RUNNABLE;
-    return np->pid;
-  } else {
+  if((np = copyproc(curproc[cpu()])) == 0)
     return -1;
-  }
+  np->state = RUNNABLE;
+  return np->pid;
 }
 
 int
diff --git a/trapasm.S b/trapasm.S
index e0e27be23ca908f2fc30279793b14f6689284843..3e0f37562a269a7bb382e17192a4307d07bed7f8 100644
--- a/trapasm.S
+++ b/trapasm.S
@@ -30,9 +30,9 @@ trapret:
         addl $0x8, %esp /* trapno and errcode */
         iret
 
-.globl forkret
-forkret:
-	call forkret1
+.globl forkret1	/* void forkret1(struct Trapframe*), called from forkret() in proc.c */
+forkret1:
+	movl 4(%esp), %esp	/* switch %esp to the first argument, the Trapframe pointer */
 	jmp trapret
 		
 .globl 	acpu