From f32f3638f4c34fbf2fc4398878e6304612bb3283 Mon Sep 17 00:00:00 2001
From: rsc <rsc>
Date: Tue, 21 Aug 2007 19:22:08 +0000
Subject: [PATCH] Various cleanup:

 - Got rid of dummy proc[0].  Now proc[0] is init.
 - Added initcode.S to exec /init, so that /init is
   just a regular binary.
 - Moved exec out of sysfile to exec.c
 - Moved code dealing with fs guts (like struct inode)
   from sysfile.c to fs.c.  Code dealing with system call
   arguments stays in sysfile.c
 - Refactored directory routines in fs.c; should be simpler.
 - Changed iget to return *unlocked* inode structure.
   This solves the lookup-then-use race in namei
   without introducing deadlocks.
   It also enabled getting rid of the dummy proc[0].
---
 BUGS       |  24 +++--
 Makefile   |  13 ++-
 defs.h     |   8 +-
 exec.c     | 136 ++++++++++++++++++++++++++++
 fs.c       | 253 +++++++++++++++++++++++++++++++----------------------
 fsvar.h    |   7 +-
 initcode.S |  28 ++++++
 main.c     | 120 +++++++++----------------
 proc.c     |  50 +++++------
 string.c   |  10 +++
 syscall.c  |   1 -
 sysfile.c  | 219 +++++++---------------------------------------
 12 files changed, 455 insertions(+), 414 deletions(-)
 create mode 100644 exec.c
 create mode 100644 initcode.S

diff --git a/BUGS b/BUGS
index ef4213f..1d2bd37 100644
--- a/BUGS
+++ b/BUGS
@@ -11,14 +11,15 @@ proc.c:
 
 	factor out switching and scheduling code from process code
 
-kalloc.c
-	more cleanups
+	shuffle for formatting
 
-ide.c: synchronous disk write -> polling disk write.  search for
-       (a)synchronous; xv6 doesn't have asynchronous writes.
+syscall.c:
+	cannot convince runoff1 to split the extern lists to fill previous page completely.
 
-fs.c: split all name operations off in name.c?  (starting with namei but move 
+fs.c: split all name operations off in name.c?  (starting with namei but
       wdir keep in fs.c)
+	locking?
+	shuffle for formatting
 
 pipe.c:
 	more comments?
@@ -31,6 +32,19 @@ sysfile.c:
 general:
 	sizeof parens?
 
+bio.c:
+	decide odd or even
+	bwrite doesn't need a second argument
+
+file.c:
+	move fileincref onto page 1?
+
 L=$HOME/mit/l
 (for i in *.c; do xoc -x xgnu -x ./nodecleq.zeta --typesonly $i; done) 2>&1 | grep warning
 
+saw random sharedfd failure.
+
+why does fdalloc consume reference?
+
+why mkdir and create?
+
diff --git a/Makefile b/Makefile
index 2606f4c..ac696dd 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	vectors.o\
 	bio.o\
 	fs.o\
+	exec.o\
 	8253pit.o\
 
 # Cross-compiling (e.g., on Mac OS X)
@@ -34,7 +35,7 @@ LD = $(TOOLPREFIX)ld
 OBJCOPY = $(TOOLPREFIX)objcopy
 OBJDUMP = $(TOOLPREFIX)objdump
 # On newer gcc you may need to add -fno-stack-protector to $(CFLAGS)
-CFLAGS = -fno-builtin -O2 -Wall -MD
+CFLAGS = -fno-builtin -O2 -Wall -MD -ggdb -fno-stack-protector
 AS = $(TOOLPREFIX)gas
 
 xv6.img : bootblock kernel fs.img
@@ -50,12 +51,16 @@ bootblock : bootasm.S bootmain.c
 	$(OBJCOPY) -S -O binary bootblock.o bootblock
 	./sign.pl bootblock
 
-kernel : $(OBJS) bootother.S _init
+kernel : $(OBJS) bootother.S initcode.S
 	$(CC) -nostdinc -I. -c bootother.S
 	$(LD) -N -e start -Ttext 0x7000 -o bootother.out bootother.o
 	$(OBJCOPY) -S -O binary bootother.out bootother
 	$(OBJDUMP) -S bootother.o > bootother.asm
-	$(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary bootother _init
+	$(CC) -nostdinc -I. -c initcode.S
+	$(LD) -N -e start -Ttext 0 -o initcode.out initcode.o
+	$(OBJCOPY) -S -O binary initcode.out initcode
+	$(OBJDUMP) -S initcode.o > initcode.asm
+	$(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary initcode bootother
 	$(OBJDUMP) -S kernel > kernel.asm
 	$(OBJDUMP) -t kernel | awk '/SYMBOL TABLE/ { go=1; next } go {print $$1, $$NF}' >kernel.sym
 
@@ -132,7 +137,7 @@ PRINT =	\
 	proc.h proc.c setjmp.S kalloc.c\
 	syscall.h trapasm.S traps.h trap.c vectors.pl syscall.c sysproc.c\
 	buf.h dev.h fcntl.h stat.h file.h fs.h fsvar.h file.c fs.c bio.c ide.c sysfile.c\
-	pipe.c\
+	pipe.c exec.c\
 	mp.h ioapic.h mp.c lapic.c ioapic.c picirq.c\
 	console.c\
 	string.c\
diff --git a/defs.h b/defs.h
index f2f8d73..24fd52b 100644
--- a/defs.h
+++ b/defs.h
@@ -40,6 +40,7 @@ int memcmp(const void*, const void*, uint);
 void* memmove(void*, const void*, uint);
 int strncmp(const char*, const char*, uint);
 char* safestrcpy(char*, const char*, int);
+int strlen(const char*);
 
 // syscall.c
 void syscall(void);
@@ -135,11 +136,16 @@ int readi(struct inode*, char*, uint, uint);
 int writei(struct inode*, char*, uint, uint);
 struct inode* mknod(char*, short, short, short);
 struct inode* dircreat(struct inode*, char*, int, short, short, short);
-int dirlookup(struct inode*, char*, int, uint*, uint*);
+struct inode* dirlookup(struct inode*, char*, int, uint*);
 int unlink(char*);
 void iupdate(struct inode*);
 int link(char*, char*);
 struct inode* igetroot(void);
+int mkdir(char *path);
+struct inode* create(char *path);
+
+// exec.c
+int exec(char*, char**);
 
 // number of elements in fixed-size array
 #define NELEM(x) (sizeof(x)/sizeof((x)[0]))
diff --git a/exec.c b/exec.c
new file mode 100644
index 0000000..1f8b1af
--- /dev/null
+++ b/exec.c
@@ -0,0 +1,136 @@
+#include "types.h"
+#include "stat.h"
+#include "param.h"
+#include "mmu.h"
+#include "proc.h"
+#include "defs.h"
+#include "x86.h"
+#include "traps.h"
+#include "syscall.h"
+#include "spinlock.h"
+#include "buf.h"
+#include "fs.h"
+#include "fsvar.h"
+#include "elf.h"
+#include "file.h"
+#include "fcntl.h"
+
+// Load the ELF binary at path over cp's memory image and pass it argv.
+// Returns 0 on success, -1 if cp is untouched; later failures call proc_exit.
+int
+exec(char *path, char **argv)
+{
+  uint sz, sp, p1, p2;
+  int i, nargs, argbytes, len;
+  struct inode *ip;
+  struct elfhdr elf;
+  struct proghdr ph;
+  char *mem, *s, *last;
+
+  sz = 0;
+  mem = 0;
+
+  if((ip = namei(path)) == 0)
+    return -1;
+
+  // Use != (not <): a negative int from readi would compare as a huge unsigned.
+  if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf) ||
+     elf.magic != ELF_MAGIC)
+    goto bad;
+
+  for(i = 0; i < elf.phnum; i++){  // first pass: add up the loadable segments
+    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
+             sizeof(ph)) != sizeof(ph))
+      goto bad;
+    if(ph.type != ELF_PROG_LOAD)
+      continue;
+    if(ph.memsz < ph.filesz)
+      goto bad;
+    sz += ph.memsz;
+  }
+
+  sz += 4096 - (sz % 4096);  // round up to a page (adds a whole page if already aligned)
+  sz += 4096;                // one more page; the argument block below sits at its top
+
+  mem = kalloc(sz);
+  if(mem == 0)
+    goto bad;
+  memset(mem, 0, sz);
+
+  argbytes = 0;
+  for(i = 0; argv[i]; i++){
+    len = strlen(argv[i]);
+    argbytes += len + 1;
+  }
+  nargs = i;
+
+  // argn\0
+  // ...
+  // arg0\0
+  // 0
+  // ptr to argn
+  // ...
+  // 12: ptr to arg0
+  //  8: argv (points to ptr to arg0)
+  //  4: argc
+  //  0: fake return pc
+  sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4;
+  *(uint*)(mem + sp) = 0xffffffff;
+  *(uint*)(mem + sp + 4) = nargs;
+  *(uint*)(mem + sp + 8) = (uint)(sp + 12);
+
+  p1 = sp + 12;                    // next slot in the pointer array
+  p2 = sp + 12 + (nargs + 1) * 4;  // next free byte in the string area
+  for(i = 0; i < nargs; i++){
+    len = strlen(argv[i]);
+    memmove(mem + p2, argv[i], len + 1);
+    *(uint*)(mem + p1) = p2;
+    p1 += 4;
+    p2 += len + 1;
+  }
+  *(uint*)(mem + p1) = 0;
+
+  // Save name for debugging.
+  for(last=s=path; *s; s++)
+    if(*s == '/')
+      last = s+1;
+  safestrcpy(cp->name, last, sizeof cp->name);
+
+  // commit to the new image.
+  kfree(cp->mem, cp->sz);
+  cp->sz = sz;
+  cp->mem = mem;
+  mem = 0;
+
+  for(i = 0; i < elf.phnum; i++){  // second pass: copy the segments into place
+    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
+             sizeof(ph)) != sizeof(ph))
+      goto bad2;
+    if(ph.type != ELF_PROG_LOAD)
+      continue;
+    if(ph.va + ph.memsz > sz)
+      goto bad2;
+    if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz)
+      goto bad2;
+    memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);
+  }
+
+  iput(ip);
+  
+  cp->tf->eip = elf.entry;
+  cp->tf->esp = sp;
+  setupsegs(cp);
+
+  return 0;
+
+ bad:  // failed before the commit: cp still has its old image
+  if(mem)
+    kfree(mem, sz);
+  iput(ip);
+  return -1;
+
+ bad2:  // failed after the commit: the old image is gone, so the process exits
+  iput(ip);
+  proc_exit();
+  return 0;
+}
diff --git a/fs.c b/fs.c
index 02ca8e9..4731b5f 100644
--- a/fs.c
+++ b/fs.c
@@ -25,8 +25,6 @@
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
-static void ifree(struct inode*);
-
 // Blocks. 
 
 // Allocate a disk block.
@@ -116,30 +114,25 @@ iinit(void)
 }
 
 // Find the inode with number inum on device dev
-// and return an in-memory copy.  Loads the inode
-// from disk into the in-core table if necessary.
-// The returned inode is locked and has its ref count incremented.
-// Caller must iput the return value when done with it.
+// and return the in-memory copy.  The returned inode
+// has its reference count incremented (and thus must be
+// idecref'ed), but is *unlocked*, meaning that none of the fields
+// except dev and inum are guaranteed to be initialized.
+// This convention gives the caller maximum control over blocking;
+// it also guarantees that iget will not sleep, which is useful in 
+// the early igetroot and when holding other locked inodes.
 struct inode*
 iget(uint dev, uint inum)
 {
   struct inode *ip, *empty;
-  struct dinode *dip;
-  struct buf *bp;
 
   acquire(&icache.lock);
 
- loop:
   // Try for cached inode.
   empty = 0;
   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
-      if(ip->busy){
-        sleep(ip, &icache.lock);
-        goto loop;
-      }
       ip->ref++;
-      ip->busy = 1;
       release(&icache.lock);
       return ip;
     }
@@ -155,52 +148,61 @@ iget(uint dev, uint inum)
   ip->dev = dev;
   ip->inum = inum;
   ip->ref = 1;
-  ip->busy = 1;
+  ip->flags = 0;
   release(&icache.lock);
 
-  bp = bread(dev, IBLOCK(inum));
-  dip = &((struct dinode*)(bp->data))[inum % IPB];
-  ip->type = dip->type;
-  ip->major = dip->major;
-  ip->minor = dip->minor;
-  ip->nlink = dip->nlink;
-  ip->size = dip->size;
-  memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
-  brelse(bp);
-
   return ip;
 }
 
 // Iget the inode for the file system root (/).
+// This gets called before there is a current process: it cannot sleep!
 struct inode*
 igetroot(void)
 {
-  return iget(ROOTDEV, 1);
+  struct inode *ip;
+  ip = iget(ROOTDEV, 1);
+  return ip;
 }
 
 // Lock the given inode.
 void
 ilock(struct inode *ip)
 {
+  struct buf *bp;
+  struct dinode *dip;
+
   if(ip->ref < 1)
     panic("ilock");
 
   acquire(&icache.lock);
-  while(ip->busy)
+  while(ip->flags & I_BUSY)  // wait for the current holder; iunlock does the wakeup
     sleep(ip, &icache.lock);
-  ip->busy = 1;
+  ip->flags |= I_BUSY;
   release(&icache.lock);
+
+  if(!(ip->flags & I_VALID)){  // first lock since iget cleared flags: load the on-disk inode
+    bp = bread(ip->dev, IBLOCK(ip->inum));
+    dip = &((struct dinode*)(bp->data))[ip->inum % IPB];
+    ip->type = dip->type;
+    ip->major = dip->major;
+    ip->minor = dip->minor;
+    ip->nlink = dip->nlink;
+    ip->size = dip->size;
+    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
+    brelse(bp);
+    ip->flags |= I_VALID;  // set while I_BUSY is held, so no other locker sees a half-filled inode
+  }
 }
 
 // Unlock the given inode.
 void
 iunlock(struct inode *ip)
 {
-  if(ip->busy != 1 || ip->ref < 1)
+  if(!(ip->flags & I_BUSY) || ip->ref < 1)
     panic("iunlock");
 
   acquire(&icache.lock);
-  ip->busy = 0;
+  ip->flags &= ~I_BUSY;
   wakeup(ip);
   release(&icache.lock);
 }
@@ -209,19 +211,8 @@ iunlock(struct inode *ip)
 void
 iput(struct inode *ip)
 {
-  if(ip->ref < 1 || ip->busy != 1)
-    panic("iput");
-
-  if((ip->ref == 1) && (ip->nlink == 0)) {
-    itrunc(ip);
-    ifree(ip);
-  }
-
-  acquire(&icache.lock);
-  ip->ref -= 1;
-  ip->busy = 0;
-  wakeup(ip);
-  release(&icache.lock);
+  iunlock(ip);  // panics unless ip is locked and ref >= 1
+  idecref(ip);  // drops the reference; truncates and frees the inode if it was the last and nlink is 0
 }
 
 // Increment reference count for ip.
@@ -229,31 +220,42 @@ iput(struct inode *ip)
 struct inode*
 iincref(struct inode *ip)
 {
-  ilock(ip);
+  acquire(&icache.lock);  // ref is changed under icache.lock (as in iget and idecref); ip itself is not locked
   ip->ref++;
-  iunlock(ip);
+  release(&icache.lock);
   return ip;
 }
 
-// Caller holds reference to unlocked ip.
-// Drop reference.
+// Caller holds reference to unlocked ip.  Drop reference.
 void
 idecref(struct inode *ip)
 {
-  ilock(ip);
-  iput(ip);
+  acquire(&icache.lock);
+  if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) {  // last reference to an unlinked inode
+    // inode is no longer used: truncate and free inode.
+    if(ip->flags & I_BUSY)
+      panic("idecref busy");
+    ip->flags |= I_BUSY;  // keep the inode locked while icache.lock is released for itrunc/iupdate
+    release(&icache.lock);
+    // XXX convince rsc that no one will come find this inode.
+    itrunc(ip);
+    ip->type = 0;  // same two steps the removed ifree() performed
+    iupdate(ip);
+    acquire(&icache.lock);
+    ip->flags &= ~I_BUSY;  // NOTE(review): no wakeup(ip) here, unlike iunlock; confirm nobody can be asleep in ilock
+  }
+  ip->ref--;
+  release(&icache.lock);
 }
 
 // Allocate a new inode with the given type on device dev.
 struct inode*
 ialloc(uint dev, short type)
 {
-  struct inode *ip;
+  int inum, ninodes;
+  struct buf *bp;
   struct dinode *dip;
   struct superblock *sb;
-  int ninodes;
-  int inum;
-  struct buf *bp;
 
   bp = bread(dev, 1);
   sb = (struct superblock*)bp->data;
@@ -268,8 +270,7 @@ ialloc(uint dev, short type)
       dip->type = type;
       bwrite(bp, IBLOCK(inum));   // mark it allocated on the disk
       brelse(bp);
-      ip = iget(dev, inum);
-      return ip;
+      return iget(dev, inum);
     }
     brelse(bp);
   }
@@ -295,15 +296,6 @@ iupdate(struct inode *ip)
   brelse(bp);
 }
 
-// Free (delete) the given inode.
-// Caller must have ip locked.
-static void
-ifree(struct inode *ip)
-{
-  ip->type = 0;
-  iupdate(ip);
-}
-
 // Inode contents
 //
 // The contents (data) associated with each inode is stored
@@ -465,15 +457,15 @@ writei(struct inode *ip, char *src, uint off, uint n)
 //   set *poff to the byte offset of the directory entry
 //   set *pinum to the inode number
 //   return 0.
-int
-dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum)
+struct inode*
+dirlookup(struct inode *dp, char *name, int namelen, uint *poff)
 {
-  uint off;
+  uint off, inum;
   struct buf *bp;
   struct dirent *de;
 
   if(dp->type != T_DIR)
-    return -1;
+    return 0;
 
   for(off = 0; off < dp->size; off += BSIZE){
     bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
@@ -487,24 +479,30 @@ dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum)
         // entry matches path element
         if(poff)
           *poff = off + (uchar*)de - bp->data;
-        if(pinum)
-          *pinum = de->inum;
+        inum = de->inum;
         brelse(bp);
-        return 0;
+        return iget(dp->dev, inum);
       }
     }
     brelse(bp);
   }
-  return -1;
+  return 0;
 }
 
 // Write a new directory entry (name, ino) into the directory dp.
 // Caller must have locked dp.
-void
-dirwrite(struct inode *dp, char *name, int namelen, uint ino)
+int
+dirlink(struct inode *dp, char *name, int namelen, uint ino)
 {
   int off;
   struct dirent de;
+  struct inode *ip;
+
+  // Double-check that name is not present.
+  if((ip = dirlookup(dp, name, namelen, 0)) != 0){
+    idecref(ip);
+    return -1;
+  }
 
   // Look for an empty dirent.
   for(off = 0; off < dp->size; off += sizeof(de)){
@@ -519,9 +517,10 @@ dirwrite(struct inode *dp, char *name, int namelen, uint ino)
     namelen = DIRSIZ;
   memmove(de.name, name, namelen);
   memset(de.name+namelen, 0, DIRSIZ-namelen);
-
   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
     panic("dirwrite");
+  
+  return 0;
 }
 
 // Create a new inode named name inside dp
@@ -535,13 +534,19 @@ dircreat(struct inode *dp, char *name, int namelen, short type, short major, sho
   ip = ialloc(dp->dev, type);
   if(ip == 0)
     return 0;
+  ilock(ip);
   ip->major = major;
   ip->minor = minor;
   ip->size = 0;
   ip->nlink = 1;
   iupdate(ip);
-
-  dirwrite(dp, name, namelen, ip->inum);
+  
+  if(dirlink(dp, name, namelen, ip->inum) < 0){
+    ip->nlink = 0;
+    iupdate(ip);
+    iput(ip);
+    return 0;
+  }
 
   return ip;
 }
@@ -590,17 +595,16 @@ skipelem(char *path, char **name, int *len)
 struct inode*
 _namei(char *path, int parent, char **pname, int *pnamelen)
 {
-  struct inode *dp;
+  struct inode *dp, *ip;
   char *name;
   int namelen;
-  uint off, dev, inum;
+  uint off;
 
   if(*path == '/')
     dp = igetroot();
-  else {
+  else
     dp = iincref(cp->cwd);
-    ilock(dp);
-  }
+  ilock(dp);
 
   while((path = skipelem(path, &name, &namelen)) != 0){
     // Truncate names in path to DIRSIZ chars.
@@ -617,12 +621,12 @@ _namei(char *path, int parent, char **pname, int *pnamelen)
       return dp;
     }
 
-    if(dirlookup(dp, name, namelen, &off, &inum) < 0)
+    if((ip = dirlookup(dp, name, namelen, &off)) == 0)
       goto fail;
 
-    dev = dp->dev;
     iput(dp);
-    dp = iget(dev, inum);
+    ilock(ip);
+    dp = ip;
     if(dp->type == 0 || dp->nlink < 1)
       panic("namei");
   }
@@ -660,10 +664,6 @@ mknod(char *path, short type, short major, short minor)
 
   if((dp = nameiparent(path, &name, &namelen)) == 0)
     return 0;
-  if(dirlookup(dp, name, namelen, 0, 0) >= 0){
-    iput(dp);
-    return 0;
-  }
   ip = dircreat(dp, name, namelen, type, major, minor);
   iput(dp);
   return ip;
@@ -675,13 +675,13 @@ unlink(char *path)
 {
   struct inode *ip, *dp;
   struct dirent de;
-  uint off, inum, dev;
+  uint off;
   char *name;
   int namelen;
 
   if((dp = nameiparent(path, &name, &namelen)) == 0)
     return -1;
-  if(dirlookup(dp, name, namelen, &off, 0) < 0){
+  if((ip = dirlookup(dp, name, namelen, &off)) == 0){
     iput(dp);
     return -1;
   }
@@ -691,20 +691,17 @@ unlink(char *path)
   
   // Cannot remove "." or ".." - the 2 and 3 count the trailing NUL.
   if(memcmp(de.name, ".", 2) == 0 || memcmp(de.name, "..", 3) == 0){
+    idecref(ip);
     iput(dp);
     return -1;
   }
 
-  inum = de.inum;
-
   memset(&de, 0, sizeof(de));
   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
     panic("unlink dir write");
-
-  dev = dp->dev;
   iput(dp);
 
-  ip = iget(dev, inum);
+  ilock(ip);
   if(ip->nlink < 1)
     panic("unlink nlink < 1");
   ip->nlink--;
@@ -729,30 +726,76 @@ link(char *old, char *new)
     return -1;
   }
   iunlock(ip);
-  
+
   if((dp = nameiparent(new, &name, &namelen)) == 0){
     idecref(ip);
     return -1;
   }
-  if(dirlookup(dp, name, namelen, 0, 0) >= 0){
-    iput(dp);
-    idecref(ip);
-    return -1;
-  }
-  if(dp->dev != ip->dev){
+  if(dp->dev != ip->dev || dirlink(dp, name, namelen, ip->inum) < 0){
     idecref(ip);
     iput(dp);
     return -1;
   }
+  iput(dp);
 
-  // LOCKING ERROR HERE!  TWO LOCKS HELD AT ONCE.
+  // XXX write ordering wrong here too.
   ilock(ip);
   ip->nlink++;
   iupdate(ip);
+  iput(ip);
+  return 0;
+}
+
+int
+mkdir(char *path)  // create directory path; returns 0 on success, -1 on failure
+{
+  struct inode *dp, *ip;
+  char *name;
+  int namelen;
+  
+  // XXX write ordering is screwy here- do we care?
+  if((dp = nameiparent(path, &name, &namelen)) == 0)
+    return -1;
+  
+  if((ip = dircreat(dp, name, namelen, T_DIR, 0, 0)) == 0){  // on success ip comes back locked (dircreat ilocks it)
+    iput(dp);
+    return -1;
+  }
+  dp->nlink++;  // accounts for the ".." entry written into the new directory below
+  iupdate(dp);
 
-  dirwrite(dp, name, namelen, ip->inum);
+  if(dirlink(ip, ".", 1, ip->inum) < 0 || dirlink(ip, "..", 2, dp->inum) < 0)
+    panic("mkdir");
   iput(dp);
   iput(ip);
 
   return 0;
 }
+
+// Return the locked inode for path, creating a regular file if no entry exists.
+// Returns 0 if the parent lookup fails, path names a directory, or creation fails.
+struct inode*
+create(char *path)
+{
+  struct inode *parent, *file;
+  char *elem;
+  int elemlen;
+
+  if((parent = nameiparent(path, &elem, &elemlen)) == 0)
+    return 0;
+
+  file = dirlookup(parent, elem, elemlen, 0);
+  if(file == 0){
+    // No such entry yet: make a fresh regular file.
+    file = dircreat(parent, elem, elemlen, T_FILE, 0, 0);
+    iput(parent);
+    return file;
+  }
+  iput(parent);   // release the parent before locking the child
+  ilock(file);
+  if(file->type != T_DIR)
+    return file;
+  iput(file);
+  return 0;
+}
+
diff --git a/fsvar.h b/fsvar.h
index 449bf3d..8609c2a 100644
--- a/fsvar.h
+++ b/fsvar.h
@@ -4,7 +4,7 @@ struct inode {
   uint dev;           // Device number
   uint inum;          // Inode number
   int ref;            // Reference count
-  int busy;           // Is the inode "locked"?
+  int flags;           // I_BUSY, I_VALID
 
   short type;         // copy of disk inode
   short major;
@@ -16,6 +16,5 @@ struct inode {
 
 #define ROOTDEV  1   // Device number of root file system
 
-#define NAMEI_LOOKUP 1
-#define NAMEI_CREATE 2
-#define NAMEI_DELETE 3
+#define I_BUSY 0x1
+#define I_VALID 0x2
diff --git a/initcode.S b/initcode.S
new file mode 100644
index 0000000..c87c4b1
--- /dev/null
+++ b/initcode.S
@@ -0,0 +1,28 @@
+# Initial process execs /init.
+
+#include "syscall.h"
+#include "traps.h"
+
+# exec(init, argv)
+start:
+  pushl $argv               # second argument: address of the argv array
+  pushl $init               # first argument: address of the path string
+  pushl $0                  # fake return pc; the kernel reads arguments starting at esp+4
+  movl $SYS_exec, %eax      # select the exec system call
+  int $T_SYSCALL
+
+# for(;;) exit();
+exit:                       # reached only if the exec above returns
+  movl $SYS_exit, %eax
+  int $T_SYSCALL
+  jmp exit
+
+# "/init\0"
+init:
+  .string "/init\0"         # .string adds its own NUL, so two zero bytes are emitted; the extra one is harmless
+
+.p2align 2                  # 4-byte align the pointer array that follows
+argv:
+  .long init
+  .long 0
+
diff --git a/main.c b/main.c
index fb7a69a..2774636 100644
--- a/main.c
+++ b/main.c
@@ -11,9 +11,8 @@
 #include "spinlock.h"
 
 extern char edata[], end[];
-extern uchar _binary__init_start[], _binary__init_size[];
 
-void process0();
+void proc0init();
 
 // Bootstrap processor starts running C code here.
 // This is called main0 not main so that it can have
@@ -24,7 +23,6 @@ main0(void)
 {
   int i;
   static int bcpu;  // cannot be on stack
-  struct proc *p;
 
   // clear BSS
   memset(edata, 0, end - edata);
@@ -54,15 +52,6 @@ main0(void)
   fileinit();
   iinit(); // i-node table
 
-  // initialize process 0
-  p = &proc[0];
-  p->state = RUNNABLE;
-  p->kstack = kalloc(KSTACKSIZE);
-
-  // cause proc[0] to start in kernel at process0
-  p->jmpbuf.eip = (uint) process0;
-  p->jmpbuf.esp = (uint) (p->kstack + KSTACKSIZE - 4);
-
   // make sure there's a TSS
   setupsegs(0);
 
@@ -86,6 +75,9 @@ main0(void)
   cpus[cpu()].nlock--;
   sti();
 
+  // initialize process 0
+  proc0init();
+
   scheduler();
 }
 
@@ -114,77 +106,45 @@ mpmain(void)
   scheduler();
 }
 
-// proc[0] starts here, called by scheduler() in the ordinary way.
+char initcode[] = {  // NOTE(review): looks like a hand-assembled twin of initcode.S; proc0init copies _binary_initcode_start instead, so confirm this array is unused
+  /* push ptr to argv */     0x6a, 0x1c,
+  /* push ptr to "/init" */  0x6a, 0x16,
+  /* push fake ret addr */   0x6a, 0x00,
+  /* mov $SYS_exec, %eax */  0xb8, 0x09, 0x00, 0x00, 0x00,
+  /* int $0x30 */            0xcd, 0x30,
+  /* Lx: */
+  /* mov $SYS_exit, %eax */  0xb8, 0x02, 0x00, 0x00, 0x00,
+  /* int $0x30 */            0xcd, 0x30,
+  /* jmp Lx */               0xeb, 0xf7,
+  
+  /* "/init\0" */            0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00,
+  /* ptr to "/init" */       0x16, 0x00, 0x00, 0x00,
+  /* 0 */                    0x00, 0x00, 0x00, 0x00
+};
+
 void
-process0(void)
+proc0init(void)
 {
-  extern struct spinlock proc_table_lock;
-  struct proc *p0, *p1;
-  struct trapframe tf;
-
-  release(&proc_table_lock);
-
-  p0 = &proc[0];
-  p0->cwd = igetroot();
-  iunlock(p0->cwd);
-
-  // Dummy user memory to make copyproc() happy.
-  // Must be big enough to hold the init binary and stack.
-  p0->sz = 2*PAGE;
-  p0->mem = kalloc(p0->sz);
-
-  // Fake a trap frame as if a user process had made a system
-  // call, so that copyproc will have a place for the new
-  // process to return to.
-  p0->tf = &tf;
-  memset(p0->tf, 0, sizeof(struct trapframe));
-  p0->tf->es = p0->tf->ds = p0->tf->ss = (SEG_UDATA << 3) | DPL_USER;
-  p0->tf->cs = (SEG_UCODE << 3) | DPL_USER;
-  p0->tf->eflags = FL_IF;
-  p0->tf->esp = p0->sz;
+  struct proc *p;
+  extern uchar _binary_initcode_start[], _binary_initcode_size[];
   
-  // Push bogus return address, both to cause problems
-  // if main returns and also because gcc can generate
-  // function prologs that expect to be able to read the
-  // return address off the stack without causing a fault.
-  p0->tf->esp -= 4;
-  *(uint*)(p0->mem + p0->tf->esp) = 0xefefefef;
-
-  p1 = copyproc(p0);
-
-  load_icode(p1, _binary__init_start, (uint) _binary__init_size);
-  p1->state = RUNNABLE;
-  safestrcpy(p1->name, "init", sizeof p1->name);
+  p = copyproc(0);  // no parent: copyproc only allocates the slot, kernel stack and tf
+  p->sz = PAGE;
+  p->mem = kalloc(p->sz);
+  p->cwd = igetroot();
+  memset(p->tf, 0, sizeof *p->tf);  // tf is a pointer set by copyproc: clear the frame, not the pointer
+  p->tf->es = p->tf->ds = p->tf->ss = (SEG_UDATA << 3) | DPL_USER;
+  p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
+  p->tf->eflags = FL_IF;
+  p->tf->esp = p->sz;
+  
+  // Push dummy return address to placate gcc.
+  p->tf->esp -= 4;
+  *(uint*)(p->mem + p->tf->esp) = 0xefefefef;
 
-  proc_wait();
-  panic("init exited");
+  p->tf->eip = 0;  // initcode is linked at address 0 and copied to the start of p->mem below
+  memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size);
+  safestrcpy(p->name, "initcode", sizeof p->name);
+  p->state = RUNNABLE;  // set last, once the process is fully built
 }
 
-void
-load_icode(struct proc *p, uchar *binary, uint size)
-{
-  int i;
-  struct elfhdr *elf;
-  struct proghdr *ph;
-
-  elf = (struct elfhdr*) binary;
-  if(elf->magic != ELF_MAGIC)
-    panic("load_icode: not an ELF binary");
-
-  p->tf->eip = elf->entry;
-
-  // Map and load segments as directed.
-  ph = (struct proghdr*) (binary + elf->phoff);
-  for(i = 0; i < elf->phnum; i++, ph++) {
-    if(ph->type != ELF_PROG_LOAD)
-      continue;
-    if(ph->va + ph->memsz < ph->va)
-      panic("load_icode: overflow in proghdr");
-    if(ph->va + ph->memsz >= p->sz)
-      panic("load_icode: icode too large");
-
-    // Load/clear the segment
-    memmove(p->mem + ph->va, binary + ph->offset, ph->filesz);
-    memset(p->mem + ph->va + ph->filesz, 0, ph->memsz - ph->filesz);
-  }
-}
diff --git a/proc.c b/proc.c
index b09b738..c86f88f 100644
--- a/proc.c
+++ b/proc.c
@@ -109,47 +109,43 @@ copyproc(struct proc *p)
     return 0;
   }
   np->pid = next_pid++;
-  np->ppid = p->pid;
   release(&proc_table_lock);
 
-  // Copy user memory.
-  np->sz = p->sz;
-  np->mem = kalloc(np->sz);
-  if(np->mem == 0){
-    np->state = UNUSED;
-    return 0;
-  }
-  memmove(np->mem, p->mem, np->sz);
-
   // Allocate kernel stack.
-  np->kstack = kalloc(KSTACKSIZE);
-  if(np->kstack == 0){
-    kfree(np->mem, np->sz);
-    np->mem = 0;
+  if((np->kstack = kalloc(KSTACKSIZE)) == 0){
     np->state = UNUSED;
     return 0;
   }
-
-  // Copy trapframe registers from parent.
   np->tf = (struct trapframe*)(np->kstack + KSTACKSIZE) - 1;
-  memmove(np->tf, p->tf, sizeof(*np->tf));
 
-  // Clear %eax so that fork system call returns 0 in child.
-  np->tf->eax = 0;
+  if(p){  // Copy process state from p.
+    np->ppid = p->pid;
+    memmove(np->tf, p->tf, sizeof *np->tf);
+  
+    np->sz = p->sz;
+    if((np->mem = kalloc(np->sz)) == 0){
+      kfree(np->kstack, KSTACKSIZE);
+      np->kstack = 0;
+      np->state = UNUSED;
+      return 0;
+    }
+    memmove(np->mem, p->mem, np->sz);
+
+    for(i = 0; i < NOFILE; i++){
+      np->ofile[i] = p->ofile[i];
+      if(np->ofile[i])
+        fileincref(np->ofile[i]);
+    }
+    np->cwd = iincref(p->cwd);
+  }
 
   // Set up new jmpbuf to start executing at forkret (see below).
   memset(&np->jmpbuf, 0, sizeof np->jmpbuf);
   np->jmpbuf.eip = (uint)forkret;
   np->jmpbuf.esp = (uint)np->tf - 4;
 
-  // Copy file descriptors
-  for(i = 0; i < NOFILE; i++){
-    np->ofile[i] = p->ofile[i];
-    if(np->ofile[i])
-      fileincref(np->ofile[i]);
-  }
-
-  np->cwd = iincref(p->cwd);
+  // Clear %eax so that fork system call returns 0 in child.
+  np->tf->eax = 0;
 
   return np;
 }
diff --git a/string.c b/string.c
index a871b68..0a92cca 100644
--- a/string.c
+++ b/string.c
@@ -75,3 +75,13 @@ safestrcpy(char *s, const char *t, int n)
   return os;
 }
 
+// Return the number of bytes in s before its terminating NUL.
+int
+strlen(const char *s)
+{
+  const char *end = s;
+  while(*end)
+    end++;
+  return end - s;
+}
+
diff --git a/syscall.c b/syscall.c
index 7f4caa1..b18b62c 100644
--- a/syscall.c
+++ b/syscall.c
@@ -53,7 +53,6 @@ fetchstr(struct proc *p, uint addr, char **pp)
 int
 argint(int argno, int *ip)
 {
-
   return fetchint(cp, cp->tf->esp + 4 + 4*argno, ip);
 }
 
diff --git a/sysfile.c b/sysfile.c
index cd00494..fd28002 100644
--- a/sysfile.c
+++ b/sysfile.c
@@ -114,42 +114,22 @@ sys_close(void)
 int
 sys_open(void)
 {
-  struct inode *ip, *dp;
-  char *path, *name;
-  int namelen;
-  int omode;
-  int fd, dev;
-  uint inum;
+  char *path;
+  int fd, omode;
   struct file *f;
+  struct inode *ip;
 
   if(argstr(0, &path) < 0 || argint(1, &omode) < 0)
     return -1;
 
-  switch(omode & O_CREATE){
-  default:
-  case 0: // regular open
-    if((ip = namei(path)) == 0)
-      return -1;
-    break;
-  
-  case O_CREATE:
-    if((dp = nameiparent(path, &name, &namelen)) == 0)
-      return -1;
-    if(dirlookup(dp, name, namelen, 0, &inum) >= 0){
-      dev = dp->dev;
-      iput(dp);
-      ip = iget(dev, inum);
-    }else{
-      if((ip = dircreat(dp, name, namelen, T_FILE, 0, 0)) == 0){
-        iput(dp);
-        return -1;
-      }
-      iput(dp);
-    }
-    break;
-  }
+  if(omode & O_CREATE)
+    ip = create(path);
+  else
+    ip = namei(path);
+  if(ip == 0)
+    return -1;
 
-  if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY|O_CREATE))){
+  if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY))){
     iput(ip);
     return -1;
   }
@@ -194,6 +174,7 @@ sys_mknod(void)
      argint(2, &major) < 0 || argint(3, &minor) < 0)
     return -1;
 
+  // XXX why this check?
   if(len >= DIRSIZ)
     return -1;
 
@@ -206,45 +187,11 @@ sys_mknod(void)
 int
 sys_mkdir(void)
 {
-  struct inode *nip;
-  struct inode *dp;
-  char *name, *path;
-  struct dirent de;
-  int namelen;
+  char *path;
 
   if(argstr(0, &path) < 0)
     return -1;
-
-  dp = nameiparent(path, &name, &namelen);
-  if(dp == 0)
-    return -1;
-  if(dirlookup(dp, name, namelen, 0, 0) >= 0){
-    iput(dp);
-    return -1;
-  }
-
-  nip = dircreat(dp, name, namelen, T_DIR, 0, 0);
-  if(nip == 0){
-    iput(dp);
-    return -1;
-  }
-
-  dp->nlink++;
-  iupdate(dp);
-
-  memset(de.name, '\0', DIRSIZ);
-  de.name[0] = '.';
-  de.inum = nip->inum;
-  writei(nip, (char*) &de, 0, sizeof(de));
-
-  de.inum = dp->inum;
-  de.name[1] = '.';
-  writei(nip, (char*) &de, sizeof(de), sizeof(de));
-
-  iput(dp);
-  iput(nip);
-
-  return 0;
+  return mkdir(path);
 }
 
 int
@@ -315,132 +262,30 @@ sys_link(void)
   return link(old, new);
 }
 
+#define ARGMAX 10
+
 int
 sys_exec(void)
 {
-  uint sz=0, ap, sp, p1, p2;
-  int i, nargs, argbytes, len;
-  struct inode *ip;
-  struct elfhdr elf;
-  struct proghdr ph;
-  char *mem = 0;
-  char *path, *s, *last;
-  uint argv;
-  
-  if(argstr(0, &path) < 0 || argint(1, (int*)&argv) < 0)
-    return -1;
+  char *path, *argv[ARGMAX];  // argv: kernel-side, 0-terminated copy of the user's pointer array
+  int i;
+  uint uargv, uarg;  // user addresses: the argv array itself, and one argument string
 
-  if((ip = namei(path)) == 0)
+  if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0)
     return -1;
-
-  if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf))
-    goto bad;
-
-  if(elf.magic != ELF_MAGIC)
-    goto bad;
-
-  sz = 0;
-  for(i = 0; i < elf.phnum; i++){
-    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
-             sizeof(ph)) != sizeof(ph))
-      goto bad;
-    if(ph.type != ELF_PROG_LOAD)
-      continue;
-    if(ph.memsz < ph.filesz)
-      goto bad;
-    sz += ph.memsz;
-  }
-
-  sz += 4096 - (sz % 4096);
-  sz += 4096;
-
-  mem = kalloc(sz);
-  if(mem == 0)
-    goto bad;
-  memset(mem, 0, sz);
-
-  nargs = 0;
-  argbytes = 0;
-  for(i = 0;; i++){
-    if(fetchint(cp, argv + 4*i, (int*)&ap) < 0)
-      goto bad;
-    if(ap == 0)
+  memset(argv, 0, sizeof argv);
+  for(i=0;; i++){
+    if(i >= ARGMAX)  // no slot left for the terminating 0: too many arguments
+      return -1;
+    if(fetchint(cp, uargv+4*i, (int*)&uarg) < 0)  // read the i-th user pointer
+      return -1;
+    if(uarg == 0){
+      argv[i] = 0;
       break;
-    len = fetchstr(cp, ap, &s);
-    if(len < 0)
-      goto bad;
-    nargs++;
-    argbytes += len + 1;
-  }
-
-  // argn\0
-  // ...
-  // arg0\0
-  // 0
-  // ptr to argn
-  // ...
-  // 12: ptr to arg0
-  //  8: argv (points to ptr to arg0)
-  //  4: argc
-  //  0: fake return pc
-  sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4;
-  *(uint*)(mem + sp) = 0xffffffff;
-  *(uint*)(mem + sp + 4) = nargs;
-  *(uint*)(mem + sp + 8) = (uint)(sp + 12);
-
-  p1 = sp + 12;
-  p2 = sp + 12 + (nargs + 1) * 4;
-  for(i = 0; i < nargs; i++){
-    fetchint(cp, argv + 4*i, (int*)&ap);
-    len = fetchstr(cp, ap, &s);
-    memmove(mem + p2, s, len + 1);
-    *(uint*)(mem + p1) = p2;
-    p1 += 4;
-    p2 += len + 1;
-  }
-  *(uint*)(mem + p1) = 0;
-
-  // Save name for debugging.
-  for(last=s=path; *s; s++)
-    if(*s == '/')
-      last = s+1;
-  safestrcpy(cp->name, last, sizeof cp->name);
-
-  // commit to the new image.
-  kfree(cp->mem, cp->sz);
-  cp->sz = sz;
-  cp->mem = mem;
-  mem = 0;
-
-  for(i = 0; i < elf.phnum; i++){
-    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
-             sizeof(ph)) != sizeof(ph))
-      goto bad2;
-    if(ph.type != ELF_PROG_LOAD)
-      continue;
-    if(ph.va + ph.memsz > sz)
-      goto bad2;
-    if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz)
-      goto bad2;
-    memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);
+    }
+    if(fetchstr(cp, uarg, &argv[i]) < 0)  // reject a bad user string address
+      return -1;
   }
-
-  iput(ip);
-  
-  cp->tf->eip = elf.entry;
-  cp->tf->esp = sp;
-  setupsegs(cp);
-
-  return 0;
-
- bad:
-  if(mem)
-    kfree(mem, sz);
-  iput(ip);
-  return -1;
-
- bad2:
-  iput(ip);
-  proc_exit();
-  return 0;
+  return exec(path, argv);  // the ELF loading and stack setup now live in exec.c
 }
+
-- 
GitLab