Newer
Older
// File system implementation.
//
// Four layers:
// + Blocks: allocator for raw disk blocks.
// + Files: inode allocator, reading, writing, metadata.
// + Directories: inode with special contents (list of other inodes!)
// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
//
// Disk layout is: superblock, inodes, disk bitmap, data blocks.
// TODO: Check locking!
#include "param.h"
#include "x86.h"
#include "mmu.h"
#include "proc.h"
#include "defs.h"
#include "spinlock.h"
#include "buf.h"
#include "fs.h"
#include "fsvar.h"
static void itrunc(struct inode*);
static void iupdate(struct inode*);
struct buf *bp;
struct superblock *sb;
bp = bread(dev, 1);
for(b = 0; b < size; b++) {
if(b % BPB == 0) {
brelse(bp);
bp = bread(dev, BBLOCK(b, ninodes));
}
bi = b % BPB;
m = 0x1 << (bi % 8);
if((bp->data[bi/8] & m) == 0) { // is block free?
bp->data[bi/8] |= 0x1 << (bi % 8);
bwrite(bp, BBLOCK(b, ninodes)); // mark it allocated on disk
brelse(bp);
return b;
bfree(int dev, uint b)
{
struct buf *bp;
struct superblock *sb;
bp = bread(dev, b);
memset(bp->data, 0, BSIZE);
bwrite(bp, b);
brelse(bp);
bwrite(bp, BBLOCK(b, ninodes)); // mark it free on disk
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Inodes
//
// The inodes are laid out sequentially on disk immediately after
// the superblock. The kernel keeps a cache of the in-use
// on-disk structures to provide a place for synchronizing access
// to inodes shared between multiple processes.
//
// ip->ref counts the number of references to this
// inode; references are typically kept in struct file and in cp->cwd.
// When ip->ref falls to zero, the inode is no longer cached.
// It is an error to use an inode without holding a reference to it.
//
// Inodes can be marked busy, just like bufs, meaning
// that some process has logically locked the inode, and other processes
// are not allowed to look at it. Because the locking can last for
// a long time (for example, during a disk access), we use a flag
// like in buffer cache, not spin locks. The inode should always be
// locked during modifications to it.
struct {
struct spinlock lock;
struct inode inode[NINODE];
} icache;
void
iinit(void)
{
initlock(&icache.lock, "icache.lock");
}
// and return the in-memory copy. The returned inode
// has its reference count incremented (and thus must be
// idecref'ed), but is *unlocked*, meaning that none of the fields
// except dev and inum are guaranteed to be initialized.
// This convention gives the caller maximum control over blocking;
// it also guarantees that iget will not sleep, which is useful in
// the early igetroot and when holding other locked inodes.
// Try for cached inode.
empty = 0;
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
ip->ref++;
if(empty == 0 && ip->ref == 0) // Remember empty slot.
empty = ip;
ip = empty;
ip->dev = dev;
ip->inum = inum;
ip->ref = 1;
return ip;
}
// Iget the inode for the file system root (/).
if(!(ip->flags & I_VALID)){
bp = bread(ip->dev, IBLOCK(ip->inum));
dip = &((struct dinode*)(bp->data))[ip->inum % IPB];
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
ip->flags |= I_VALID;
}
}
// Unlock the given inode.
void
iunlock(struct inode *ip)
{
wakeup(ip);
release(&icache.lock);
}
// Unlock inode and drop reference.
void
iput(struct inode *ip)
{
}
// Increment reference count for ip.
// Returns ip to enable ip = iincref(ip1) idiom.
struct inode*
iincref(struct inode *ip)
{
acquire(&icache.lock);
if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) {
// inode is no longer used: truncate and free inode.
if(ip->flags & I_BUSY)
panic("idecref busy");
ip->flags |= I_BUSY;
release(&icache.lock);
// XXX convince rsc that no one will come find this inode.
itrunc(ip);
ip->type = 0;
iupdate(ip);
acquire(&icache.lock);
ip->flags &= ~I_BUSY;
}
ip->ref--;
release(&icache.lock);
// Allocate a new inode with the given type on device dev.
for(inum = 1; inum < ninodes; inum++) { // loop over inode blocks
dip = &((struct dinode*)(bp->data))[inum % IPB];
if(dip->type == 0) { // a free inode
memset(dip, 0, sizeof(*dip));
dip->type = type;
bwrite(bp, IBLOCK(inum)); // mark it allocated on the disk
brelse(bp);
panic("ialloc: no inodes");
// Copy inode, which has changed, from memory to disk.
bp = bread(ip->dev, IBLOCK(ip->inum));
dip = &((struct dinode*)(bp->data))[ip->inum % IPB];
dip->type = ip->type;
dip->major = ip->major;
dip->minor = ip->minor;
dip->nlink = ip->nlink;
dip->size = ip->size;
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
bwrite(bp, IBLOCK(ip->inum));
brelse(bp);
// Inode contents
//
// The contents (data) associated with each inode is stored
// in a sequence of blocks on the disk. The first NDIRECT blocks
// are stored in ip->addrs[]. The next NINDIRECT blocks are
// listed in the block ip->addrs[INDIRECT].
// If there is no such block: if alloc is set, allocate one, else return -1.
uint
{
if((addr = ip->addrs[bn]) == 0) {
if(!alloc)
return -1;
ip->addrs[bn] = addr = balloc(ip->dev);
}
return addr;
bn -= NDIRECT;
if(bn < NINDIRECT) {
// Load indirect block, allocating if necessary.
if((addr = ip->addrs[INDIRECT]) == 0) {
if(!alloc)
return -1;
ip->addrs[INDIRECT] = addr = balloc(ip->dev);
}
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
if((addr = a[bn]) == 0) {
if(!alloc) {
brelse(bp);
return -1;
}
a[bn] = addr = balloc(ip->dev);
bwrite(bp, ip->addrs[INDIRECT]);
}
brelse(bp);
return addr;
}
panic("bmap: out of range");
}
{
for(i = 0; i < NDIRECT; i++) {
if(ip->addrs[i]) {
ip->addrs[i] = 0;
}
}
if(ip->addrs[INDIRECT]) {
bp = bread(ip->dev, ip->addrs[INDIRECT]);
a = (uint*)bp->data;
for(j = 0; j < NINDIRECT; j++) {
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
ip->addrs[INDIRECT] = 0;
st->dev = ip->dev;
st->ino = ip->inum;
st->type = ip->type;
st->nlink = ip->nlink;
st->size = ip->size;
readi(struct inode *ip, char *dst, uint off, uint n)
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
if(off + n < off)
return -1;
if(off + n > ip->size)
n = ip->size - off;
for(tot=0; tot<n; tot+=m, off+=m, dst+=m) {
bp = bread(ip->dev, bmap(ip, off/BSIZE, 0));
m = min(n - tot, BSIZE - off%BSIZE);
memmove(dst, bp->data + off%BSIZE, m);
brelse(bp);
writei(struct inode *ip, char *src, uint off, uint n)
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
if(off + n < off)
return -1;
if(off + n > MAXFILE*BSIZE)
n = MAXFILE*BSIZE - off;
for(tot=0; tot<n; tot+=m, off+=m, src+=m) {
bp = bread(ip->dev, bmap(ip, off/BSIZE, 1));
m = min(n - tot, BSIZE - off%BSIZE);
memmove(bp->data + off%BSIZE, src, m);
bwrite(bp, bmap(ip, off/BSIZE, 0));
if(n > 0 && off > ip->size) {
ip->size = off;
// Directories are just inodes (files) filled with dirent structures.
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
// Compare two names, which are strings with a max length of DIRSIZ.
static int
namecmp(const char *s, const char *t)
{
int i;
for(i=0; i<DIRSIZ; i++){
if(s[i] != t[i])
return s[i] - t[i];
if(s[i] == 0)
break;
}
return 0;
}
// Copy one name to another.
static void
namecpy(char *s, const char *t)
{
int i;
for(i=0; i<DIRSIZ && t[i]; i++)
s[i] = t[i];
for(; i<DIRSIZ; i++)
s[i] = 0;
}
// Look for a directory entry in a directory.
// If not found, return -1.
// If found:
// set *poff to the byte offset of the directory entry
// set *pinum to the inode number
// return 0.
static struct inode*
dirlookup(struct inode *dp, char *name, uint *poff)
struct buf *bp;
struct dirent *de;
if(dp->type != T_DIR)
for(de = (struct dirent*) bp->data;
de < (struct dirent*) (bp->data + BSIZE);
de++){
if(de->inum == 0)
continue;
if(poff)
*poff = off + (uchar*)de - bp->data;
// Write a new directory entry (name, ino) into the directory dp.
// Caller must have locked dp.
static int
dirlink(struct inode *dp, char *name, uint ino)
// Look for an empty dirent.
for(off = 0; off < dp->size; off += sizeof(de)){
if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("dirwrite read");
if(de.inum == 0)
break;
}
de.inum = ino;
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("dirwrite");
// Create a new inode named name inside dp
// and return its locked inode structure.
// If name already exists, return 0.
static struct inode*
dircreat(struct inode *dp, char *name, short type, short major, short minor)
{
struct inode *ip;
ip = ialloc(dp->dev, type);
if(ip == 0)
return 0;
ip->major = major;
ip->minor = minor;
ip->size = 0;
ip->nlink = 1;
iupdate(ip);
// Paths
// Skip over the next path element in path,
// saving it in *name and its length in *len.
// Return a pointer to the element after that
// (after any trailing slashes).
// Thus the caller can check whether *path=='\0'
// to see whether the name just removed was
// the last one.
// If there is no name to remove, return 0.
//
// Examples:
// skipelem("a/bb/c") = "bb/c", with *name = "a/bb/c", len=1
// skipelem("///a/bb") = "b", with *name="a/bb", len=1
// skipelem("") = skipelem("////") = 0
//
static char*
while(*path == '/')
path++;
if(*path == 0)
return 0;
len = path - s;
if(len >= DIRSIZ)
memmove(name, s, DIRSIZ);
else{
memmove(name, s, len);
name[len] = 0;
}
while(*path == '/')
path++;
return path;
}
// look up a path name, in one of three modes.
// NAMEI_LOOKUP: return locked target inode.
// NAMEI_CREATE: return locked parent inode.
// return 0 if name does exist.
// *ret_last points to last path component (i.e. new file name).
// *ret_ip points to the the name that did exist, if it did.
// *ret_ip and *ret_last may be zero even if return value is zero.
// NAMEI_DELETE: return locked parent inode, offset of dirent in *ret_off.
// return 0 if name doesn't exist.
if(parent && *path == '\0'){
// Stop one level early.
static struct inode*
nameiparent(char *path, char *name)
// Create the path and return its locked inode structure.
// If cp already exists, return 0.
struct inode*
mknod(char *path, short type, short major, short minor)
{
struct inode *ip, *dp;
struct inode *ip, *dp;
struct dirent de;
// Cannot unlink "." or "..".
if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0){
memset(&de, 0, sizeof(de));
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("unlink dir write");
ip->nlink--;
iupdate(ip);
iput(ip);
return -1;
if(ip->type == T_DIR){
iput(ip);
if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
idecref(ip);
iput(dp);
iput(ip);
return 0;
}
int
mkdir(char *path)
{
struct inode *dp, *ip;
if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0)
iput(dp);
ilock(ip);
if(ip->type == T_DIR){
iput(ip);
return 0;
}
return ip;
}