CloverBootloader/FileSystems/VBoxFsDxe/fsw_reiserfs.c

875 lines
34 KiB
C

/**
* \file fsw_reiserfs.c
* ReiserFS file system driver code.
*/
/*-
* Copyright (c) 2006 Christoph Pfisterer
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "fsw_reiserfs.h"
// Forward declarations.
//
// The first ten functions are the entry points installed in the dispatch
// table defined right after these declarations; the core calls them through
// that table.
static fsw_status_t fsw_reiserfs_volume_mount(struct fsw_reiserfs_volume *vol);
static void fsw_reiserfs_volume_free(struct fsw_reiserfs_volume *vol);
static fsw_status_t fsw_reiserfs_volume_stat(struct fsw_reiserfs_volume *vol, struct fsw_volume_stat *sb);
static fsw_status_t fsw_reiserfs_dnode_fill(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno);
static void fsw_reiserfs_dnode_free(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno);
static fsw_status_t fsw_reiserfs_dnode_stat(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_dnode_stat_str *sb);
static fsw_status_t fsw_reiserfs_get_extent(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_extent *extent);
static fsw_status_t fsw_reiserfs_dir_lookup(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_string *lookup_name, struct fsw_reiserfs_dnode **child_dno);
static fsw_status_t fsw_reiserfs_dir_read(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_shandle *shand, struct fsw_reiserfs_dnode **child_dno);
static fsw_status_t fsw_reiserfs_readlink(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_string *link);
// Internal helpers for walking the reiserfs tree: search for an item by key,
// step to the item that follows a previously found one, and release the disk
// block that a found item still references.
static fsw_status_t fsw_reiserfs_item_search(struct fsw_reiserfs_volume *vol,
fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset,
struct fsw_reiserfs_item *item);
static fsw_status_t fsw_reiserfs_item_next(struct fsw_reiserfs_volume *vol,
struct fsw_reiserfs_item *item);
static void fsw_reiserfs_item_release(struct fsw_reiserfs_volume *vol,
struct fsw_reiserfs_item *item);
//
// Dispatch Table
//
// Describes this driver to the core: the file system type name, the sizes of
// the driver-specific volume and dnode structures, and one function pointer
// per operation. The initializer is positional, so the order of the entries
// must match the declaration of struct fsw_fstype_table (not visible here).
//
struct fsw_fstype_table FSW_FSTYPE_TABLE_NAME(reiserfs) = {
{ FSW_STRING_TYPE_ISO88591, 8, 8, "reiserfs" }, // type name, 8 characters, ISO 8859-1
sizeof(struct fsw_reiserfs_volume), // size of the reiserfs-specific volume structure
sizeof(struct fsw_reiserfs_dnode), // size of the reiserfs-specific dnode structure
fsw_reiserfs_volume_mount, // read superblock, create root dnode
fsw_reiserfs_volume_free, // release the superblock copy
fsw_reiserfs_volume_stat, // total / free bytes of the volume
fsw_reiserfs_dnode_fill, // load stat data for a dnode
fsw_reiserfs_dnode_free, // release the stat data copy
fsw_reiserfs_dnode_stat, // size on disk, timestamps, mode
fsw_reiserfs_get_extent, // map a logical file block
fsw_reiserfs_dir_lookup, // find a directory entry by name
fsw_reiserfs_dir_read, // iterate over directory entries
fsw_reiserfs_readlink, // read a symlink target
};
// Candidate superblock locations, tried in order by fsw_reiserfs_volume_mount.
// Each entry is a byte offset shifted right by REISERFS_SUPERBLOCK_BLOCKSIZEBITS
// and is used as a block number while the block size is set to
// REISERFS_SUPERBLOCK_BLOCKSIZE. The trailing 0 terminates the list.
// The table is read-only, hence const.
static const fsw_u32 superblock_offsets[3] = {
    REISERFS_DISK_OFFSET_IN_BYTES >> REISERFS_SUPERBLOCK_BLOCKSIZEBITS,
    REISERFS_OLD_DISK_OFFSET_IN_BYTES >> REISERFS_SUPERBLOCK_BLOCKSIZEBITS,
    0
};
/**
 * Mount a reiserfs volume. Reads the superblock and constructs the
 * root directory dnode.
 *
 * The superblock is probed at every location listed in superblock_offsets
 * until one of the known magic strings is found. The on-disk block size is
 * then validated and installed, the volume label is copied, and the root
 * dnode is created.
 *
 * @param vol  Volume being mounted; vol->sb, vol->version, vol->g.label and
 *             vol->g.root are filled in.
 * @return FSW_SUCCESS on success; FSW_UNSUPPORTED when no known superblock
 *         magic is found; FSW_VOLUME_CORRUPTED when the superblock marks an
 *         unfinished tree rebuild or carries an unusable block size; any
 *         error returned by the allocation, block or dnode helpers otherwise.
 *
 * vol->sb is not released here on failure; fsw_reiserfs_volume_free does
 * that, and it is called by the core after an unsuccessful mount.
 */
static fsw_status_t fsw_reiserfs_volume_mount(struct fsw_reiserfs_volume *vol)
{
    fsw_status_t    status;
    void            *buffer;
    fsw_u32         blocksize;
    int             i;
    struct fsw_string s;

    // allocate memory to keep the superblock around
    status = fsw_alloc(sizeof(struct reiserfs_super_block), &vol->sb);
    if (status)
        return status;

    // read the superblock into its buffer
    fsw_set_blocksize(vol, REISERFS_SUPERBLOCK_BLOCKSIZE, REISERFS_SUPERBLOCK_BLOCKSIZE);
    for (i = 0; superblock_offsets[i]; i++) {
        status = fsw_block_get(vol, superblock_offsets[i], 0, &buffer);
        if (status)
            return status;
        fsw_memcpy(vol->sb, buffer, sizeof(struct reiserfs_super_block));
        fsw_block_release(vol, superblock_offsets[i], buffer);

        // check for one of the magic strings
        if (fsw_memeq(vol->sb->s_v1.s_magic,
                      REISERFS_SUPER_MAGIC_STRING, 8)) {
            vol->version = REISERFS_VERSION_1;
            break;
        } else if (fsw_memeq(vol->sb->s_v1.s_magic,
                             REISER2FS_SUPER_MAGIC_STRING, 9)) {
            vol->version = REISERFS_VERSION_2;
            break;
        } else if (fsw_memeq(vol->sb->s_v1.s_magic,
                             REISER2FS_JR_SUPER_MAGIC_STRING, 9)) {
            // this magic does not imply a format; the version field decides
            vol->version = vol->sb->s_v1.s_version;
            if (vol->version == REISERFS_VERSION_1 || vol->version == REISERFS_VERSION_2)
                break;
        }
    }
    // the loop ran into the terminating 0 entry: nothing matched
    if (superblock_offsets[i] == 0)
        return FSW_UNSUPPORTED;

    // check the superblock
    if (vol->sb->s_v1.s_root_block == (__le32)-1)   // unfinished 'reiserfsck --rebuild-tree'
        return FSW_VOLUME_CORRUPTED;

    // set real blocksize; it comes straight from disk, so make sure it is a
    // non-zero power of two before anything divides by it or masks with
    // (blocksize - 1), as fsw_reiserfs_get_extent does
    blocksize = vol->sb->s_v1.s_blocksize;
    if (blocksize == 0 || (blocksize & (blocksize - 1)) != 0)
        return FSW_VOLUME_CORRUPTED;
    fsw_set_blocksize(vol, blocksize, blocksize);

    // get the volume label: at most 16 bytes, ends at the first NUL
    for (i = 0; i < 16; i++)
        if (vol->sb->s_label[i] == 0)
            break;
    s.type = FSW_STRING_TYPE_ISO88591;
    s.size = s.len = i;
    s.data = vol->sb->s_label;
    status = fsw_strdup_coerce(&vol->g.label, vol->g.host_string_type, &s);
    if (status)
        return status;

    // setup the root dnode
    status = fsw_dnode_create_root(vol, REISERFS_ROOT_OBJECTID, &vol->g.root);
    if (status)
        return status;
    vol->g.root->dir_id = REISERFS_ROOT_PARENT_OBJECTID;

    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_volume_mount: success, blocksize %d tree height %d\n"),
                   blocksize, vol->sb->s_v1.s_tree_height));

    return FSW_SUCCESS;
}
/**
 * Release the file-system-specific part of the volume structure.
 *
 * The core calls this after an unmount and also after a mount attempt that
 * failed, so the superblock copy may or may not have been allocated.
 *
 * @param vol  Volume whose superblock copy is to be released.
 */
static void fsw_reiserfs_volume_free(struct fsw_reiserfs_volume *vol)
{
    // nothing was allocated (mount failed early)
    if (!vol->sb) {
        return;
    }

    fsw_free(vol->sb);
}
/**
* Get in-depth information on a volume.
*/
static fsw_status_t fsw_reiserfs_volume_stat(struct fsw_reiserfs_volume *vol, struct fsw_volume_stat *sb)
{
sb->total_bytes = (fsw_u64)vol->sb->s_v1.s_block_count * vol->g.log_blocksize;
sb->free_bytes = (fsw_u64)vol->sb->s_v1.s_free_blocks * vol->g.log_blocksize;
return FSW_SUCCESS;
}
/**
* Get full information on a dnode from disk. This function is called by the core
* whenever it needs to access fields in the dnode structure that may not
* be filled immediately upon creation of the dnode. In the case of reiserfs, we
* delay fetching of the stat data until dnode_fill is called. The size and
* type fields are invalid until this function has been called.
*/
static fsw_status_t fsw_reiserfs_dnode_fill(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno)
{
fsw_status_t status;
fsw_u32 item_len, mode;
struct fsw_reiserfs_item item;
if (dno->sd_v1 || dno->sd_v2)
return FSW_SUCCESS;
FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_dnode_fill: object %d/%d\n"), dno->dir_id, dno->g.dnode_id));
// find stat data item in reiserfs tree
status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, 0, &item);
if (status == FSW_NOT_FOUND) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: cannot find stat_data for object %d/%d\n"),
dno->dir_id, dno->g.dnode_id));
return FSW_VOLUME_CORRUPTED;
}
if (status)
return status;
if (item.item_offset != 0) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: got item that's not stat_data\n")));
fsw_reiserfs_item_release(vol, &item);
return FSW_VOLUME_CORRUPTED;
}
item_len = item.ih.ih_item_len;
// get data in appropriate version
if (item.ih.ih_version == KEY_FORMAT_3_5 && item_len == SD_V1_SIZE) {
// have stat_data_v1 structure
status = fsw_memdup((void **)&dno->sd_v1, item.item_data, item_len);
fsw_reiserfs_item_release(vol, &item);
if (status)
return status;
// get info from the inode
dno->g.size = dno->sd_v1->sd_size;
mode = dno->sd_v1->sd_mode;
} else if (item.ih.ih_version == KEY_FORMAT_3_6 && item_len == SD_V2_SIZE) {
// have stat_data_v2 structure
status = fsw_memdup((void **)&dno->sd_v2, item.item_data, item_len);
fsw_reiserfs_item_release(vol, &item);
if (status)
return status;
// get info from the inode
dno->g.size = dno->sd_v2->sd_size;
mode = dno->sd_v2->sd_mode;
} else {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: version %d(%d) and size %d(%d) not recognized for stat_data\n"),
item.ih.ih_version, KEY_FORMAT_3_6, item_len, SD_V2_SIZE));
fsw_reiserfs_item_release(vol, &item);
return FSW_VOLUME_CORRUPTED;
}
// get node type from mode field
if (S_ISREG(mode))
dno->g.type = FSW_DNODE_TYPE_FILE;
else if (S_ISDIR(mode))
dno->g.type = FSW_DNODE_TYPE_DIR;
else if (S_ISLNK(mode))
dno->g.type = FSW_DNODE_TYPE_SYMLINK;
else
dno->g.type = FSW_DNODE_TYPE_SPECIAL;
return FSW_SUCCESS;
}
/**
 * Release the file-system-specific part of a dnode.
 *
 * The core calls this when it deallocates a dnode. Whichever stat data copy
 * fsw_reiserfs_dnode_fill attached (if any) is freed.
 *
 * @param vol  Volume the dnode belongs to (not used here).
 * @param dno  Dnode being deallocated.
 */
static void fsw_reiserfs_dnode_free(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno)
{
    // version 1 stat data copy
    if (dno->sd_v1) {
        fsw_free(dno->sd_v1);
    }

    // version 2 stat data copy
    if (dno->sd_v2) {
        fsw_free(dno->sd_v2);
    }
}
/**
* Get in-depth information on a dnode. The core makes sure that fsw_reiserfs_dnode_fill
* has been called on the dnode before this function is called. Note that some
* data is not directly stored into the structure, but passed to a host-specific
* callback that converts it to the host-specific format.
*/
static fsw_status_t fsw_reiserfs_dnode_stat(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_dnode_stat_str *sb)
{
if (dno->sd_v1) {
if (dno->g.type == FSW_DNODE_TYPE_SPECIAL)
sb->used_bytes = 0;
else
sb->used_bytes = dno->sd_v1->u.sd_blocks * vol->g.log_blocksize;
sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->sd_v1->sd_ctime);
sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->sd_v1->sd_atime);
sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->sd_v1->sd_mtime);
sb->store_attr_posix(sb, dno->sd_v1->sd_mode);
} else if (dno->sd_v2) {
sb->used_bytes = dno->sd_v2->sd_blocks * vol->g.log_blocksize;
sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->sd_v2->sd_ctime);
sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->sd_v2->sd_atime);
sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->sd_v2->sd_mtime);
sb->store_attr_posix(sb, dno->sd_v2->sd_mode);
}
return FSW_SUCCESS;
}
/**
* Retrieve file data mapping information. This function is called by the core when
* fsw_shandle_read needs to know where on the disk the required piece of the file's
* data can be found. The core makes sure that fsw_reiserfs_dnode_fill has been called
* on the dnode before. Our task here is to get the physical disk block number for
* the requested logical block number.
*/
static fsw_status_t fsw_reiserfs_get_extent(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_extent *extent)
{
fsw_status_t status;
fsw_u64 search_offset, intra_offset;
struct fsw_reiserfs_item item;
fsw_u32 intra_bno, nr_item;
// Preconditions: The caller has checked that the requested logical block
// is within the file's size. The dnode has complete information, i.e.
// fsw_reiserfs_dnode_read_info was called successfully on it.
FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_get_extent: mapping block %d of object %d/%d\n"),
extent->log_start, dno->dir_id, dno->g.dnode_id));
extent->type = FSW_EXTENT_TYPE_SPARSE;
extent->log_count = 1;
// get the item for the requested block
search_offset = (fsw_u64)extent->log_start * vol->g.log_blocksize + 1;
status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, search_offset, &item);
if (status)
return status;
if (item.item_offset == 0) {
fsw_reiserfs_item_release(vol, &item);
return FSW_SUCCESS; // no data items found, assume all-sparse file
}
intra_offset = search_offset - item.item_offset;
// check the kind of block
if (item.item_type == TYPE_INDIRECT || item.item_type == V1_INDIRECT_UNIQUENESS) {
// indirect item, contains block numbers
if (intra_offset & (vol->g.log_blocksize - 1)) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: intra_offset not block-aligned for indirect block\n")));
goto bail;
}
intra_bno = (fsw_u32)FSW_U64_DIV(intra_offset, vol->g.log_blocksize);
nr_item = item.ih.ih_item_len / sizeof(fsw_u32);
if (intra_bno >= nr_item) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: indirect block too small\n")));
goto bail;
}
extent->type = FSW_EXTENT_TYPE_PHYSBLOCK;
extent->phys_start = ((fsw_u32 *)item.item_data)[intra_bno];
// TODO: check if the following blocks can be aggregated into one extent
fsw_reiserfs_item_release(vol, &item);
return FSW_SUCCESS;
} else if (item.item_type == TYPE_DIRECT || item.item_type == V1_DIRECT_UNIQUENESS) {
// direct item, contains file data
// TODO: Check if direct items always start on block boundaries. If not, we may have
// to do extra work here.
if (intra_offset != 0) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: intra_offset not aligned for direct block\n")));
goto bail;
}
extent->type = FSW_EXTENT_TYPE_BUFFER;
status = fsw_memdup(&extent->buffer, item.item_data, item.ih.ih_item_len);
fsw_reiserfs_item_release(vol, &item);
if (status)
return status;
return FSW_SUCCESS;
}
bail:
fsw_reiserfs_item_release(vol, &item);
return FSW_VOLUME_CORRUPTED;
/*
// check if the following blocks can be aggregated into one extent
file_bcnt = (fsw_u32)((dno->g.size + vol->g.log_blocksize - 1) & (vol->g.log_blocksize - 1));
while (path[i] + extent->log_count < buf_bcnt && // indirect block has more block pointers
extent->log_start + extent->log_count < file_bcnt) { // file has more blocks
if (buffer[path[i] + extent->log_count] == buffer[path[i] + extent->log_count - 1] + 1)
extent->log_count++;
else
break;
}
*/
}
/**
* Lookup a directory's child dnode by name. This function is called on a directory
* to retrieve the directory entry with the given name. A dnode is constructed for
* this entry and returned. The core makes sure that fsw_reiserfs_dnode_fill has been called
* and the dnode is actually a directory.
*/
static fsw_status_t fsw_reiserfs_dir_lookup(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_string *lookup_name, struct fsw_reiserfs_dnode **child_dno_out)
{
fsw_status_t status;
struct fsw_reiserfs_item item;
fsw_u32 nr_item, i, name_offset, next_name_offset, name_len;
fsw_u32 child_dir_id;
struct reiserfs_de_head *dhead;
struct fsw_string entry_name;
// Preconditions: The caller has checked that dno is a directory node.
// BIG TODOS: Use the hash function to start with the item containing the entry.
// Use binary search within the item.
entry_name.type = FSW_STRING_TYPE_ISO88591;
// get the item for that position
status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, FIRST_ITEM_OFFSET, &item);
if (status)
return status;
if (item.item_offset == 0) {
fsw_reiserfs_item_release(vol, &item);
return FSW_NOT_FOUND; // empty directory or something
}
for(;;) {
// search the directory item
dhead = (struct reiserfs_de_head *)item.item_data;
nr_item = item.ih.u.ih_entry_count;
next_name_offset = item.ih.ih_item_len;
for (i = 0; i < nr_item; i++, dhead++, next_name_offset = name_offset) {
// get the name
name_offset = dhead->deh_location;
name_len = next_name_offset - name_offset;
while (name_len > 0 && item.item_data[name_offset + name_len - 1] == 0)
name_len--;
entry_name.len = entry_name.size = name_len;
entry_name.data = item.item_data + name_offset;
// compare name
if (fsw_streq(lookup_name, &entry_name)) {
// found the entry we're looking for!
// setup a dnode for the child item
status = fsw_dnode_create(dno, dhead->deh_objectid, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
child_dir_id = dhead->deh_dir_id;
fsw_reiserfs_item_release(vol, &item);
if (status)
return status;
(*child_dno_out)->dir_id = child_dir_id;
return FSW_SUCCESS;
}
}
// We didn't find the next directory entry in this item. Look for the next
// item of the directory.
status = fsw_reiserfs_item_next(vol, &item);
if (status)
return status;
}
}
/**
* Get the next directory entry when reading a directory. This function is called during
* directory iteration to retrieve the next directory entry. A dnode is constructed for
* the entry and returned. The core makes sure that fsw_reiserfs_dnode_fill has been called
* and the dnode is actually a directory. The shandle provided by the caller is used to
* record the position in the directory between calls.
*/
static fsw_status_t fsw_reiserfs_dir_read(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
struct fsw_shandle *shand, struct fsw_reiserfs_dnode **child_dno_out)
{
fsw_status_t status;
struct fsw_reiserfs_item item;
fsw_u32 nr_item, i, name_offset, next_name_offset, name_len;
fsw_u32 child_dir_id;
struct reiserfs_de_head *dhead;
struct fsw_string entry_name;
// Preconditions: The caller has checked that dno is a directory node. The caller
// has opened a storage handle to the directory's storage and keeps it around between
// calls.
// BIG TODOS: Use binary search within the item.
// adjust pointer to first entry if necessary
if (shand->pos == 0)
shand->pos = FIRST_ITEM_OFFSET;
// get the item for that position
status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, shand->pos, &item);
if (status)
return status;
if (item.item_offset == 0) {
fsw_reiserfs_item_release(vol, &item);
return FSW_NOT_FOUND; // empty directory or something
}
for(;;) {
// search the directory item
dhead = (struct reiserfs_de_head *)item.item_data;
nr_item = item.ih.u.ih_entry_count;
for (i = 0; i < nr_item; i++, dhead++) {
if (dhead->deh_offset < shand->pos)
continue; // not yet past the last entry returned
if (dhead->deh_offset == DOT_OFFSET || dhead->deh_offset == DOT_DOT_OFFSET)
continue; // never report . or ..
// get the name
name_offset = dhead->deh_location;
if (i == 0)
next_name_offset = item.ih.ih_item_len;
else
next_name_offset = dhead[-1].deh_location;
name_len = next_name_offset - name_offset;
while (name_len > 0 && item.item_data[name_offset + name_len - 1] == 0)
name_len--;
entry_name.type = FSW_STRING_TYPE_ISO88591;
entry_name.len = entry_name.size = name_len;
entry_name.data = item.item_data + name_offset;
if (fsw_streq_cstr(&entry_name, ".reiserfs_priv"))
continue; // never report this special file
// found the next entry!
shand->pos = dhead->deh_offset + 1;
// setup a dnode for the child item
status = fsw_dnode_create(dno, dhead->deh_objectid, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
child_dir_id = dhead->deh_dir_id;
fsw_reiserfs_item_release(vol, &item);
if (status)
return status;
(*child_dno_out)->dir_id = child_dir_id;
return FSW_SUCCESS;
}
// We didn't find the next directory entry in this item. Look for the next
// item of the directory.
status = fsw_reiserfs_item_next(vol, &item);
if (status)
return status;
}
}
/**
 * Read the target path of a symbolic link.
 *
 * Called when a symlink has to be resolved; the core guarantees that
 * fsw_reiserfs_dnode_fill has run on the dnode and that it is a symlink.
 * The work is delegated entirely to fsw_dnode_readlink_data.
 *
 * @param vol          Volume the link lives on (not used here).
 * @param dno          Symlink dnode.
 * @param link_target  Receives the link target.
 * @return Whatever fsw_dnode_readlink_data returns.
 */
static fsw_status_t fsw_reiserfs_readlink(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
                                          struct fsw_string *link_target)
{
    fsw_status_t status;

    status = fsw_dnode_readlink_data(dno, link_target);
    return status;
}
/**
 * Order an on-disk tree key relative to a search key.
 *
 * Keys are compared field by field: directory id first, then object id,
 * then offset. The on-disk offset is stored in one of two formats; which one
 * applies is guessed from the top four bits of the 64-bit offset field.
 *
 * @param key       On-disk key (the "first" key).
 * @param dir_id    Directory id of the search key.
 * @param objectid  Object id of the search key.
 * @param offset    Offset of the search key (the "second" key).
 * @return FIRST_GREATER, SECOND_GREATER or KEYS_IDENTICAL.
 */
static int fsw_reiserfs_compare_key(struct reiserfs_key *key,
                                    fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset)
{
    fsw_u32 type_bits;
    fsw_u64 disk_offset;

    if (key->k_dir_id != dir_id) {
        return (key->k_dir_id > dir_id) ? FIRST_GREATER : SECOND_GREATER;
    }

    if (key->k_objectid != objectid) {
        return (key->k_objectid > objectid) ? FIRST_GREATER : SECOND_GREATER;
    }

    // the top nibble holds the item type in the 3.6 key format; if it is not
    // one of the known data item types the key is taken to be in 3.5 format
    type_bits = (fsw_u32)FSW_U64_SHR(key->u.k_offset_v2.v, 60);
    if (type_bits == TYPE_DIRECT || type_bits == TYPE_INDIRECT || type_bits == TYPE_DIRENTRY) {
        // 3.6 format (_v2): the offset is the low 60 bits
        disk_offset = key->u.k_offset_v2.v & (~0ULL >> 4);
    } else {
        // 3.5 format (_v1)
        disk_offset = key->u.k_offset_v1.k_offset;
    }

    if (disk_offset == offset) {
        return KEYS_IDENTICAL;
    }
    return (disk_offset > offset) ? FIRST_GREATER : SECOND_GREATER;
}
/**
 * Find an item by key in the reiserfs tree.
 *
 * Walks from the root block down to a leaf, recording the block number and
 * the index taken at every level in item->path_bno / item->path_index so
 * that fsw_reiserfs_item_next can continue from there. Within the leaf the
 * item with the exact key is returned, or else the last item whose key is
 * smaller than the search key, provided it belongs to the same object.
 *
 * On success the leaf block stays referenced by the item; the caller must
 * call fsw_reiserfs_item_release (or fsw_reiserfs_item_next) on it.
 *
 * @param vol       Mounted volume.
 * @param dir_id    Directory id part of the search key.
 * @param objectid  Object id part of the search key.
 * @param offset    Offset part of the search key.
 * @param item      Receives the search result; item->valid is 0 on failure.
 * @return FSW_SUCCESS when an item of the object was found; FSW_NOT_FOUND
 *         when the tree holds no suitable item for the object;
 *         FSW_VOLUME_CORRUPTED when the tree height from the superblock is
 *         unusable or a block is not at the expected level; otherwise the
 *         error of fsw_block_get.
 */
static fsw_status_t fsw_reiserfs_item_search(struct fsw_reiserfs_volume *vol,
                                             fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset,
                                             struct fsw_reiserfs_item *item)
{
    fsw_status_t    status;
    int             comp_result;
    fsw_u32         tree_bno, next_tree_bno, tree_level, nr_item, i;
    fsw_u8          *buffer;
    struct block_head *bhead;
    struct reiserfs_key *key;
    struct item_head *ihead;

    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_search: searching %d/%d/%lld\n"), dir_id, objectid, offset));

    // BIG TODOS: Use binary search within the item.
    //  Remember tree path for "get next item" function.

    item->valid = 0;
    item->block_bno = 0;

    // The tree height comes from disk and is used to index the path arrays
    // below. Reject heights that would start below the leaf level (this
    // includes a height of 0, which wraps around) or beyond the arrays.
    tree_level = vol->sb->s_v1.s_tree_height - 1;
    if (tree_level < DISK_LEAF_NODE_LEVEL ||
        tree_level >= sizeof(item->path_bno) / sizeof(item->path_bno[0]) ||
        tree_level >= sizeof(item->path_index) / sizeof(item->path_index[0]))
        return FSW_VOLUME_CORRUPTED;

    // walk the tree
    tree_bno = vol->sb->s_v1.s_root_block;
    for (;; tree_level--) {

        // get the current tree block into memory
        status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
        if (status)
            return status;
        bhead = (struct block_head *)buffer;
        if (bhead->blk_level != tree_level) {
            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_search: tree block %d has not expected level %d\n"), tree_bno, tree_level));
            fsw_block_release(vol, tree_bno, buffer);
            return FSW_VOLUME_CORRUPTED;
        }
        nr_item = bhead->blk_nr_item;
        FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_search: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
        item->path_bno[tree_level] = tree_bno;

        // check if we have reached a leaf block
        if (tree_level == DISK_LEAF_NODE_LEVEL)
            break;

        // search internal node block, look for the path to follow: take the
        // pointer in front of the first key that is greater than ours
        key = (struct reiserfs_key *)(buffer + BLKH_SIZE);
        for (i = 0; i < nr_item; i++, key++) {
            if (fsw_reiserfs_compare_key(key, dir_id, objectid, offset) == FIRST_GREATER)
                break;
        }
        item->path_index[tree_level] = i;
        // the child pointers follow the nr_item keys
        next_tree_bno = ((struct disk_child *)(buffer + BLKH_SIZE + nr_item * KEY_SIZE))[i].dc_block_number;
        fsw_block_release(vol, tree_bno, buffer);
        tree_bno = next_tree_bno;
    }

    // An empty leaf cannot contain our item. Bail out here; otherwise the
    // "go back to the last key" step below would step in front of the first
    // item head and interpret the block head as an item.
    if (nr_item == 0) {
        fsw_block_release(vol, tree_bno, buffer);
        return FSW_NOT_FOUND;
    }

    // search leaf node block, look for our data
    ihead = (struct item_head *)(buffer + BLKH_SIZE);
    for (i = 0; i < nr_item; i++, ihead++) {
        comp_result = fsw_reiserfs_compare_key(&ihead->ih_key, dir_id, objectid, offset);
        if (comp_result == KEYS_IDENTICAL)
            break;
        if (comp_result == FIRST_GREATER) {
            // Current key is greater than the search key. Use the last key before this
            // one as the preliminary result.
            if (i == 0) {
                fsw_block_release(vol, tree_bno, buffer);
                return FSW_NOT_FOUND;
            }
            i--, ihead--;
            break;
        }
    }
    if (i >= nr_item) {
        // Go back to the last key, it was smaller than the search key.
        // NOTE: The first key of the next leaf block is guaranteed to be greater than
        //  our search key.
        i--, ihead--;
    }
    item->path_index[tree_level] = i;
    // Since we may have a key that is smaller than the search key, verify that
    // it is for the same object.
    if (ihead->ih_key.k_dir_id != dir_id || ihead->ih_key.k_objectid != objectid) {
        fsw_block_release(vol, tree_bno, buffer);
        return FSW_NOT_FOUND;   // Found no key for this object at all
    }

    // return results
    fsw_memcpy(&item->ih, ihead, sizeof(struct item_head));
    item->item_type = (fsw_u32)FSW_U64_SHR(ihead->ih_key.u.k_offset_v2.v, 60);
    if (item->item_type != TYPE_DIRECT &&
        item->item_type != TYPE_INDIRECT &&
        item->item_type != TYPE_DIRENTRY) {
        // 3.5 format (_v1)
        item->item_type = ihead->ih_key.u.k_offset_v1.k_uniqueness;
        item->item_offset = ihead->ih_key.u.k_offset_v1.k_offset;
    } else {
        // 3.6 format (_v2)
        item->item_offset = ihead->ih_key.u.k_offset_v2.v & (~0ULL >> 4);
    }
    item->item_data = buffer + ihead->ih_item_location;
    item->valid = 1;

    // add information for block release
    item->block_bno = tree_bno;
    item->block_buffer = buffer;

    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_search: found %d/%d/%lld (%d)\n"),
                   ihead->ih_key.k_dir_id, ihead->ih_key.k_objectid, item->item_offset, item->item_type));
    return FSW_SUCCESS;
}
/**
* Find the next item in the reiserfs tree for an already-found item.
*
* Uses the path (block numbers and indexes per tree level) recorded in the
* item to step to the item that follows it in the tree. The block referenced
* by the incoming item is released first. The walk moves up from the leaf
* level until it finds a node with a further entry, then descends along the
* first pointers to a leaf again.
*
* Returns FSW_SUCCESS with the item updated (and a new block referenced) when
* the following item belongs to the same object; FSW_NOT_FOUND when the item
* is not valid, when the following item belongs to another object, or when
* the tree has no further items; FSW_VOLUME_CORRUPTED when a block is not at
* the expected level; otherwise the error returned by fsw_block_get.
*/
static fsw_status_t fsw_reiserfs_item_next(struct fsw_reiserfs_volume *vol,
struct fsw_reiserfs_item *item)
{
fsw_status_t status;
fsw_u32 dir_id, objectid;
// fsw_u64 offset;
fsw_u32 tree_bno, next_tree_bno, tree_level, nr_item, nr_ptr_item;
fsw_u8 *buffer;
struct block_head *bhead;
struct item_head *ihead;
if (!item->valid)
return FSW_NOT_FOUND;
// drop the reference to the current leaf block; item->ih is a copy and
// stays usable afterwards
fsw_reiserfs_item_release(vol, item); // TODO: maybe delay this and/or use the cached block!
// remember which object we are iterating so we can tell when we leave it
dir_id = item->ih.ih_key.k_dir_id;
objectid = item->ih.ih_key.k_objectid;
// offset = item->item_offset;
FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_next: next for %d/%d/%lld\n"), dir_id, objectid, item->item_offset));
// find a node that has more items, moving up until we find one
for (tree_level = DISK_LEAF_NODE_LEVEL; tree_level < vol->sb->s_v1.s_tree_height; tree_level++) {
// get the current tree block into memory
tree_bno = item->path_bno[tree_level];
status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
if (status)
return status;
bhead = (struct block_head *)buffer;
if (bhead->blk_level != tree_level) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_next: tree block %d has not expected level %d\n"), tree_bno, tree_level));
fsw_block_release(vol, tree_bno, buffer);
return FSW_VOLUME_CORRUPTED;
}
nr_item = bhead->blk_nr_item;
FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_next: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
nr_ptr_item = nr_item + ((tree_level > DISK_LEAF_NODE_LEVEL) ? 1 : 0); // internal nodes have (nr_item) keys and (nr_item+1) pointers
// advance to the next entry at this level
item->path_index[tree_level]++;
if (item->path_index[tree_level] >= nr_ptr_item) {
// exhausted: reset the index so that a later descent through a
// sibling starts at its first entry
item->path_index[tree_level] = 0;
fsw_block_release(vol, tree_bno, buffer);
continue; // this node doesn't have any more items, move up one level
}
// we have a new path to follow, move down to the leaf node again
while (tree_level > DISK_LEAF_NODE_LEVEL) {
// get next pointer from current block
// (the child pointers are located after the nr_item keys)
next_tree_bno = ((struct disk_child *)(buffer + BLKH_SIZE + nr_item * KEY_SIZE))[item->path_index[tree_level]].dc_block_number;
fsw_block_release(vol, tree_bno, buffer);
tree_bno = next_tree_bno;
tree_level--;
// get the current tree block into memory
status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
if (status)
return status;
bhead = (struct block_head *)buffer;
if (bhead->blk_level != tree_level) {
FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_next: tree block %d has not expected level %d\n"), tree_bno, tree_level));
fsw_block_release(vol, tree_bno, buffer);
return FSW_VOLUME_CORRUPTED;
}
nr_item = bhead->blk_nr_item;
FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_next: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
// record the new block in the path for subsequent calls
item->path_bno[tree_level] = tree_bno;
}
// get the item from the leaf node
ihead = ((struct item_head *)(buffer + BLKH_SIZE)) + item->path_index[tree_level];
// We now have the item that follows the previous one in the tree. Check that it
// belongs to the same object.
if (ihead->ih_key.k_dir_id != dir_id || ihead->ih_key.k_objectid != objectid) {
fsw_block_release(vol, tree_bno, buffer);
return FSW_NOT_FOUND; // Found no next key for this object
}
// return results
fsw_memcpy(&item->ih, ihead, sizeof(struct item_head));
// the top four bits of the 64-bit offset field hold the item type in the
// 3.6 key format; anything else is treated as a 3.5 format key
item->item_type = (fsw_u32)FSW_U64_SHR(ihead->ih_key.u.k_offset_v2.v, 60);
if (item->item_type != TYPE_DIRECT &&
item->item_type != TYPE_INDIRECT &&
item->item_type != TYPE_DIRENTRY) {
// 3.5 format (_v1)
item->item_type = ihead->ih_key.u.k_offset_v1.k_uniqueness;
item->item_offset = ihead->ih_key.u.k_offset_v1.k_offset;
} else {
// 3.6 format (_v2)
item->item_offset = ihead->ih_key.u.k_offset_v2.v & (~0ULL >> 4);
}
item->item_data = buffer + ihead->ih_item_location;
item->valid = 1;
// add information for block release
item->block_bno = tree_bno;
item->block_buffer = buffer;
FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_next: found %d/%d/%lld (%d)\n"),
ihead->ih_key.k_dir_id, ihead->ih_key.k_objectid, item->item_offset, item->item_type));
return FSW_SUCCESS;
}
// we went to the highest level node and there still were no more items...
return FSW_NOT_FOUND;
}
/**
 * Drop the disk block that an item search result still references.
 *
 * Does nothing for an item that is not valid or that no longer references a
 * block, so it can be called more than once on the same item.
 *
 * @param vol   Volume the block was read from.
 * @param item  Search result filled in by fsw_reiserfs_item_search or
 *              fsw_reiserfs_item_next.
 */
static void fsw_reiserfs_item_release(struct fsw_reiserfs_volume *vol,
                                      struct fsw_reiserfs_item *item)
{
    if (item->valid && item->block_bno > 0) {
        fsw_block_release(vol, item->block_bno, item->block_buffer);
        // mark the reference as gone
        item->block_bno = 0;
    }
}
// EOF