pjs/grendel/storage/intertwingle/SimpleDB.java

501 строка
15 KiB
Java

/* -*- Mode: java; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Grendel mail/news client.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1997
* Netscape Communications Corporation. All Rights Reserved.
*
* Created: Terry Weissman <terry@netscape.com>, 30 Sep 1997.
*/
package grendel.storage.intertwingle;
import calypso.util.Assert;
import calypso.util.NullEnumeration;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.PrintStream;
import java.util.Enumeration;
import java.util.NoSuchElementException;
/** Simplistic implementation of a RDF-ish database. This is basically a
massive hashtable hack. Almost certainly we shouldn't ship this, but it
ought to be good enough to play with for a while.
<p>
Here's how it works:
<p>
There are five files. One is the "store", and it stores the data. All
data is accessed via a hash table, so each entry in the store contains
one or two pointers to other entries with the same hash. ("two" is in
the case where there are two hash tables into the same data.)
<p>
The other four files are head pointers into the hashtable, for various
hashes.
<p>
Random strings are stored in the store. They get hashed using
String.hashCode(), the hash value is masked off to HASHMASK, and the
first pointer into the hashtable appears in the "strhead" file. Each
entry consists of a pointer to the next string with the same hashcode,
then a refcount, and then the UTF version of the string itself.
<p>
Assertions are stored in the store. Each assertion is three strings:
name, slot, and value. The strings are first put into the store, and
then the assertion is recorded by writing down the location of the
strings. Each assertion is hashed twice: once by combining the name and
slot, and once by combining the value and slot. So, each assertion records
the next value for both hash tables, followed by the three string
locations. So, each assertion costs 20 bytes on disk, not counting the
string storage. The "head[]" files are the first pointers for each
hashtable.
<p>
Finally, a free list of unused chunks in the store is kept. The
"freehead" file points to the first free entry of a given size; each
free entry points to the next free entry of the same size.
*/
public final class SimpleDB extends BaseDB {
/** The file handed to the constructor; the main store lives at this path. */
private File storename;
/** The store itself: string entries, assertion records and free chunks. */
private RandomAccessFile store;
/** Bucket heads for the two assertion hashes: head[0] is indexed by the
    name+slot hash, head[1] by the value+slot hash. */
private RandomAccessFile head[] = new RandomAccessFile[2];
/** Bucket heads for the string hash chains. */
private RandomAccessFile strhead;
/** Free-list heads: the int at offset (size * 4) is the store offset of the
    first free chunk of that size, or 0 if there is none. */
private RandomAccessFile freehead;
private static final int MINALLOC = 12; // Try not to leave free chunks
// smaller than this.
private static final int MAXALLOC = 1024;// Don't bother recording free
// chunks bigger than this.
private static final int HASHMASK = 0xfff; // Only use these bits in our
// hashtables.
/** Opens the database stored in <code>f</code> and its four companion
    files (<code>.key0</code>, <code>.key1</code>, <code>.free</code> and
    <code>.strings</code>), creating any that do not exist yet.
    <p>
    A brand new store gets a one-line text header. Besides warning people
    off, the header makes the store non-empty, so a fresh allocation (which
    is placed at the current end of the store) can never land at offset 0;
    the chain-walking code treats 0 as "end of chain".
    <p>
    If any of the files cannot be opened, the ones already opened are closed
    again before the exception propagates.
    @param f the main store file
    @throws IOException if a file cannot be opened or the header cannot be
                        written */
public SimpleDB(File f) throws IOException {
  storename = f;
  String name = f.getPath();
  boolean opened = false;
  try {
    store = new RandomAccessFile(f, "rw");
    head[0] = new RandomAccessFile(name + ".key0", "rw");
    head[1] = new RandomAccessFile(name + ".key1", "rw");
    freehead = new RandomAccessFile(name + ".free", "rw");
    strhead = new RandomAccessFile(name + ".strings", "rw");
    if (store.length() == 0) {
      store.write(("# DO NOT EDIT THIS FILE; it contains binary data.\n").getBytes());
    }
    opened = true;
  } finally {
    if (!opened) {
      // Don't leak the descriptors we did manage to open.
      closeQuietly(store);
      closeQuietly(head[0]);
      closeQuietly(head[1]);
      closeQuietly(freehead);
      closeQuietly(strhead);
    }
  }
}

/** Closes <code>file</code> if it is non-null, ignoring any failure. Only
    used while unwinding from a failed constructor, where the original
    exception is the one worth reporting. */
private static void closeQuietly(RandomAccessFile file) {
  if (file == null) return;
  try {
    file.close();
  } catch (IOException ignored) {
    // Best effort; the caller is already propagating the real failure.
  }
}
/** Finds room in the store for a chunk of <code>length</code> bytes.
    <p>
    The free lists are searched from size <code>length</code> upwards. A
    free chunk is taken if it is an exact fit, or if it is big enough that
    the unused tail (more than MINALLOC bytes) is worth putting back on a
    free list. If nothing suitable is found, the current end of the store
    is returned; the caller is expected to write the chunk there, which
    extends the file.
    @param length number of bytes needed
    @return store offset at which the caller may write <code>length</code>
            bytes
    @throws IOException on any I/O failure */
private int alloc(int length) throws IOException {
  // freehead holds one int per chunk size, so the valid sizes are
  // 0 .. biggest-1.
  int biggest = (int) (freehead.length() / 4);
  if (length < biggest) {
    freehead.seek(length * 4);
    // The head ints are read sequentially, so t must advance in step with
    // the file pointer: t is the chunk size of the list just read.
    for (int t = length ; t < biggest ; t++) {
      int result = freehead.readInt();
      if (result > 0 && (t == length || t > length + MINALLOC)) {
        // Pop the chunk off the free list for size t ...
        store.seek(result);
        int nextFree = store.readInt();
        freehead.seek(t * 4);
        freehead.writeInt(nextFree);
        // ... and give back whatever we don't need (no-op on exact fit).
        free(result + length, t - length);
        return result;
      }
    }
  }
  return (int) store.length();
}
/** Puts the <code>length</code> bytes of the store starting at
    <code>ptr</code> onto the free list for chunks of that size.
    <p>
    Chunks bigger than MAXALLOC are not recorded as such; they are cut into
    pieces of at most MAXALLOC bytes, each of which is freed separately.
    Chunks shorter than four bytes are silently dropped, because a free
    chunk has to hold the four-byte link to the next free chunk and writing
    that link would overwrite whatever follows in the store.
    @param ptr    store offset of the chunk
    @param length size of the chunk in bytes
    @throws IOException on any I/O failure */
private void free(int ptr, int length) throws IOException {
  if (length < 4) return;       // Too small to hold the link (or empty).
  if (length > MAXALLOC) {
    while (length > MAXALLOC * 2) {
      free(ptr, MAXALLOC);
      ptr += MAXALLOC;
      length -= MAXALLOC;
    }
    // MAXALLOC < length <= 2*MAXALLOC here, so both halves fit.
    int half = length / 2;
    free(ptr, half);
    free(ptr + half, length - half);
    return;
  }
  // Grow the table of list heads until it has a slot for this size.
  int biggest = (int) (freehead.length() / 4);
  if (length >= biggest) {
    freehead.seek(freehead.length());
    for ( ; biggest <= length ; biggest++) {
      freehead.writeInt(0);
    }
  }
  // Push the chunk on the front of its list: the chunk's first int becomes
  // the old head, and the head becomes the chunk.
  freehead.seek(length * 4);
  int oldHead = freehead.readInt();
  store.seek(ptr);
  store.writeInt(oldHead);
  freehead.seek(length * 4);
  freehead.writeInt(ptr);
}
/** Finds the store offset of the entry for <code>str</code>, optionally
    creating the entry if it does not exist.
    <p>
    A string entry is laid out as: next-in-chain pointer (4 bytes), refcount
    (4 bytes), then the string as written by writeUTF (2-byte length plus
    the encoded bytes). A newly created entry is pushed on the front of its
    hash chain with a refcount of 0; the caller is expected to bump it.
    @param str    the string to look up
    @param create whether to add the string if it is missing
    @return store offset of the entry, or 0 if the string is not present
            and <code>create</code> is false
    @throws IOException on any I/O failure, or if the string is too long to
                        be stored */
private int LookupString(String str, boolean create) throws IOException {
  int hash = (str.hashCode() & HASHMASK) * 4;
  int firstval = 0;
  if (hash < strhead.length()) {
    strhead.seek(hash);
    firstval = strhead.readInt();
    int ptr = firstval;
    while (ptr > 0) {
      store.seek(ptr);
      int next = store.readInt();
      store.readInt();          // Skip the refcount.
      if (str.equals(store.readUTF())) {
        return ptr;
      }
      ptr = next;
    }
  }
  if (!create) return 0;

  // Figure out the UTF length. Same rules as java.io.RandomAccessFile
  // uses in writeUTF.
  int strlen = str.length();
  int utflen = 0;
  for (int i = 0 ; i < strlen ; i++) {
    int c = str.charAt(i);
    if ((c >= 0x0001) && (c <= 0x007F)) {
      utflen++;
    } else if (c > 0x07FF) {
      utflen += 3;
    } else {
      utflen += 2;
    }
  }
  if (utflen > 65535) {
    // writeUTF would refuse this anyway; fail before grabbing a chunk so
    // that nothing is taken off the free list for an entry never linked in.
    throw new java.io.UTFDataFormatException("encoded string too long: " +
                                             utflen + " bytes");
  }

  // Make sure the table of chain heads reaches this bucket.
  if (hash > strhead.length()) {
    strhead.seek(strhead.length());
    for (int i=(int) strhead.length() ; i<hash ; i+=4) {
      strhead.writeInt(0);
    }
  }

  int totallength = 4 +         // Pointer to next entry with same hash
                    4 +         // Refcount
                    2 +         // Space to write down length of string
                    utflen;     // Length of string itself.
  int result = alloc(totallength);
  store.seek(result);
  store.writeInt(firstval);
  store.writeInt(0);            // Initial value of refcount. The caller
                                // better update this soon!
  store.writeUTF(str);
  strhead.seek(hash);
  strhead.writeInt(result);
  return result;
}
/** Adds one to the refcount of the string entry at <code>loc</code>. The
    refcount is the second int of the entry, right after the chain link.
    @param loc store offset of the string entry
    @throws IOException on any I/O failure */
private void addStringRef(int loc) throws IOException {
  int countPos = loc + 4;
  store.seek(countPos);
  int bumped = store.readInt() + 1;
  store.seek(countPos);
  store.writeInt(bumped);
}
/** Drops one reference to the string entry at <code>loc</code>.
    <p>
    While other references remain, only the stored refcount is decremented.
    When the last one goes, the entry is first unlinked from its string
    hash chain and then handed to free(). The unlinking matters: free()
    reuses the entry's first four bytes as the free-list link, so an entry
    left on the hash chain would lead lookups into the free list.
    @param loc store offset of the string entry
    @throws IOException on any I/O failure */
private void removeStringRef(int loc) throws IOException {
  store.seek(loc + 4);
  int refcount = store.readInt();
  if (refcount > 1) {
    store.seek(loc + 4);
    store.writeInt(refcount - 1);
    return;
  }

  // Last reference. The UTF length prefix is an unsigned 16-bit value.
  int utflen = store.readUnsignedShort();
  store.seek(loc + 8);
  String str = store.readUTF();
  store.seek(loc);
  int successor = store.readInt();

  // Unlink the entry from the chain for its hash bucket.
  int bucket = (str.hashCode() & HASHMASK) * 4;
  if (bucket < strhead.length()) {
    strhead.seek(bucket);
    int cur = strhead.readInt();
    if (cur == loc) {
      strhead.seek(bucket);
      strhead.writeInt(successor);
    } else {
      while (cur > 0) {
        store.seek(cur);
        int following = store.readInt();
        if (following == loc) {
          store.seek(cur);
          store.writeInt(successor);
          break;
        }
        cur = following;
      }
    }
  }

  // Entry size: link (4) + refcount (4) + UTF length prefix (2) + bytes.
  free(loc, 4 + 4 + 2 + utflen);
}
/** Records the assertion (name, slot, value), unless it is already there.
    <p>
    The three strings are interned in the store first. The assertion is
    then written as a 20-byte record (next pointer for the name+slot chain,
    next pointer for the value+slot chain, and the three string offsets)
    and pushed on the front of both chains.
    <p>
    NOTE(review): <code>assert</code> has been a reserved word since
    Java 1.4; the name is kept because callers depend on it.
    @param name  the subject of the assertion
    @param slot  the property being asserted
    @param value the value of the property
    @throws IOException on any I/O failure */
public synchronized void assert(String name, String slot, String value)
  throws IOException
{
  int nameLoc = LookupString(name, true);
  int slotLoc = LookupString(slot, true);
  int valueLoc = LookupString(value, true);

  int[] bucket = new int[2];
  int[] oldHead = new int[2];   // Both start out as 0, i.e. "empty chain".

  bucket[0] = ((name.hashCode() + slot.hashCode()) & HASHMASK) * 4;
  if (bucket[0] < head[0].length()) {
    head[0].seek(bucket[0]);
    oldHead[0] = head[0].readInt();
    // Walk the name+slot chain looking for an identical record.
    int cur = oldHead[0];
    while (cur > 0) {
      store.seek(cur);
      cur = store.readInt();    // Link for this chain.
      store.readInt();          // Link for the other chain; not needed.
      if (store.readInt() == nameLoc
          && store.readInt() == slotLoc
          && store.readInt() == valueLoc) {
        return;                 // Already asserted; nothing to do.
      }
    }
  }

  // Not in the database yet, so add it.
  bucket[1] = ((value.hashCode() + slot.hashCode()) & HASHMASK) * 4;
  if (bucket[1] < head[1].length()) {
    head[1].seek(bucket[1]);
    oldHead[1] = head[1].readInt();
  }

  addStringRef(nameLoc);
  addStringRef(slotLoc);
  addStringRef(valueLoc);

  int record = alloc(5 * 4);
  store.seek(record);
  store.writeInt(oldHead[0]);
  store.writeInt(oldHead[1]);
  store.writeInt(nameLoc);
  store.writeInt(slotLoc);
  store.writeInt(valueLoc);

  // Make the new record the head of both chains, growing the head tables
  // with empty buckets where they are too short.
  for (int which = 0 ; which < 2 ; which++) {
    RandomAccessFile table = head[which];
    int offset = bucket[which];
    if (table.length() < offset) {
      table.seek(table.length());
      for (int pos = (int) table.length() ; pos < offset ; pos += 4) {
        table.writeInt(0);
      }
    }
    table.seek(offset);
    table.writeInt(record);
  }
}
/** Removes the assertion (name, slot, value) if it is present.
    <p>
    The record is unlinked from the name+slot chain and from the value+slot
    chain; if it was found on either, its 20 bytes are freed and one
    reference is dropped from each of the three strings.
    @param name  the subject of the assertion
    @param slot  the property that was asserted
    @param value the value of the property
    @throws IOException on any I/O failure */
public synchronized void unassert(String name, String slot, String value)
  throws IOException
{
  int nameLoc = LookupString(name, false);
  int slotLoc = LookupString(slot, false);
  int valueLoc = LookupString(value, false);
  if (nameLoc == 0 || slotLoc == 0 || valueLoc == 0) {
    // A string that was never stored can't be part of any assertion.
    return;
  }

  int[] bucket = {
    ((name.hashCode() + slot.hashCode()) & HASHMASK) * 4,
    ((value.hashCode() + slot.hashCode()) & HASHMASK) * 4
  };

  int found = 0;
  for (int which = 0 ; which < 2 ; which++) {
    RandomAccessFile table = head[which];
    int offset = bucket[which];
    if (offset >= table.length()) continue;

    table.seek(offset);
    int prev = 0;
    int cur = table.readInt();
    while (cur > 0) {
      store.seek(cur);
      int link0 = store.readInt();
      int link1 = store.readInt();
      int following = (which == 0) ? link0 : link1;
      boolean match = store.readInt() == nameLoc
                      && store.readInt() == slotLoc
                      && store.readInt() == valueLoc;
      if (match) {
        // Both chains must lead to the very same record.
        Assert.Assertion(found == 0 || found == cur);
        found = cur;
        if (prev == 0) {
          // Record was the head of the chain.
          table.seek(offset);
          table.writeInt(following);
        } else {
          // Patch the predecessor's link for this chain (offset 0 or 4).
          store.seek(prev + which*4);
          store.writeInt(following);
        }
        break;
      }
      prev = cur;
      cur = following;
    }
  }

  if (found > 0) {
    free(found, 5 * 4);
    removeStringRef(nameLoc);
    removeStringRef(slotLoc);
    removeStringRef(valueLoc);
  }
}
/** Returns one string that completes an assertion with the given
    <code>name</code> and <code>slot</code>.
    <p>
    Normally <code>name</code> is matched against the name of each assertion
    and the value is returned. With <code>reverse</code> set,
    <code>name</code> is matched against the value instead and the
    assertion's name is returned.
    @param name    the string to match (name, or value when reversed)
    @param slot    the slot to match
    @param reverse whether to search by value rather than by name
    @return the first match on the hash chain, or null if there is none
    @throws IOException on any I/O failure */
public synchronized String findFirst(String name, String slot, boolean reverse)
  throws IOException
{
  int w = reverse ? 1 : 0;
  int keyLoc = LookupString(name, false);
  int slotLoc = LookupString(slot, false);
  if (keyLoc == 0 || slotLoc == 0) return null;

  int offset = ((name.hashCode() + slot.hashCode()) & HASHMASK) * 4;
  RandomAccessFile table = head[w];
  if (offset >= table.length()) return null;

  table.seek(offset);
  int cur = table.readInt();
  while (cur > 0) {
    store.seek(cur);
    int link0 = store.readInt();
    int link1 = store.readInt();
    int recName = store.readInt();
    int recSlot = store.readInt();
    if (recSlot == slotLoc) {
      int recValue = store.readInt();
      int recKey = reverse ? recValue : recName;
      if (recKey == keyLoc) {
        int answer = reverse ? recName : recValue;
        store.seek(answer + 8); // Skip the entry's link and refcount.
        return store.readUTF();
      }
    }
    cur = reverse ? link1 : link0;
  }
  return null;
}
/** Returns an enumeration over every string that completes an assertion
    with the given <code>name</code> and <code>slot</code>. The matching
    rules are those of findFirst, including the meaning of
    <code>reverse</code>. The hash chain is walked lazily as the
    enumeration is consumed.
    @param name    the string to match (name, or value when reversed)
    @param slot    the slot to match
    @param reverse whether to search by value rather than by name
    @return an enumeration of Strings, or null (not an empty enumeration) if
            either string is unknown or the hash bucket lies beyond the end
            of the head table
    @throws IOException on any I/O failure */
public synchronized Enumeration findAll(String name, String slot, boolean reverse)
  throws IOException
{
  int w = reverse ? 1 : 0;

  int keyLoc = LookupString(name, false);
  if (keyLoc == 0) return null;

  int slotLoc = LookupString(slot, false);
  if (slotLoc == 0) return null;

  int offset = ((name.hashCode() + slot.hashCode()) & HASHMASK) * 4;
  RandomAccessFile table = head[w];
  if (offset >= table.length()) return null;

  table.seek(offset);
  int first = table.readInt();
  return new AllEnumerator(this, first, keyLoc, slotLoc, w);
}
/** Lazily walks one assertion hash chain, handing out the strings of the
    records that match. Each step that touches the store synchronizes on
    the database. An I/O error while walking ends the enumeration early
    rather than being reported. */
class AllEnumerator implements Enumeration {
  SimpleDB db;          // Database whose store is being walked.
  int ptr;              // Next record to examine; 0 when the chain is done.
  int n;                // Offset of the string being matched.
  int s;                // Offset of the slot string being matched.
  int w;                // 0: match on name; 1: match on value.
  String next;          // Result found by hasMoreElements, not yet returned.

  AllEnumerator(SimpleDB d, int p, int nm, int sl, int ww) {
    this.db = d;
    this.ptr = p;
    this.n = nm;
    this.s = sl;
    this.w = ww;
  }

  /** Looks ahead for the next matching record, caching its string. */
  public boolean hasMoreElements() {
    if (next != null) return true;
    while (ptr > 0) {
      synchronized(db) {
        try {
          if (step()) return true;
        } catch (IOException e) {
          // Enumeration can't throw; treat a broken chain as its end.
          ptr = 0;
          return false;
        }
      }
    }
    return false;
  }

  /** Examines the record at <code>ptr</code> and moves <code>ptr</code> on
      to its successor. Returns true, with <code>next</code> set, if the
      record matched. */
  private boolean step() throws IOException {
    RandomAccessFile file = db.store;
    file.seek(ptr);
    int link0 = file.readInt();
    int link1 = file.readInt();
    int recName = file.readInt();
    int recSlot = file.readInt();
    ptr = ((w == 0) ? link0 : link1);
    if (recSlot != s) return false;
    int recValue = file.readInt();
    if ((w == 0 && n == recName) || (w == 1 && n == recValue)) {
      file.seek(((w == 0) ? recValue : recName) + 8);
      next = file.readUTF();
      return true;
    }
    return false;
  }

  /** Returns the next matching string. */
  public Object nextElement() throws NoSuchElementException {
    if (!hasMoreElements()) throw new NoSuchElementException();
    String found = next;
    next = null;
    return found;
  }
}
/** Prints the string entry at <code>loc</code> to <code>out</code>,
    followed by its refcount in square brackets.
    @param loc store offset of the string entry
    @param out where to print
    @throws IOException on any I/O failure */
private void dumpString(int loc, PrintStream out) throws IOException {
  store.seek(loc + 4);          // Skip the chain link.
  int refcount = store.readInt();
  String str = store.readUTF();
  out.print(str);
  out.print("[" + refcount + "]");
}

/** Writes a human-readable listing of the database to <code>out</code>:
    one line per assertion reachable from the name+slot hash table (as
    name,slot,value, each with its refcount), followed by one line per
    non-empty free list giving the offsets of its chunks.
    @param out where to print; System.out is used if this is null
    @throws IOException on any I/O failure */
public synchronized void dump(PrintStream out) throws IOException {
  if (out == null) out = System.out;

  int length = (int) head[0].length();
  head[0].seek(0);
  int id[] = new int[3];
  for (int hash=0 ; hash < length ; hash += 4) {
    int position = head[0].readInt();
    while (position > 0) {
      store.seek(position);
      position = store.readInt();       // Link for the name+slot chain.
      store.readInt();                  // Link for the other chain.
      for (int i=0 ; i<3 ; i++) {
        id[i] = store.readInt();
      }
      for (int i=0 ; i<3 ; i++) {
        dumpString(id[i], out);
        if (i < 2) out.print(",");
      }
      out.println("");
    }
  }

  length = (int) freehead.length();
  // The heads are read sequentially, so start from the top of the file
  // rather than from wherever the last alloc/free left the pointer.
  freehead.seek(0);
  for (int hash=0 ; hash < length ; hash += 4) {
    int position = freehead.readInt();
    if (position > 0) {
      out.print(" -- Free chunks of size " + (hash/4) + ":");
      while (position > 0) {
        out.print(" " + position);
        store.seek(position);
        position = store.readInt();
      }
      out.println("");
    }
  }
}
}