#-----------------------------------------------------------------------
# Copyright (C) 2000, 2001 by Autonomous Zone Industries
# Copyright (C) 2002 Gregory P. Smith
# License: This is free software. You may use this software for any
# purpose including modification/redistribution, so long as
# this header remains intact and that you do not claim any
# rights of ownership or authorship of this software. This
# software has been tested, but no warranty is expressed or
# -- Gregory P. Smith <greg@electricrain.com>
# This provides a simple database table interface built on top of
# the Python BerkeleyDB 3 interface.
_cvsid
= '$Id: dbtables.py,v 1.11 2004/08/08 00:54:20 tim_one Exp $'
from types
import ListType
, StringType
# For Pythons w/distutils pybsddb
class TableDBError(StandardError):
class TableAlreadyExists(TableDBError
):
"""This condition matches everything"""
"""Acts as an exact match condition function"""
def __init__(self, strtomatch):
    """Remember the string that candidate values are compared against.

    strtomatch -- the value a data string must be equal to in order
                  to satisfy this condition.
    """
    self.strtomatch = strtomatch
return s
== self
.strtomatch
"""Acts as a condition function for matching a string prefix"""
def __init__(self
, prefix
):
return s
[:len(self
.prefix
)] == self
.prefix
"""Acts as a condition function for matching a string postfix"""
def __init__(self
, postfix
):
return s
[-len(self
.postfix
):] == self
.postfix
Acts as a function that will match using an SQL 'LIKE' style
string. Case insensitive and % signs are wild cards.
This isn't perfect but it should work for the simple common cases.
def __init__(self, likestr, re_flags=re.IGNORECASE):
    """Compile an SQL 'LIKE' style pattern into a regular expression.

    likestr  -- the LIKE pattern; '%' is a wildcard matching any run of
                characters, every other character matches itself.
    re_flags -- flags handed to re.compile (case insensitive by default).

    The translated pattern text is kept in self.likestr and the
    compiled, fully anchored ('^...$') expression in self.re.
    """
    # Escape every character that is special to the re module so that
    # it can only match itself.  The backslash must be handled first;
    # otherwise the backslashes inserted for the other characters
    # would themselves be escaped a second time.
    chars_to_escape = '\\.*+()[]?^$|{}'
    for char in chars_to_escape:
        likestr = likestr.replace(char, '\\' + char)
    # convert %s to wildcards
    self.likestr = likestr.replace('%', '.*')
    self.re = re.compile('^' + self.likestr + '$', re_flags)
# Reserved key fragments used to lay out tables inside the single
# underlying database.

# keys used to store database metadata

# list of the tables in this db
_table_names_key = '__TABLE_NAMES__'

# table_name+this key contains a list of columns
_columns = '._COLUMNS__'

# these keys are found within table sub databases

# this+column+this+rowid key contains table data
_data = '._DATA_.'

# this+rowid+this key contains a unique entry for each
# row in the table.  (no data is stored)
_rowid = '._ROWID_.'

# length in bytes of the unique rowid strings
_rowid_str_len = 8
def _data_key(table, col, rowid):
    """Return the database key addressing one cell of a table.

    The key is the table name, the _data marker, the column name, the
    _data marker again and finally the row id, concatenated in that
    order.
    """
    column_prefix = table + _data + col + _data
    return column_prefix + rowid
def _search_col_data_key(table, col):
    """Return the key prefix shared by every cell of one column.

    This is the key built by concatenating the table name, the _data
    marker, the column name and the _data marker, with no row id
    appended.
    """
    table_prefix = table + _data
    return table_prefix + col + _data
def _search_all_data_key(table
):
def _rowid_key(table, rowid):
    """Return the key of the marker entry recording that a row exists.

    The row id is wrapped in the _rowid marker on both sides and
    prefixed with the table name.
    """
    row_prefix = table + _rowid
    return row_prefix + rowid + _rowid
def _search_rowid_key(table
):
def contains_metastrings(s
) :
"""Verify that the given string does not contain any
metadata strings that might interfere with dbtables database operation.
if (s
.find(_table_names_key
) >= 0 or
def __init__(self
, filename
, dbhome
, create
=0, truncate
=0, mode
=0600,
"""bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
Open database name in the dbhome BerkeleyDB directory.
Use keyword arguments when calling this constructor.
flagsforenv
= (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
# DB_AUTO_COMMIT isn't a valid flag for env.open()
dbflags |
= DB_AUTO_COMMIT
flagsforenv
= flagsforenv | DB_RECOVER
# enable auto deadlock avoidance
self
.env
.set_lk_detect(DB_LOCK_DEFAULT
)
self
.env
.open(dbhome
, myflags | flagsforenv
)
# this code relies on DBCursor.set* methods to raise exceptions
# rather than returning None
self
.db
.set_get_returns_none(1)
# allow duplicate entries [warning: be careful w/ metadata]
self
.db
.set_flags(DB_DUP
)
self
.db
.open(filename
, DB_BTREE
, dbflags | myflags
, mode
)
self
.dbfilename
= filename
# Initialize the table names list if this is a new database
txn
= self
.env
.txn_begin()
if not self
.db
.has_key(_table_names_key
, txn
):
self
.db
.put(_table_names_key
, pickle
.dumps([], 1), txn
=txn
)
# TODO verify more of the database's metadata?
def checkpoint(self
, mins
=0):
self
.env
.txn_checkpoint(mins
)
except DBIncompleteError
:
except DBIncompleteError
:
"""Print the database to stdout for debugging"""
print "******** Printing raw database for debugging ********"
def CreateTable(self
, table
, columns
):
"""CreateTable(table, columns) - Create a new table in the database
raises TableDBError if it already exists or for other DB errors.
assert isinstance(columns
, ListType
)
# checking sanity of the table and column names here on
# table creation will prevent problems elsewhere.
if contains_metastrings(table
):
"bad table name: contains reserved metastrings")
if contains_metastrings(column
):
"bad column name: contains reserved metastrings")
columnlist_key
= _columns_key(table
)
if self
.db
.has_key(columnlist_key
):
raise TableAlreadyExists
, "table already exists"
txn
= self
.env
.txn_begin()
# store the table's column info
self
.db
.put(columnlist_key
, pickle
.dumps(columns
, 1), txn
=txn
)
# add the table name to the tablelist
tablelist
= pickle
.loads(self
.db
.get(_table_names_key
, txn
=txn
,
# delete 1st, in case we opened with DB_DUP
self
.db
.delete(_table_names_key
, txn
)
self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
raise TableDBError
, dberror
[1]
def ListTableColumns(self
, table
):
"""Return a list of columns in the given table.
[] if the table doesn't exist.
assert isinstance(table
, StringType
)
if contains_metastrings(table
):
raise ValueError, "bad table name: contains reserved metastrings"
columnlist_key
= _columns_key(table
)
if not self
.db
.has_key(columnlist_key
):
pickledcolumnlist
= self
.db
.get(columnlist_key
)
return pickle
.loads(pickledcolumnlist
)
"""Return a list of tables in this database."""
pickledtablelist
= self
.db
.get(_table_names_key
)
return pickle
.loads(pickledtablelist
)
def CreateOrExtendTable(self
, table
, columns
):
"""CreateOrExtendTable(table, columns)
- Create a new table in the database.
If a table of this name already exists, extend it to have any
additional columns present in the given list as well as
all of its current columns.
assert isinstance(columns
, ListType
)
self
.CreateTable(table
, columns
)
except TableAlreadyExists
:
# the table already existed, add any new columns
columnlist_key
= _columns_key(table
)
txn
= self
.env
.txn_begin()
# load the current column list
oldcolumnlist
= pickle
.loads(
self
.db
.get(columnlist_key
, txn
=txn
, flags
=DB_RMW
))
# create a hash table for fast lookups of column names in the
# old column list
# create a new column list containing both the old and new
# column names
newcolumnlist
= copy
.copy(oldcolumnlist
)
if not oldcolumnhash
.has_key(c
):
# store the table's new extended column list
if newcolumnlist
!= oldcolumnlist
:
# delete the old one first since we opened with DB_DUP
self
.db
.delete(columnlist_key
, txn
)
self
.db
.put(columnlist_key
,
pickle
.dumps(newcolumnlist
, 1),
self
.__load
_column
_info
(table
)
raise TableDBError
, dberror
[1]
def __load_column_info(self
, table
) :
"""initialize the self.__tablecolumns dict"""
tcolpickles
= self
.db
.get(_columns_key(table
))
raise TableDBError
, "unknown table: %r" % (table
,)
raise TableDBError
, "unknown table: %r" % (table
,)
self
.__tablecolumns
[table
] = pickle
.loads(tcolpickles
)
def __new_rowid(self
, table
, txn
) :
"""Create a new unique row identifier"""
# Generate a random 64-bit row ID string
# (note: this code has <64 bits of randomness
# but it's plenty for our database id needs!)
p
.pack_int(int(random
.random()*2147483647))
p
.pack_int(int(random
.random()*2147483647))
# Guarantee uniqueness by adding this key to the database
self
.db
.put(_rowid_key(table
, newid
), None, txn
=txn
,
def Insert(self
, table
, rowdict
) :
"""Insert(table, datadict) - Insert a new row into the table
using the keys+values from rowdict as the column values.
if not self
.db
.has_key(_columns_key(table
)):
raise TableDBError
, "unknown table"
# check the validity of each column name
if not self
.__tablecolumns
.has_key(table
):
self
.__load
_column
_info
(table
)
for column
in rowdict
.keys() :
if not self
.__tablecolumns
[table
].count(column
):
raise TableDBError
, "unknown column: %r" % (column
,)
# get a unique row identifier for this row
txn
= self
.env
.txn_begin()
rowid
= self
.__new
_rowid
(table
, txn
=txn
)
# insert the row values into the table database
for column
, dataitem
in rowdict
.items():
self
.db
.put(_data_key(table
, column
, rowid
), dataitem
, txn
=txn
)
# Wouldn't it be nice if we could just abort the txn and re-raise the
# exception?  But no, because TableDBError is not related to DBError via
# inheritance, so it would be backwards incompatible.  Do the next
# best thing.
self
.db
.delete(_rowid_key(table
, rowid
))
raise TableDBError
, dberror
[1], info
[2]
def Modify(self
, table
, conditions
={}, mappings
={}):
"""Modify(table, conditions) - Modify in rows matching 'conditions'
using mapping functions in 'mappings'
* conditions is a dictionary keyed on column names
containing condition functions expecting the data string as an
argument and returning a boolean.
* mappings is a dictionary keyed on column names containing mapping
functions expecting the data string as an argument and returning the
new string for that column.
matching_rowids
= self
.__Select
(table
, [], conditions
)
# modify only requested columns
columns
= mappings
.keys()
for rowid
in matching_rowids
.keys():
txn
= self
.env
.txn_begin()
# modify the requested column
_data_key(table
, column
, rowid
),
_data_key(table
, column
, rowid
),
# XXXXXXX row key somehow didn't exist, assume no error
dataitem
= mappings
[column
](dataitem
)
_data_key(table
, column
, rowid
),
raise TableDBError
, dberror
[1]
def Delete(self
, table
, conditions
={}):
"""Delete(table, conditions) - Delete items matching the given
conditions from the table.
* conditions is a dictionary keyed on column names
containing condition functions expecting the data string as an
argument and returning a boolean.
matching_rowids
= self
.__Select
(table
, [], conditions
)
# delete row data from all columns
columns
= self
.__tablecolumns
[table
]
for rowid
in matching_rowids
.keys():
txn
= self
.env
.txn_begin()
self
.db
.delete(_data_key(table
, column
, rowid
),
# XXXXXXX column may not exist, assume no error
self
.db
.delete(_rowid_key(table
, rowid
), txn
)
# XXXXXXX row key somehow didn't exist, assume no error
raise TableDBError
, dberror
[1]
def Select(self
, table
, columns
, conditions
={}):
"""Select(table, conditions) - retrieve specific row data
Returns a list of row column->value mapping dictionaries.
* columns is a list of which column data to return. If
columns is None, all columns will be returned.
* conditions is a dictionary keyed on column names
containing callable conditions expecting the data string as an
argument and returning a boolean.
if not self
.__tablecolumns
.has_key(table
):
self
.__load
_column
_info
(table
)
columns
= self
.__tablecolumns
[table
]
matching_rowids
= self
.__Select
(table
, columns
, conditions
)
raise TableDBError
, dberror
[1]
# return the matches as a list of dictionaries
return matching_rowids
.values()
def __Select(self
, table
, columns
, conditions
):
"""__Select() - Used to implement Select and Delete (above)
Returns a dictionary keyed on rowids containing dicts
holding the row data for columns listed in the columns param
that match the given conditions.
* conditions is a dictionary keyed on column names
containing callable conditions expecting the data string as an
argument and returning a boolean.
# check the validity of each column name
if not self
.__tablecolumns
.has_key(table
):
self
.__load
_column
_info
(table
)
columns
= self
.tablecolumns
[table
]
for column
in (columns
+ conditions
.keys()):
if not self
.__tablecolumns
[table
].count(column
):
raise TableDBError
, "unknown column: %r" % (column
,)
# keyed on rows that match so far, containing dicts keyed on
# column names containing the data for that row and column.
# keys are rowids that do not match
# attempt to sort the conditions in such a way as to minimize full
def cmp_conditions(atuple
, btuple
):
if isinstance(a
, PrefixCond
) and isinstance(b
, PrefixCond
):
return cmp(len(b
.prefix
), len(a
.prefix
))
if isinstance(a
, LikeCond
) and isinstance(b
, LikeCond
):
return cmp(len(b
.likestr
), len(a
.likestr
))
if isinstance(a
, ExactCond
):
if isinstance(b
, ExactCond
):
if isinstance(a
, PrefixCond
):
if isinstance(b
, PrefixCond
):
# leave all unknown condition callables alone as equals
conditionlist
= conditions
.items()
conditionlist
.sort(cmp_conditions
)
# Apply conditions to column data to find what we want
for column
, condition
in conditionlist
:
column_num
= column_num
+ 1
searchkey
= _search_col_data_key(table
, column
)
# speedup: don't linear search columns within loop
savethiscolumndata
= 1 # save the data for return
savethiscolumndata
= 0 # data only used for selection
key
, data
= cur
.set_range(searchkey
)
while key
[:len(searchkey
)] == searchkey
:
# extract the rowid from the key
rowid
= key
[-_rowid_str_len
:]
if not rejected_rowids
.has_key(rowid
):
# if no condition was specified or the condition
# succeeds, add row to our match list.
if not condition
or condition(data
):
if not matching_rowids
.has_key(rowid
):
matching_rowids
[rowid
] = {}
matching_rowids
[rowid
][column
] = data
if matching_rowids
.has_key(rowid
):
del matching_rowids
[rowid
]
rejected_rowids
[rowid
] = rowid
if dberror
[0] != DB_NOTFOUND
:
# we're done selecting rows, garbage collect the reject list
# extract any remaining desired column data from the
# database for the matching rows.
for rowid
, rowdata
in matching_rowids
.items():
if rowdata
.has_key(column
):
rowdata
[column
] = self
.db
.get(
_data_key(table
, column
, rowid
))
if dberror
[0] != DB_NOTFOUND
:
"""Remove an entire table from the database"""
txn
= self
.env
.txn_begin()
self
.db
.delete(_columns_key(table
), txn
)
cur
= self
.db
.cursor(txn
)
# delete all keys containing this tables column and row info
table_key
= _search_all_data_key(table
)
key
, data
= cur
.set_range(table_key
)
# only delete items in this table
if key
[:len(table_key
)] != table_key
:
# delete all rowids used by this table
table_key
= _search_rowid_key(table
)
key
, data
= cur
.set_range(table_key
)
# only delete items in this table
if key
[:len(table_key
)] != table_key
:
# delete the tablename from the table name list
tablelist
= pickle
.loads(
self
.db
.get(_table_names_key
, txn
=txn
, flags
=DB_RMW
))
# hmm, it wasn't there, oh well, that's what we want.
# delete 1st, in case we opened with DB_DUP
self
.db
.delete(_table_names_key
, txn
)
self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
if self
.__tablecolumns
.has_key(table
):
del self
.__tablecolumns
[table
]
raise TableDBError
, dberror
[1]