diff options
author | Brian Harring <ferringb@gentoo.org> | 2004-12-05 11:00:15 +0000 |
---|---|---|
committer | Brian Harring <ferringb@gentoo.org> | 2004-12-05 11:00:15 +0000 |
commit | 788dcec736da0a57a97e927889cc912734ff28f2 (patch) | |
tree | cfef5b536f4d57d0adf88df9c3a81138d34ef284 /pym | |
parent | Strip patch from SpanKY. cpv_exists() fix. (diff) | |
download | portage-cvs-788dcec736da0a57a97e927889cc912734ff28f2.tar.gz portage-cvs-788dcec736da0a57a97e927889cc912734ff28f2.tar.bz2 portage-cvs-788dcec736da0a57a97e927889cc912734ff28f2.zip |
cache refactoring code.
Diffstat (limited to 'pym')
-rw-r--r-- | pym/cache/__init__.py | 0 | ||||
-rw-r--r-- | pym/cache/anydbm.py | 48 | ||||
-rw-r--r-- | pym/cache/cache_errors.py | 41 | ||||
-rw-r--r-- | pym/cache/flat_list.py | 98 | ||||
-rw-r--r-- | pym/cache/fs_template.py | 60 | ||||
-rw-r--r-- | pym/cache/sql_template.py | 220 | ||||
-rw-r--r-- | pym/cache/sqlite.py | 67 | ||||
-rw-r--r-- | pym/cache/template.py | 82 |
8 files changed, 616 insertions, 0 deletions
diff --git a/pym/cache/__init__.py b/pym/cache/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pym/cache/__init__.py
diff --git a/pym/cache/anydbm.py b/pym/cache/anydbm.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/anydbm.py
@@ -0,0 +1,48 @@
+anydbm_module = __import__("anydbm")
+import cPickle, os
+import fs_template
+import cache_errors
+
+class database(fs_template.FsBased):
+    """cache backend storing pickled value dicts in an anydbm database file"""
+
+    def __init__(self, label, auxdbkeys, **config):
+        """open (creating if needed) the db file under basepath.
+        the optional "dbtype" config key is only used as the file extension.
+        throws InitializationError if perms on the db file can't be ensured"""
+        super(database,self).__init__(label, auxdbkeys, **config)
+
+        default_db = config.get("dbtype","anydbm")
+        if not default_db.startswith("."):
+            default_db = '.' + default_db
+        self._db_path = os.path.join(self._base, fs_template.gen_label(self._base, self.label)+default_db)
+
+        # XXX anydbm_module.error is left to propagate; handle this at some point
+        self.__db = anydbm_module.open(self._db_path, "c", self._perms)
+        # _ensure_access signals failure through its return value rather than raising
+        if not self._ensure_access(self._db_path):
+            raise cache_errors.InitializationError(self.__class__,
+                "can't ensure perms on %s" % self._db_path)
+
+    def __getitem__(self, cpv):
+        """return the unpickled values stored for cpv"""
+        # NOTE: unpickling trusts the db file; don't point this at untrusted data
+        return cPickle.loads(self.__db[cpv])
+
+    def _setitem(self, cpv, values):
+        """store values for cpv, pickled with the highest available protocol"""
+        self.__db[cpv] = cPickle.dumps(values,cPickle.HIGHEST_PROTOCOL)
+
+    def _delitem(self, cpv):
+        """remove cpv from the db"""
+        del self.__db[cpv]
+
+    def keys(self):
+        """return the stored cpvs"""
+        # keys()/has_key() are used since iterating the db object, or using 'in' on it,
+        # isn't supported by every dbm module anydbm may pick
+        return self.__db.keys()
+
+    def has_key(self, cpv):
+        """return true if cpv is stored in the db"""
+        return self.__db.has_key(cpv)
diff --git a/pym/cache/cache_errors.py b/pym/cache/cache_errors.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/cache_errors.py
@@ -0,0 +1,41 @@
+class InitializationError(Exception):
+    """raised when a cache instance can't be created"""
+    def __init__(self, class_name, error):
+        self.error, self.class_name = error, class_name
+    def __str__(self):
+        return "Creation of instance %s failed due to %s" % \
+            (self.class_name, str(self.error))
+
+
+class CacheCorruption(Exception):
+    """raised when the entry for a specific key is damaged, or can't be read/written"""
+    def __init__(self, key, ex):
+        self.key, self.ex = key, ex
+    def __str__(self):
+        return "%s is corrupt: %s" % (self.key, str(self.ex))
+
+
+class GeneralCacheCorruption(CacheCorruption):
+    """corruption that can't be pinned on a single key"""
+    def __init__(self,ex): self.ex = ex
+    def __str__(self): return "corruption detected: %s" % str(self.ex)
+
+
+class InvalidRestriction(Exception):
+    """raised when a restriction handed to get_matches can't be used"""
+    def __init__(self, key, restriction, exception=None):
+        if exception is None: exception = ''
+        self.key, self.restriction, self.ex = key, restriction, exception
+    def __str__(self):
+        return "%s:%s is not valid: %s" % \
+            (self.key, self.restriction, str(self.ex))
+
+
+class ReadOnlyRestriction(Exception):
+    """raised on attempts to modify a readonly cache"""
+    def __init__(self, info=''):
+        self.info = info
+    def __str__(self):
+        if self.info:
+            return "cache is non-modifiable: " + str(self.info)
+        return "cache is non-modifiable"
diff --git a/pym/cache/flat_list.py b/pym/cache/flat_list.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/flat_list.py
@@ -0,0 +1,98 @@
+import errno, os, stat
+import fs_template
+import cache_errors
+
+# store the current key order *here*.
+auxdbkey_order=('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+    'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
+    'KEYWORDS', 'INHERITED', 'IUSE', 'CDEPEND',
+    'PDEPEND', 'PROVIDE')
+
+class database(fs_template.FsBased):
+    """cache backend keeping one flat file per cpv, one auxdbkey_order value per line"""
+
+    def __init__(self, label, auxdbkeys, **config):
+        """root the cache at basepath/label"""
+        super(database,self).__init__(label, auxdbkeys, **config)
+        self._base = os.path.join(self._base,
+            self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+        if len(self._known_keys) > len(auxdbkey_order):
+            raise Exception("less ordered keys then auxdbkeys")
+
+    def __getitem__(self, cpv):
+        """return the values stored for cpv, plus the file's mtime as "_mtime_".
+        throws KeyError if there is no entry, CacheCorruption if it can't be read"""
+        path = os.path.join(self._base, cpv)
+        d = {}
+        try:
+            myf = open(path,"r")
+            # nested try so the file is closed even if reading it fails
+            try:
+                for k,v in zip(auxdbkey_order, myf):
+                    d[k] = v.rstrip("\n")
+            finally:
+                myf.close()
+        except (OSError, IOError),e:
+            if isinstance(e,IOError) and e.errno == errno.ENOENT:
+                raise KeyError(cpv)
+            raise cache_errors.CacheCorruption(cpv, e)
+
+        try:    d["_mtime_"] = os.lstat(path).st_mtime
+        except OSError, e:  raise cache_errors.CacheCorruption(cpv, e)
+
+        return d
+
+    def _setitem(self, cpv, values):
+        """write values for cpv, creating missing parent dirs on demand.
+        a "_mtime_" entry, if present, is applied to the file.
+        throws CacheCorruption if the entry can't be written"""
+        path = os.path.join(self._base, cpv)
+        try:    myf = open(path, "w")
+        except (OSError, IOError), e:
+            # presumably the parent dir doesn't exist yet; create it, and retry once
+            try:
+                self._ensure_dirs(self._base, os.path.dirname(cpv))
+                myf = open(path, "w")
+            except (OSError, IOError), e:
+                raise cache_errors.CacheCorruption(cpv, e)
+
+        try:
+            myf.writelines([values.get(x,"")+"\n" for x in auxdbkey_order])
+            myf.close()
+            self._ensure_access(path, mtime=values.get("_mtime_", -1))
+        except (IOError, OSError), e:
+            try:    myf.close()
+            except (OSError, IOError):  pass
+            try:    os.remove(path)
+            except (OSError, IOError):  pass
+            raise cache_errors.CacheCorruption(cpv, e)
+
+    def _delitem(self, cpv):
+        """remove the entry for cpv; throws KeyError if there was none"""
+        try:
+            os.remove(os.path.join(self._base,cpv))
+        except OSError, e:
+            if e.errno == errno.ENOENT:
+                raise KeyError(cpv)
+            else:
+                raise cache_errors.CacheCorruption(cpv, e)
+
+    def has_key(self, cpv):
+        """return true if an entry exists for cpv"""
+        return os.path.exists(os.path.join(self._base, cpv))
+
+    def keys(self):
+        """generator for walking the dir struct"""
+        dirs = [self._base]
+        len_base = len(self._base)
+        while len(dirs):
+            for l in os.listdir(dirs[0]):
+                if l.endswith(".cpickle"):
+                    continue
+                p = os.path.join(dirs[0],l)
+                st = os.lstat(p)
+                if stat.S_ISDIR(st.st_mode):
+                    dirs.append(p)
+                    continue
+                yield p[len_base+1:]
+            dirs.pop(0)
diff --git a/pym/cache/fs_template.py b/pym/cache/fs_template.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/fs_template.py
@@ -0,0 +1,60 @@
+import os
+import template, cache_errors
+
+class FsBased(template.database):
+    """template wrapping fs needed options, and providing _ensure_access as a way to
+    attempt to ensure files have the specified owners/perms"""
+
+    def __init__(self, label, auxdbkeys, basepath=None, gid=-1, perms=0664, **config):
+        """throws InitializationError if needs args aren't specified"""
+        # gid 0 is a valid group, and -1 (the default) leaves the group untouched on chown
+        if gid is None:
+            raise cache_errors.InitializationError(self.__class__, "must specify gid!")
+        if not basepath:
+            raise cache_errors.InitializationError(self.__class__, "must specify basepath!")
+
+        self._gid = gid
+        self._base = basepath
+        self._perms = perms
+
+        super(FsBased, self).__init__(label, auxdbkeys, **config)
+
+    def _ensure_access(self, path, mtime=-1):
+        """returns true or false if it's able to ensure that path is properly chmod'd and chowned.
+        if mtime is specified (anything but -1 or None), attempts to ensure that's correct also"""
+        try:
+            os.chown(path, -1, self._gid)
+            os.chmod(path, self._perms)
+            if mtime is not None and mtime != -1:
+                mtime=long(mtime)
+                os.utime(path, (mtime, mtime))
+        except (OSError, IOError):
+            return False
+        return True
+
+    def _ensure_dirs(self, base, path):
+        """create base, and each missing dir component of path beneath it.
+        new dirs get the cache perms plus the search bits, and the cache gid.
+        OSError from creating or chowning a dir is left to the caller"""
+        mode = self._perms | 0111
+        if not os.path.isdir(base):
+            os.makedirs(base, mode)
+            os.chown(base, -1, self._gid)
+        for component in path.strip(os.path.sep).split(os.path.sep):
+            if not component:
+                continue
+            base = os.path.join(base, component)
+            if not os.path.exists(base):
+                os.mkdir(base, mode)
+                os.chown(base, -1, self._gid)
+
+
+def gen_label(base, label):
+    """if supplied label is a path, generate a label from its last component and a hash of the path.
+    base is accepted, but currently unused"""
+    if label.find(os.path.sep) == -1:
+        return label
+    label = label.strip("\"").strip("'")
+    label = os.path.join(*(label.rstrip(os.path.sep).split(os.path.sep)))
+    tail = os.path.split(label)[1]
+    return "%s-%X" % (tail, abs(hash(label)))
diff --git a/pym/cache/sql_template.py b/pym/cache/sql_template.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/sql_template.py
@@ -0,0 +1,220 @@
+import template, cache_errors
+
+class SQLDatabase(template.database):
+    """template class for RDBM based caches
+
+    This class is designed such that derivatives don't have to change much code, mostly constant strings.
+    _BaseError must be an exception class that all Exceptions thrown from the derived RDBMS are derived
+    from.
+
+    SCHEMA_INSERT_CPV_INTO_PACKAGE should be modified dependent on the RDBMS, as should SCHEMA_PACKAGE_CREATE-
+    basically you need to deal with creation of a unique pkgid.  If the dbapi2 rdbms class has a method of
+    recovering that id, then modify _insert_cpv to remove the extra select.
+
+    Creation of a derived class involves supplying _initdb_con, and _table_exists.
+    Additionally, the default schemas may have to be modified.
+    """
+
+    SCHEMA_PACKAGE_NAME = "package_cache"
+    SCHEMA_PACKAGE_CREATE = "CREATE TABLE %s (\
+        pkgid INTEGER PRIMARY KEY, label VARCHAR(255), cpv VARCHAR(255), UNIQUE(label, cpv))" % SCHEMA_PACKAGE_NAME
+    SCHEMA_PACKAGE_DROP = "DROP TABLE %s" % SCHEMA_PACKAGE_NAME
+
+    SCHEMA_VALUES_NAME = "values_cache"
+    SCHEMA_VALUES_CREATE = "CREATE TABLE %s ( pkgid integer references %s (pkgid) on delete cascade, \
+        key varchar(255), value text, UNIQUE(pkgid, key))" % (SCHEMA_VALUES_NAME, SCHEMA_PACKAGE_NAME)
+    SCHEMA_VALUES_DROP = "DROP TABLE %s" % SCHEMA_VALUES_NAME
+    SCHEMA_INSERT_CPV_INTO_PACKAGE = "INSERT INTO %s (label, cpv) VALUES(%%s, %%s)" % SCHEMA_PACKAGE_NAME
+
+    _BaseError = ()
+    _dbClass = None
+
+    # boolean indicating if the derived RDBMS class supports replace syntax
+    _supports_replace = False
+
+    def __init__(self, label, auxdbkeys, **config):
+        """initialize the instance.
+        derived classes shouldn't need to override this"""
+
+        super(SQLDatabase, self).__init__(label, auxdbkeys, **config)
+
+        config.setdefault("host","127.0.0.1")
+        self._initdb_con(config)
+
+        self.label = self._sfilter(self.label)
+
+    def _dbconnect(self, config):
+        """should be overridden if the derived class needs special parameters for initializing
+        the db connection, or cursor"""
+        self.db = self._dbClass(**config)
+        self.con = self.db.cursor()
+
+    def _initdb_con(self,config):
+        """ensure needed tables are in place.
+        If the derived class needs a different set of table creation commands, overload the appropriate
+        SCHEMA_ attributes.  If it needs additional execution beyond, override"""
+
+        self._dbconnect(config)
+        for name, create in ((self.SCHEMA_PACKAGE_NAME, self.SCHEMA_PACKAGE_CREATE),
+            (self.SCHEMA_VALUES_NAME, self.SCHEMA_VALUES_CREATE)):
+            if self._table_exists(name):
+                continue
+            if self.readonly:
+                raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % name)
+            try:    self.con.execute(create)
+            except self._BaseError, e:
+                raise cache_errors.InitializationError(self.__class__, e)
+
+    def _table_exists(self, tbl):
+        """return true if a table exists
+        derived classes must override this"""
+        raise NotImplementedError
+
+    def _sfilter(self, s):
+        """meta escaping, returns quoted string for use in sql statements"""
+        # NOTE(review): backslash escapes inside double quotes aren't understood by every
+        # RDBMS; a derived class whose quoting rules differ should override this
+        return "\"%s\"" % s.replace("\\","\\\\").replace("\"","\\\"")
+
+    def __getitem__(self, cpv):
+        """return a dict holding every known key for cpv; keys without a stored value map to ""
+        throws KeyError if nothing is stored for cpv, CacheCorruption on db errors"""
+        try:    self.con.execute("SELECT key, value FROM %s NATURAL JOIN %s "
+            "WHERE label=%s AND cpv=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+            self.label, self._sfilter(cpv)))
+        except self._BaseError, e:
+            raise cache_errors.CacheCorruption(cpv, e)
+
+        rows = self.con.fetchall()
+
+        if len(rows) == 0:
+            raise KeyError(cpv)
+
+        vals = dict([(k,"") for k in self._known_keys])
+        vals.update(dict(rows))
+        return vals
+
+    def _delitem(self, cpv):
+        """delete a cpv cache entry
+        derived RDBM classes for this *must* either support cascaded deletes, or
+        override this method"""
+        try:
+            try:
+                self.con.execute("DELETE FROM %s WHERE label=%s AND cpv=%s" % \
+                    (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+                self.db.commit()
+            except self._BaseError, e:
+                raise cache_errors.CacheCorruption(cpv, e)
+            if self.con.rowcount <= 0:
+                raise KeyError(cpv)
+        except Exception:
+            self.db.rollback()
+            raise
+
+    def __del__(self):
+        """close the db connection, if one was ever opened"""
+        # __init__ can fail before self.db is assigned
+        db = getattr(self, "db", None)
+        if db is not None:
+            db.close()
+
+    def _setitem(self, cpv, values):
+        """(re)insert cpv, and store the values that belong to known keys.
+        throws CacheCorruption on db errors, after rolling the transaction back"""
+        try:
+            # insert.
+            try:    pkgid = self._insert_cpv(cpv)
+            except self._BaseError, e:
+                raise cache_errors.CacheCorruption(cpv, e)
+
+            # __getitem__ fills out missing values,
+            # so we store only what's handed to us and is a known key
+            db_values = []
+            for key in self._known_keys:
+                if key in values:
+                    db_values.append({"key":key, "value":values[key]})
+
+            if len(db_values) > 0:
+                try:    self.con.executemany("INSERT INTO %s (pkgid, key, value) VALUES(\"%s\", %%(key)s, %%(value)s)" % \
+                    (self.SCHEMA_VALUES_NAME, str(pkgid)), db_values)
+                except self._BaseError, e:
+                    raise cache_errors.CacheCorruption(cpv, e)
+            self.db.commit()
+
+        except Exception:
+            try:    self.db.rollback()
+            except self._BaseError: pass
+            raise
+
+    def _insert_cpv(self, cpv):
+        """uses SCHEMA_INSERT_CPV_INTO_PACKAGE, which must be overloaded if the table definition
+        doesn't support auto-increment columns for pkgid.
+        returns the cpvs new pkgid
+        note this doesn't commit the insert.  The caller is expected to."""
+
+        if self._supports_replace:
+            query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1)
+        else:
+            # just delete it.  this has to see the raw cpv, since _delitem escapes it itself
+            try:    self._delitem(cpv)
+            except (cache_errors.CacheCorruption, KeyError):    pass
+            query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE
+        cpv = self._sfilter(cpv)
+        try:
+            self.con.execute(query_str % (self.label, cpv))
+        except self._BaseError:
+            self.db.rollback()
+            raise
+        self.con.execute("SELECT pkgid FROM %s WHERE label=%s AND cpv=%s" % \
+            (self.SCHEMA_PACKAGE_NAME, self.label, cpv))
+
+        # count what was fetched; dbapi2 allows rowcount to be -1 after a select
+        rows = self.con.fetchall()
+        if len(rows) != 1:
+            raise cache_errors.CacheCorruption(cpv, "Tried to insert the cpv, but found "
+                " %i matches upon the following select!" % len(rows))
+        return rows[0][0]
+
+    def has_key(self, cpv):
+        """return true if cpv is stored under this cache's label"""
+        try:    self.con.execute("SELECT cpv FROM %s WHERE label=%s AND cpv=%s" % \
+            (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+        except self._BaseError, e:
+            raise cache_errors.GeneralCacheCorruption(e)
+        # count what was fetched; dbapi2 allows rowcount to be -1 after a select
+        return len(self.con.fetchall()) > 0
+
+    def keys(self):
+        """return the list of cpvs stored under this cache's label"""
+        try:    self.con.execute("SELECT cpv FROM %s WHERE label=%s" %
+            (self.SCHEMA_PACKAGE_NAME, self.label))
+        except self._BaseError, e:
+            raise cache_errors.GeneralCacheCorruption(e)
+        return [ row[0] for row in self.con.fetchall() ]
+
+    def get_matches(self,match_dict):
+        """return the cpvs whose stored values satisfy every restriction in match_dict.
+        each pattern is matched via LIKE, with ".*" translated to the sql "%" wildcard.
+        throws InvalidRestriction for unknown keys, GeneralCacheCorruption on db errors"""
+        query_list = []
+        for k,v in match_dict.items():
+            if k not in self._known_keys:
+                raise cache_errors.InvalidRestriction(k, v, "key isn't known to this cache instance")
+            v = v.replace("%","\\%")
+            v = v.replace(".*","%")
+            query_list.append("(key=%s AND value LIKE %s)" % (self._sfilter(k), self._sfilter(v)))
+
+        query = "SELECT cpv FROM %s NATURAL JOIN %s WHERE label=%s" % \
+            (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME, self.label)
+        if len(query_list):
+            # a joined row carries a single key, so AND'ing restrictions on different keys
+            # can never match.  OR them, and demand one matching row per restriction
+            # (pkgid, key is unique, so a cpv can't satisfy the same restriction twice)
+            query += " AND (%s) GROUP BY cpv HAVING COUNT(*) = %i" % \
+                (" OR ".join(query_list), len(query_list))
+
+        try:    self.con.execute(query)
+        except self._BaseError, e:
+            raise cache_errors.GeneralCacheCorruption(e)
+
+        return [ row[0] for row in self.con.fetchall() ]
diff --git a/pym/cache/sqlite.py b/pym/cache/sqlite.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/sqlite.py
@@ -0,0 +1,67 @@
+sqlite_module =__import__("sqlite")
+import os
+import sql_template, fs_template
+import cache_errors
+
+class database(fs_template.FsBased, sql_template.SQLDatabase):
+    """sqlite backed cache; the db file lives under basepath"""
+
+    SCHEMA_DELETE_NAME = "delete_package_values"
+    SCHEMA_DELETE_TRIGGER = """CREATE TRIGGER %s AFTER DELETE on %s
+    begin
+    DELETE FROM %s WHERE pkgid=old.pkgid;
+    end;""" % (SCHEMA_DELETE_NAME, sql_template.SQLDatabase.SCHEMA_PACKAGE_NAME,
+        sql_template.SQLDatabase.SCHEMA_VALUES_NAME)
+
+    _BaseError = sqlite_module.Error
+    _dbClass = sqlite_module
+    _supports_replace = True
+
+    def _dbconnect(self, config):
+        """open the db file, and a cursor on it.
+        throws InitializationError if that fails, or perms on the file can't be ensured"""
+        self._dbpath = os.path.join(self._base, fs_template.gen_label(self._base, self.label)+".sqldb")
+        try:
+            self.db = sqlite_module.connect(self._dbpath, mode=self._perms, autocommit=False)
+            if not self._ensure_access(self._dbpath):
+                raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
+            self.con = self.db.cursor()
+        except self._BaseError, e:
+            raise cache_errors.InitializationError(self.__class__, e)
+
+    def _initdb_con(self, config):
+        """create the tables via the sql template, then make sure the delete trigger exists"""
+        sql_template.SQLDatabase._initdb_con(self, config)
+        try:
+            self.con.execute("SELECT name FROM sqlite_master WHERE type=\"trigger\" AND name=%s" % \
+                self._sfilter(self.SCHEMA_DELETE_NAME))
+            # count what was fetched; dbapi2 allows rowcount to be -1 after a select
+            if len(self.con.fetchall()) == 0:
+                self.con.execute(self.SCHEMA_DELETE_TRIGGER)
+                self.db.commit()
+        except self._BaseError, e:
+            raise cache_errors.InitializationError(self.__class__, e)
+
+    def _table_exists(self, tbl):
+        """return true/false dependent on a tbl existing"""
+        try:    self.con.execute("SELECT name FROM sqlite_master WHERE type=\"table\" AND name=%s" %
+            self._sfilter(tbl))
+        except self._BaseError, e:
+            # XXX crappy.
+            return False
+        return len(self.con.fetchall()) == 1
+
+    # we can do it minus a query via rowid.
+    def _insert_cpv(self, cpv):
+        """insert cpv, replacing any existing row for it, and return its pkgid.
+        doesn't commit; the caller is expected to"""
+        cpv = self._sfilter(cpv)
+        try:    self.con.execute(self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1) % \
+            (self.label, cpv))
+        except self._BaseError, e:
+            raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed: %s" % str(e))
+
+        # sums the delete also
+        if self.con.rowcount <= 0 or self.con.rowcount > 2:
+            raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed- %i rows modified" % self.con.rowcount)
+        return self.con.lastrowid
diff --git a/pym/cache/template.py b/pym/cache/template.py
new file mode 100644
--- /dev/null
+++ b/pym/cache/template.py
@@ -0,0 +1,82 @@
+import cache_errors
+
+class database(object):
+    """base class defining the interface the cache backends implement"""
+
+    # XXX questionable on storing the auxdbkeys
+    def __init__(self, label, auxdbkeys, readonly=False, **config):
+        """ initialize the derived class; specifically, store label/keys"""
+        self._known_keys = auxdbkeys
+        self.label = label
+        self.readonly = readonly
+
+    def __getitem__(self, cpv):
+        """get cpv's values.
+        override this in derived classes"""
+        raise NotImplementedError
+
+    def __setitem__(self, cpv, values):
+        """set a cpv to values
+        This shouldn't be overridden in derived classes since it handles the readonly checks"""
+        if self.readonly:
+            raise cache_errors.ReadOnlyRestriction()
+        self._setitem(cpv, values)
+
+    def _setitem(self, name, values):
+        """__setitem__ calls this after readonly checks.  override it in derived classes"""
+        raise NotImplementedError
+
+    def __delitem__(self, cpv):
+        """delete a key from the cache.
+        This shouldn't be overridden in derived classes since it handles the readonly checks"""
+        if self.readonly:
+            raise cache_errors.ReadOnlyRestriction()
+        self._delitem(cpv)
+
+    def _delitem(self,cpv):
+        """__delitem__ calls this after readonly checks.  override it in derived classes"""
+        raise NotImplementedError
+
+    def has_key(self, cpv):
+        """return true if cpv is in the cache.  override this in derived classes"""
+        raise NotImplementedError
+
+    def keys(self):
+        """return, or generate, the cpvs in the cache.  override this in derived classes"""
+        raise NotImplementedError
+
+    def get_matches(self, match_dict):
+        """generic function for walking the entire cache db, matching restrictions to
+        filter what cpv's are returned.  Derived classes should override this if they
+        can implement a faster method than pulling each cpv:values, and checking it.
+
+        match_dict maps a known key to either a regex string, or a (regex, flags) sequence.
+        This is a generator, so InvalidRestriction isn't thrown until it's first iterated.
+
+        For example, RDBMS derived classes should push the matching logic down to the
+        actual RDBM."""
+
+        import re
+        restricts = {}
+        for key,match in match_dict.iteritems():
+            if key not in self._known_keys:
+                raise cache_errors.InvalidRestriction(key, match, "Key isn't valid")
+            # XXX this sucks.
+            try:
+                if isinstance(match, basestring):
+                    restricts[key] = re.compile(match).match
+                else:
+                    restricts[key] = re.compile(match[0],match[1]).match
+            except re.error, e:
+                raise cache_errors.InvalidRestriction(key, match, e)
+
+        for cpv in self.keys():
+            cont = True
+            vals = self[cpv]
+            for key, match in restricts.iteritems():
+                # a backend may omit keys it has no value for; treat those as empty
+                if not match(vals.get(key, "")):
+                    cont = False
+                    break
+            if cont:
+                yield cpv