From d44ce453d88624c8444f0733a56197d5291f52f6 Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 15:18:10 +0200 Subject: Rename DictDatabase to HashDatabase --- portato/db/__init__.py | 6 +- portato/db/dict.py | 153 ------------------------------------------------- portato/db/hash.py | 153 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 156 deletions(-) delete mode 100644 portato/db/dict.py create mode 100644 portato/db/hash.py (limited to 'portato/db') diff --git a/portato/db/__init__.py b/portato/db/__init__.py index da8a81e..e6f9759 100644 --- a/portato/db/__init__.py +++ b/portato/db/__init__.py @@ -43,9 +43,9 @@ def Database(type): return SQLDatabase(SectionDict(_SESSION, "SQL")) elif type == "dict": - debug("Using DictDatabase") - from .dict import DictDatabase - return DictDatabase(SectionDict(_SESSION, "dict")) + debug("Using HashDatabase") + from .hash import HashDatabase + return HashDatabase(SectionDict(_SESSION, "dict")) else: error(_("Unknown database type: %s"), type) diff --git a/portato/db/dict.py b/portato/db/dict.py deleted file mode 100644 index 279ab97..0000000 --- a/portato/db/dict.py +++ /dev/null @@ -1,153 +0,0 @@ -# -*- coding: utf-8 -*- -# -# File: portato/db/dict.py -# This file is part of the Portato-Project, a graphical portage-frontend. -# -# Copyright (C) 2006-2009 René 'Necoro' Neumann -# This is free software. You may redistribute copies of it under the terms of -# the GNU General Public License version 2. -# There is NO WARRANTY, to the extent permitted by law. 
-# -# Written by René 'Necoro' Neumann - -from __future__ import absolute_import, with_statement - -import re -from collections import defaultdict - -from ..helper import info -from ..backend import system -from .database import Database, PkgData - -class DictDatabase (Database): - """An internal database which holds a simple dictionary cat -> [package_list].""" - - lock = Database.lock - - def __init__ (self, session): - """Constructor.""" - Database.__init__(self) - self.session = session - - self.__initialize() - self.populate() - - def __initialize (self): - self._db = defaultdict(list) - self.inst_cats = set([self.ALL]) - self._restrict = None - - def __sort_key (self, x): - return x.pkg.lower() - - @lock - def populate (self, category = None): - - # get the lists - packages = system.find_packages(category, with_version = False) - installed = system.find_packages(category, system.SET_INSTALLED, with_version = False) - - # cycle through packages - for p in packages: - cat, pkg = p.split("/") - inst = p in installed - t = PkgData(cat, pkg, inst, False) - self._db[cat].append(t) - self._db[self.ALL].append(t) - - if inst: - self.inst_cats.add(cat) - - for key in self._db: # sort alphabetically - self._db[key].sort(key = self.__sort_key) - - @lock - def get_cat (self, cat = None, byName = True, showDisabled = False): - if not cat: - cat = self.ALL - - def get_pkgs(): - if byName: - for pkg in self._db[cat]: - if showDisabled or not pkg.disabled: - yield pkg - else: - ninst = [] - for pkg in self._db[cat]: - if not showDisabled and pkg.disabled: continue - - if pkg.inst: - yield pkg - else: - ninst.append(pkg) - - for pkg in ninst: - yield pkg - - try: - if self.restrict: - return (pkg for pkg in get_pkgs() if self.restrict.search(pkg.cat+"/"+pkg.pkg)) - else: - return get_pkgs() - - except KeyError: # cat is in category list - but not in portage - info(_("Catched KeyError => %s seems not to be an available category. 
Have you played with rsync-excludes?"), cat) - - @lock - def get_categories (self, installed = False): - if not self.restrict: - if installed: - cats = self.inst_cats - else: - cats = self._db.iterkeys() - - else: - if installed: - cats = set((pkg.cat for pkg in self.get_cat(self.ALL) if pkg.inst)) - else: - cats = set((pkg.cat for pkg in self.get_cat(self.ALL))) - - if len(cats)>1: - cats.add(self.ALL) - - return (cat for cat in cats) - - @lock - def reload (self, cat = None): - if cat: - del self._db[cat] - try: - self.inst_cats.remove(cat) - except KeyError: # not in inst_cats - can be ignored - pass - - self._db[self.ALL] = filter(lambda x: x.cat != cat, self._db[self.ALL]) - self.populate(cat+"/*") - else: - self.__initialize() - self.populate() - - @lock - def disable (self, cpv): - cat, pkg = cpv.split("/") - - c = self._db[cat] - p = c[c.index(PkgData(cat, pkg))] - p.disabled = True - - def get_restrict (self): - return self._restrict - - @lock - def set_restrict (self, restrict): - if not restrict: - self._restrict = None - else: - try: - regex = re.compile(restrict, re.I) - except re.error, e: - info(_("Error while compiling search expression: '%s'."), str(e)) - else: # only set self._restrict if no error occurred - self._restrict = regex - - restrict = property(get_restrict, set_restrict) diff --git a/portato/db/hash.py b/portato/db/hash.py new file mode 100644 index 0000000..8cea6f2 --- /dev/null +++ b/portato/db/hash.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +# +# File: portato/db/hash.py +# This file is part of the Portato-Project, a graphical portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. 
+# +# Written by René 'Necoro' Neumann + +from __future__ import absolute_import, with_statement + +import re +from collections import defaultdict + +from ..helper import info +from ..backend import system +from .database import Database, PkgData + +class HashDatabase (Database): + """An internal database which holds a simple dictionary cat -> [package_list].""" + + lock = Database.lock + + def __init__ (self, session): + """Constructor.""" + Database.__init__(self) + self.session = session + + self.__initialize() + self.populate() + + def __initialize (self): + self._db = defaultdict(list) + self.inst_cats = set([self.ALL]) + self._restrict = None + + def __sort_key (self, x): + return x.pkg.lower() + + @lock + def populate (self, category = None): + + # get the lists + packages = system.find_packages(category, with_version = False) + installed = system.find_packages(category, system.SET_INSTALLED, with_version = False) + + # cycle through packages + for p in packages: + cat, pkg = p.split("/") + inst = p in installed + t = PkgData(cat, pkg, inst, False) + self._db[cat].append(t) + self._db[self.ALL].append(t) + + if inst: + self.inst_cats.add(cat) + + for key in self._db: # sort alphabetically + self._db[key].sort(key = self.__sort_key) + + @lock + def get_cat (self, cat = None, byName = True, showDisabled = False): + if not cat: + cat = self.ALL + + def get_pkgs(): + if byName: + for pkg in self._db[cat]: + if showDisabled or not pkg.disabled: + yield pkg + else: + ninst = [] + for pkg in self._db[cat]: + if not showDisabled and pkg.disabled: continue + + if pkg.inst: + yield pkg + else: + ninst.append(pkg) + + for pkg in ninst: + yield pkg + + try: + if self.restrict: + return (pkg for pkg in get_pkgs() if self.restrict.search(pkg.cat+"/"+pkg.pkg)) + else: + return get_pkgs() + + except KeyError: # cat is in category list - but not in portage + info(_("Catched KeyError => %s seems not to be an available category. 
Have you played with rsync-excludes?"), cat) + + @lock + def get_categories (self, installed = False): + if not self.restrict: + if installed: + cats = self.inst_cats + else: + cats = self._db.iterkeys() + + else: + if installed: + cats = set((pkg.cat for pkg in self.get_cat(self.ALL) if pkg.inst)) + else: + cats = set((pkg.cat for pkg in self.get_cat(self.ALL))) + + if len(cats)>1: + cats.add(self.ALL) + + return (cat for cat in cats) + + @lock + def reload (self, cat = None): + if cat: + del self._db[cat] + try: + self.inst_cats.remove(cat) + except KeyError: # not in inst_cats - can be ignored + pass + + self._db[self.ALL] = filter(lambda x: x.cat != cat, self._db[self.ALL]) + self.populate(cat+"/*") + else: + self.__initialize() + self.populate() + + @lock + def disable (self, cpv): + cat, pkg = cpv.split("/") + + c = self._db[cat] + p = c[c.index(PkgData(cat, pkg))] + p.disabled = True + + def get_restrict (self): + return self._restrict + + @lock + def set_restrict (self, restrict): + if not restrict: + self._restrict = None + else: + try: + regex = re.compile(restrict, re.I) + except re.error, e: + info(_("Error while compiling search expression: '%s'."), str(e)) + else: # only set self._restrict if no error occurred + self._restrict = regex + + restrict = property(get_restrict, set_restrict) -- cgit v1.2.3-54-g00ecf From 636eb3064e8ff8957bbf9fd172d7a0c6827e973c Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 19:24:50 +0200 Subject: Write eix-sql database backend --- portato/db/eix_sql.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 portato/db/eix_sql.py (limited to 'portato/db') diff --git a/portato/db/eix_sql.py b/portato/db/eix_sql.py new file mode 100644 index 0000000..0e4f569 --- /dev/null +++ b/portato/db/eix_sql.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# File: portato/db/eix_sql.py +# This file is part of the Portato-Project, a graphical 
portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. +# +# Written by René 'Necoro' Neumann + +from __future__ import absolute_import, with_statement + +try: + import sqlite3 as sql +except ImportError: + from pysqlite2 import dbapi2 as sql + +import os + +from .sql import SQLDatabase +from ..eix import EixReader +from ..helper import debug +from ..backend import system + +class EixSQLDatabase (SQLDatabase): + + CACHE_FILE = "/var/eix/cache" + + def __init__ (self, session): + SQLDatabase.__init__(self, session) + + if "cache" not in session: + self.cache = self.CACHE_FILE + session["cache"] = self.cache + else: + self.cache = session["cache"] + + debug("Using '%s' as eix cache file.", self.cache) + + def updated (self): + mtime = os.stat(self.cache).st_mtime + old = self.session.get("mtime", 0) + + self.session["mtime"] = mtime + + return old < mtime + + @SQLDatabase.con + def populate (self, category = None, connection = None): + inst = system.find_packages(pkgSet = system.SET_INSTALLED, key = category, with_version = False) + + def _get(): + with EixReader(self.cache) as eix: + for cat in eix.categories: + if category is None or cat.name() == category: + for pkg in cat.packages(): + yield (cat.name(), pkg.name(), pkg.name() in inst, False) + + connection.executemany("INSERT INTO packages (cat, name, inst, disabled) VALUES (?, ?, ?, ?)", _get()) + connection.commit() -- cgit v1.2.3-54-g00ecf From 801316be64177d889ab21fc28c07dd4d77cb8184 Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 19:42:01 +0200 Subject: Fix the EixSQLDatabase and EixReader --- portato/db/eix_sql.py | 18 ++++++++++-------- portato/db/sql.py | 1 + portato/eix/parser.py | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'portato/db') diff --git 
a/portato/db/eix_sql.py b/portato/db/eix_sql.py index 0e4f569..ac31019 100644 --- a/portato/db/eix_sql.py +++ b/portato/db/eix_sql.py @@ -21,29 +21,31 @@ import os from .sql import SQLDatabase from ..eix import EixReader -from ..helper import debug +from ..helper import debug, warning from ..backend import system class EixSQLDatabase (SQLDatabase): - CACHE_FILE = "/var/eix/cache" + CACHE_FILE = "/var/cache/eix" def __init__ (self, session): - SQLDatabase.__init__(self, session) - if "cache" not in session: + self.cache = session.get("cache", self.CACHE_FILE) + if not os.path.exists(self.cache): + warning(_("Cache file '%s' does not exist. Using default instead."), self.cache) self.cache = self.CACHE_FILE - session["cache"] = self.cache - else: - self.cache = session["cache"] debug("Using '%s' as eix cache file.", self.cache) + + session["cache"] = self.cache + + SQLDatabase.__init__(self, session) def updated (self): mtime = os.stat(self.cache).st_mtime old = self.session.get("mtime", 0) - self.session["mtime"] = mtime + self.session["mtime"] = str(mtime) return old < mtime diff --git a/portato/db/sql.py b/portato/db/sql.py index 6c95bb3..f2a736e 100644 --- a/portato/db/sql.py +++ b/portato/db/sql.py @@ -256,3 +256,4 @@ class SQLDatabase (Database): self._restrict = "AND (name LIKE '%%%(restrict)s%%' OR cat LIKE '%(restrict)s%%')" % {"restrict":restrict} restrict = property(get_restrict, set_restrict) + con = staticmethod(con) diff --git a/portato/eix/parser.py b/portato/eix/parser.py index 2a6658b..cc42553 100644 --- a/portato/eix/parser.py +++ b/portato/eix/parser.py @@ -357,7 +357,7 @@ class package (object): The indices of `header.useflags` representing the IUSE value of the package. 
""" - __slots__ = ("name", "description", "provide", + __slots__ = ("_offset", "name", "description", "provide", "homepage", "license", "useflags") def __init__ (self, file, skip = False): -- cgit v1.2.3-54-g00ecf From 0d877faef762319348b164dba98c159ac1ec4ecf Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 19:42:32 +0200 Subject: Add eixsql as db-choice --- portato/db/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'portato/db') diff --git a/portato/db/__init__.py b/portato/db/__init__.py index e6f9759..9869e3f 100644 --- a/portato/db/__init__.py +++ b/portato/db/__init__.py @@ -22,7 +22,8 @@ _SESSION = None types = { "sql": (_("SQLite"), _("Uses an SQLite-database to store package information.\nMay take longer to generate at the first time, but has advantages if portato is re-started with an unchanged portage tree. Additionally it allows to use fast SQL expressions for fetching the data.")), - "dict": (_("Hashmap"), _("Uses an in-memory hashmap to store package information.\nHas been used since at least version 0.3.3, but all information has to be regenerated on each startup.")) + "dict": (_("Hashmap"), _("Uses an in-memory hashmap to store package information.\nHas been used since at least version 0.3.3, but all information has to be regenerated on each startup.")), + "eixsql" : (_("eix + SQLite"), _("Similar to SQLite, but now uses the eix database to get the package information.\nThis should be much faster on startup, but requires that your eix database is always up-to-date.")) } def Database(type): @@ -40,13 +41,18 @@ def Database(type): warning(_("Cannot load SQLDatabase.")) return Database("dict") else: - return SQLDatabase(SectionDict(_SESSION, "SQL")) + return SQLDatabase(SectionDict(_SESSION, type)) elif type == "dict": debug("Using HashDatabase") from .hash import HashDatabase - return HashDatabase(SectionDict(_SESSION, "dict")) + return HashDatabase(SectionDict(_SESSION, type)) + elif 
type == "eixsql": + debug("Using EixSQLDatabase") + from .eix_sql import EixSQLDatabase + return EixSQLDatabase(SectionDict(_SESSION, type)) + else: error(_("Unknown database type: %s"), type) raise UnknownDatabaseTypeError, type -- cgit v1.2.3-54-g00ecf From 2a945321cbc54c6acae73bc25d543e2af6f3e42b Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 22:49:35 +0200 Subject: Use sets instead of lists --> faster results --- portato/db/eix_sql.py | 2 +- portato/db/sql.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'portato/db') diff --git a/portato/db/eix_sql.py b/portato/db/eix_sql.py index ac31019..089d3ed 100644 --- a/portato/db/eix_sql.py +++ b/portato/db/eix_sql.py @@ -51,7 +51,7 @@ class EixSQLDatabase (SQLDatabase): @SQLDatabase.con def populate (self, category = None, connection = None): - inst = system.find_packages(pkgSet = system.SET_INSTALLED, key = category, with_version = False) + inst = set(system.find_packages(pkgSet = system.SET_INSTALLED, key = category, with_version = False)) def _get(): with EixReader(self.cache) as eix: diff --git a/portato/db/sql.py b/portato/db/sql.py index f2a736e..415df92 100644 --- a/portato/db/sql.py +++ b/portato/db/sql.py @@ -175,7 +175,7 @@ class SQLDatabase (Database): def populate (self, category = None, connection = None): def _get(): # get the lists - inst = system.find_packages(pkgSet = system.SET_INSTALLED, key=category, with_version = False) + inst = set(system.find_packages(pkgSet = system.SET_INSTALLED, key=category, with_version = False)) for p in system.find_packages(key = category, with_version = False): cat, pkg = p.split("/") -- cgit v1.2.3-54-g00ecf From d9c6fb6767c6873782847df168f8224d83ab30cd Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Fri, 14 Aug 2009 22:50:33 +0200 Subject: Rewrote eix-parser in Cython --> WAAAAAAAAAY faster --- portato/db/eix_sql.py | 7 +- portato/eix/__init__.py | 2 +- portato/eix/parser.py | 416 
----------------------------------------------- portato/eix/parser.pyx | 318 ++++++++++++++++++++++++++++++++++++ portato/eix/py_parser.py | 416 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 739 insertions(+), 420 deletions(-) delete mode 100644 portato/eix/parser.py create mode 100644 portato/eix/parser.pyx create mode 100644 portato/eix/py_parser.py (limited to 'portato/db') diff --git a/portato/db/eix_sql.py b/portato/db/eix_sql.py index 089d3ed..3a0c6e9 100644 --- a/portato/db/eix_sql.py +++ b/portato/db/eix_sql.py @@ -56,9 +56,10 @@ class EixSQLDatabase (SQLDatabase): def _get(): with EixReader(self.cache) as eix: for cat in eix.categories: - if category is None or cat.name() == category: - for pkg in cat.packages(): - yield (cat.name(), pkg.name(), pkg.name() in inst, False) + if category is None or cat.name == category: + for pkg in cat.packages: + p = "%s/%s" % (cat.name, pkg.name) + yield (cat.name, pkg.name, p in inst, False) connection.executemany("INSERT INTO packages (cat, name, inst, disabled) VALUES (?, ?, ?, ?)", _get()) connection.commit() diff --git a/portato/eix/__init__.py b/portato/eix/__init__.py index e726073..346fe82 100644 --- a/portato/eix/__init__.py +++ b/portato/eix/__init__.py @@ -69,7 +69,7 @@ class EixReader(object): self.file.seek(0) self.header = parser.header(self.file) - self.categories = parser.vector(self.file, parser.category, nelems = self.header.ncats()) + self.categories = parser.vector(self.file, parser.category, nelems = self.header.ncats) except: self.close() raise diff --git a/portato/eix/parser.py b/portato/eix/parser.py deleted file mode 100644 index cc42553..0000000 --- a/portato/eix/parser.py +++ /dev/null @@ -1,416 +0,0 @@ -# -*- coding: utf-8 -*- -# -# File: portato/eix/parser.py -# This file is part of the Portato-Project, a graphical portage-frontend. -# -# Copyright (C) 2006-2009 René 'Necoro' Neumann -# This is free software. 
You may redistribute copies of it under the terms of -# the GNU General Public License version 2. -# There is NO WARRANTY, to the extent permitted by law. -# -# Written by René 'Necoro' Neumann - -""" -The cache file supports different types of data. -In this module (nearly) all of these types have a corresponding function. - -For the exact way all the functions work, have a look at the eix format description. -""" - -from __future__ import absolute_import, with_statement -__docformat__ = "restructuredtext" - -import os -import struct -from functools import partial - -from ..helper import debug -from .exceptions import EndOfFileException - -# -# Helper -# - -def _get_bytes (file, length, expect_list = False): - """ - Return a number of bytes. - - :Parameters: - - file : file - The file to read from. - - length : int - The number of bytes to read. - - expect_list : bool - In case ``length`` is 1, only a single byte is returned. If ``expect_list`` is true, then a list is also returned in this case. - - :rtype: int or int[] - :raises EndOfFileException: if EOF is reached during execution - """ - - s = file.read(length) - - if len(s) != length: - raise EndOfFileException, file.name - - if length == 1 and not expect_list: - return ord(s) # is faster than unpack and we have a scalar - else: - return struct.unpack("%sB" % length, s) - -# -# Base Types -# - -def number (file, skip = False): - """ - Returns a number. - - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. 
- - :rtype: int - """ - - n = _get_bytes(file, 1) - - if n < 0xFF: - value = n - else: - count = 0 - - while (n == 0xFF): - count += 1 - n = _get_bytes(file, 1) - - if n == 0: - n = 0xFF # 0xFF is encoded as 0xFF 0x00 - count -= 1 - - value = n << (count*8) - - if count > 0: - - if skip: - file.seek(count, os.SEEK_CUR) - return - - else: - rest = _get_bytes(file, count, expect_list = True) - - for i, r in enumerate(rest): - value += r << ((count - i - 1)*8) - - return value - -def vector (file, get_type, skip = False, nelems = None): - """ - Returns a vector of elements. - - :Parameters: - - file : file - The file to read from. - - get_type : function(file, bool) - The function determining type of the elements. - - skip : bool - Do not return the actual value, but just skip to the next datum. - - nelems : int - Normally the eix-Vector has the number of elements as the first argument. - If for some reason this is not the case, you can pass it in here. - - :rtype: list - """ - - if nelems is None: - nelems = number(file) - - if skip: - for i in range(nelems): - get_type(file, skip = True) - else: - return [get_type(file) for i in range(nelems)] - -def typed_vector(type, nelems = None): - """ - Shortcut to create a function for a special type of vector. - - :Parameters: - - type : function(file, bool) - The function determining type of the elements. - - nelems : int - Normally the eix-Vector has the number of elements as the first argument. - If for some reason this is not the case, you can pass it in here. - Do not return the actual value, but just skip to the next datum. - - :rtype: function(file, bool) - :see: `vector` - """ - - if nelems is None: - return partial(vector, get_type = type) - else: - return partial(vector, get_type = type, nelems = nelems) - -def string (file, skip = False): - """ - Returns a string. - - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. 
- - :rtype: str - """ - nelems = number(file) - - if skip: - file.seek(nelems, os.SEEK_CUR) - return - else: - s = file.read(nelems) - - if len(s) != nelems: - raise EndOfFileException, file.name - - return s - -# -# Complex Types -# - -class LazyElement (object): - """ - This class models a value in the cache, which is only read on access. - - If not accessed directly, only the position inside the file is stored. - """ - __slots__ = ("file", "get_type", "_value", "pos") - - def __init__ (self, get_type, file): - """ - :Parameters: - - get_type : function(file, bool) - The function determining type of the elements. - - file : file - The file to read from. - """ - - self.file = file - self.get_type = get_type - self._value = None - - self.pos = file.tell() - get_type(file, skip=True) # skip it for the moment - - @property - def value (self): - """ - The value of the element. - """ - - if self._value is None: - old_pos = self.file.tell() - self.file.seek(self.pos, os.SEEK_SET) - self._value = self.get_type(self.file, skip = False) - self.file.seek(old_pos, os.SEEK_SET) - - return self._value - - def __call__ (self): - """ - Convenience function. Also returns the value. - """ - return self.value - -class overlay (object): - """ - Represents an overlay object. - - :IVariables: - - path : `LazyElement` - The path to the overlay - - label : `LazyElement` - The label/name of the overlay - """ - __slots__ = ("path", "label") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - - self.path = LazyElement(string, file) - self.label = LazyElement(string, file) - -class header (object): - """ - Represents the header of the cache. - - :IVariables: - - version : `LazyElement` - The version of the cache file. - - ncats : `LazyElement` - The number of categories. - - overlays : `LazyElement` <`overlay` []> - The list of overlays. 
- - provide : `LazyElement` - A list of "PROVIDE" values. - - licenses : `LazyElement` - The list of licenses. - - keywords : `LazyElement` - The list of keywords. - - useflags : `LazyElement` - The list of useflags. - - slots : `LazyElement` - The list of slots different from "0". - - sets : `LazyElement` - The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. - If SAVE_WORLD=false, the list is empty. - """ - __slots__ = ("version", "ncats", "overlays", "provide", - "licenses", "keywords", "useflags", "slots", "sets") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - def LE (t): - return LazyElement(t, file) - - self.version = LE(number) - self.ncats = LE(number) - self.overlays = LE(typed_vector(overlay)) - self.provide = LE(typed_vector(string)) - self.licenses = LE(typed_vector(string)) - self.keywords = LE(typed_vector(string)) - self.useflags = LE(typed_vector(string)) - self.slots = LE(typed_vector(string)) - self.sets = LE(typed_vector(string)) - -class package (object): - """ - The representation of one package. - - Currently, version information is not parsed and stored. - So you can gain general infos only. - - :IVariables: - - name : `LazyElement` - The name of the package. - - description : `LazyElement` - Description of the package. - - homepage : `LazyElement` - The homepage of the package. - - provide : `LazyElement` - The indices of `header.provide` representing the PROVIDE value of the package. - - license : `LazyElement` - The index of `header.licenses` representing the license of the package. - - useflags : `LazyElement` - The indices of `header.useflags` representing the IUSE value of the package. 
- """ - - __slots__ = ("_offset", "name", "description", "provide", - "homepage", "license", "useflags") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - def LE (t): - return LazyElement(t, file) - - self._offset = number(file) - - after_offset = file.tell() - - self.name = LE(string) - self.description = LE(string) - self.provide = LE(typed_vector(number)) - self.homepage = LE(string) - self.license = LE(number) - self.useflags = LE(typed_vector(number)) - - # self.versions = LE(typed_vector(version)) - # for the moment just skip the versions - file.seek(self._offset - (file.tell() - after_offset), os.SEEK_CUR) - -class category (object): - """ - Represents a whole category. - - :IVariables: - - name : `LazyElement` - The category name. - - packages : `LazyElement` <`package` []> - All the packages of the category. - """ - __slots__ = ("name", "packages") - - def __init__ (self, file, skip = False): - """ - :Parameters: - - file : file - The file to read from. - - skip : bool - Do not return the actual value, but just skip to the next datum. - """ - self.name = LazyElement(string, file) - self.packages = LazyElement(typed_vector(package), file) diff --git a/portato/eix/parser.pyx b/portato/eix/parser.pyx new file mode 100644 index 0000000..453376e --- /dev/null +++ b/portato/eix/parser.pyx @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# +# File: portato/eix/_parser.pyx +# This file is part of the Portato-Project, a graphical portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. +# +# Written by René 'Necoro' Neumann + +""" +The cache file supports different types of data. 
+In this module (nearly) all of these types have a corresponding function. + +For the exact way all the functions work, have a look at the eix format description. +""" + +__docformat__ = "restructuredtext" + +cdef extern from "stdio.h": + ctypedef struct FILE: + pass + + int fgetc(FILE* stream) + + int EOF + int SEEK_CUR + +cdef extern from "Python.h": + FILE* PyFile_AsFile(object) + + ctypedef int Py + +ctypedef unsigned char UChar +ctypedef long long LLong + +from portato.eix.exceptions import EndOfFileException + +# +# Helper +# + +cdef int _get_byte (FILE* file) except -1: + cdef int c = fgetc(file) + + if c == EOF: + raise EndOfFileException + + return c + + +# +# Base Types +# + +cdef LLong _number (object pfile): + cdef UChar n + cdef LLong value + cdef int i + + cdef unsigned short count = 1 + cdef FILE* file = PyFile_AsFile(pfile) + + n = _get_byte(file) + + if n < 0xFF: + return n + else: + + n = _get_byte(file) + while (n == 0xFF): + count += 1 + n = _get_byte(file) + + if n == 0: + value = 0xFF # 0xFF is encoded as 0xFF 0x00 + count -= 1 + else: + value = n + + for i in range(count): + value = (value << 8) | (_get_byte(file)) + + return value + +def number (file): + """ + Returns a number. + + :param file: The file to read from + :type file: file + :rtype: int + """ + + return _number(file) + +def vector (file, get_type, nelems = None): + """ + Returns a vector of elements. + + :Parameters: + + file : file + The file to read from. + + get_type : function(file, bool) + The function determining type of the elements. + + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + + :rtype: list + """ + + cdef LLong n + cdef LLong i + + if nelems is None: + n = _number(file) + else: + n = nelems + + l = [] + for i in range(n): + l.append(get_type(file)) + + return l + +def string (file): + """ + Returns a string. 
+ + :param file: The file to read from + :type file: file + :rtype: str + """ + cdef LLong nelems + + nelems = _number(file) + + s = file.read(nelems) + + if len(s) != nelems: + raise EndOfFileException, file.name + + return s + +# +# Complex Types +# + +cdef class overlay: + """ + Represents an overlay object. + + :IVariables: + + path : string + The path to the overlay + + label : string + The label/name of the overlay + """ + + cdef readonly object path + cdef readonly object label + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + + self.path = string(file) + self.label = string(file) + +cdef class header: + """ + Represents the header of the cache. + + :IVariables: + + version : int + The version of the cache file. + + ncats : int + The number of categories. + + overlays : `overlay` [] + The list of overlays. + + provide : string[] + A list of "PROVIDE" values. + + licenses : string[] + The list of licenses. + + keywords : string[] + The list of keywords. + + useflags : string[] + The list of useflags. + + slots : string[] + The list of slots different from "0". + + sets : string[] + The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. + If SAVE_WORLD=false, the list is empty. 
+ """ + + cdef readonly object version + cdef readonly object ncats + cdef readonly object overlays + cdef readonly object provide + cdef readonly object licenses + cdef readonly object keywords + cdef readonly object useflags + cdef readonly object slots + cdef readonly object sets + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self.version = number(file) + self.ncats = number(file) + self.overlays = vector(file, overlay) + self.provide = vector(file, string) + self.licenses = vector(file, string) + self.keywords = vector(file, string) + self.useflags = vector(file, string) + self.slots = vector(file, string) + self.sets = vector(file, string) + +cdef class package: + """ + The representation of one package. + + Currently, version information is not parsed and stored. + So you can gain general infos only. + + :IVariables: + + name : string + The name of the package. + + description : string + Description of the package. + + homepage : string + The homepage of the package. + + provide : int[] + The indices of `header.provide` representing the PROVIDE value of the package. + + license : int + The index of `header.licenses` representing the license of the package. + + useflags : int[] + The indices of `header.useflags` representing the IUSE value of the package. 
+ """ + + cdef readonly object _offset + cdef readonly object name + cdef readonly object description + cdef readonly object provide + cdef readonly object homepage + cdef readonly object license + cdef readonly object useflags + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self._offset = number(file) + + after_offset = file.tell() + + self.name = string(file) + self.description = string(file) + self.provide = vector(file, number) + self.homepage = string(file) + self.license = number(file) + self.useflags = vector(file, number) + + # self.versions = LE(typed_vector(version)) + # for the moment just skip the versions + file.seek(self._offset - (file.tell() - after_offset), SEEK_CUR) + +cdef class category: + """ + Represents a whole category. + + :IVariables: + + name : string + The category name. + + packages : `package` [] + All the packages of the category. + """ + + cdef readonly object name + cdef readonly object packages + + def __init__ (self, file): + """ + :param file: The file to read from + :type file: file + """ + self.name = string(file) + self.packages = vector(file, package) diff --git a/portato/eix/py_parser.py b/portato/eix/py_parser.py new file mode 100644 index 0000000..cc42553 --- /dev/null +++ b/portato/eix/py_parser.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +# +# File: portato/eix/py_parser.py +# This file is part of the Portato-Project, a graphical portage-frontend. +# +# Copyright (C) 2006-2009 René 'Necoro' Neumann +# This is free software. You may redistribute copies of it under the terms of +# the GNU General Public License version 2. +# There is NO WARRANTY, to the extent permitted by law. +# +# Written by René 'Necoro' Neumann + +""" +The cache file supports different types of data. +In this module (nearly) all of these types have a corresponding function. + +For the exact way all the functions work, have a look at the eix format description. 
+""" + +from __future__ import absolute_import, with_statement +__docformat__ = "restructuredtext" + +import os +import struct +from functools import partial + +from ..helper import debug +from .exceptions import EndOfFileException + +# +# Helper +# + +def _get_bytes (file, length, expect_list = False): + """ + Return a number of bytes. + + :Parameters: + + file : file + The file to read from. + + length : int + The number of bytes to read. + + expect_list : bool + In case ``length`` is 1, only a single byte is returned. If ``expect_list`` is true, then a list is also returned in this case. + + :rtype: int or int[] + :raises EndOfFileException: if EOF is reached during execution + """ + + s = file.read(length) + + if len(s) != length: + raise EndOfFileException, file.name + + if length == 1 and not expect_list: + return ord(s) # is faster than unpack and we have a scalar + else: + return struct.unpack("%sB" % length, s) + +# +# Base Types +# + +def number (file, skip = False): + """ + Returns a number. + + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + + :rtype: int + """ + + n = _get_bytes(file, 1) + + if n < 0xFF: + value = n + else: + count = 0 + + while (n == 0xFF): + count += 1 + n = _get_bytes(file, 1) + + if n == 0: + n = 0xFF # 0xFF is encoded as 0xFF 0x00 + count -= 1 + + value = n << (count*8) + + if count > 0: + + if skip: + file.seek(count, os.SEEK_CUR) + return + + else: + rest = _get_bytes(file, count, expect_list = True) + + for i, r in enumerate(rest): + value += r << ((count - i - 1)*8) + + return value + +def vector (file, get_type, skip = False, nelems = None): + """ + Returns a vector of elements. + + :Parameters: + + file : file + The file to read from. + + get_type : function(file, bool) + The function determining type of the elements. + + skip : bool + Do not return the actual value, but just skip to the next datum. 
+ + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + + :rtype: list + """ + + if nelems is None: + nelems = number(file) + + if skip: + for i in range(nelems): + get_type(file, skip = True) + else: + return [get_type(file) for i in range(nelems)] + +def typed_vector(type, nelems = None): + """ + Shortcut to create a function for a special type of vector. + + :Parameters: + + type : function(file, bool) + The function determining type of the elements. + + nelems : int + Normally the eix-Vector has the number of elements as the first argument. + If for some reason this is not the case, you can pass it in here. + If omitted, the count is read from the file (see `vector`). + + :rtype: function(file, bool) + :see: `vector` + """ + + if nelems is None: + return partial(vector, get_type = type) + else: + return partial(vector, get_type = type, nelems = nelems) + +def string (file, skip = False): + """ + Returns a string. + + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + + :rtype: str + """ + nelems = number(file) + + if skip: + file.seek(nelems, os.SEEK_CUR) + return + else: + s = file.read(nelems) + + if len(s) != nelems: + raise EndOfFileException, file.name + + return s + +# +# Complex Types +# + +class LazyElement (object): + """ + This class models a value in the cache, which is only read on access. + + If not accessed directly, only the position inside the file is stored. + """ + __slots__ = ("file", "get_type", "_value", "pos") + + def __init__ (self, get_type, file): + """ + :Parameters: + + get_type : function(file, bool) + The function determining type of the elements. + + file : file + The file to read from. 
+ """ + + self.file = file + self.get_type = get_type + self._value = None + + self.pos = file.tell() + get_type(file, skip=True) # skip it for the moment + + @property + def value (self): + """ + The value of the element. + """ + + if self._value is None: + old_pos = self.file.tell() + self.file.seek(self.pos, os.SEEK_SET) + self._value = self.get_type(self.file, skip = False) + self.file.seek(old_pos, os.SEEK_SET) + + return self._value + + def __call__ (self): + """ + Convenience function. Also returns the value. + """ + return self.value + +class overlay (object): + """ + Represents an overlay object. + + :IVariables: + + path : `LazyElement` + The path to the overlay + + label : `LazyElement` + The label/name of the overlay + """ + __slots__ = ("path", "label") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + + self.path = LazyElement(string, file) + self.label = LazyElement(string, file) + +class header (object): + """ + Represents the header of the cache. + + :IVariables: + + version : `LazyElement` + The version of the cache file. + + ncats : `LazyElement` + The number of categories. + + overlays : `LazyElement` <`overlay` []> + The list of overlays. + + provide : `LazyElement` + A list of "PROVIDE" values. + + licenses : `LazyElement` + The list of licenses. + + keywords : `LazyElement` + The list of keywords. + + useflags : `LazyElement` + The list of useflags. + + slots : `LazyElement` + The list of slots different from "0". + + sets : `LazyElement` + The names of world sets are the names (without leading @) of the world sets stored in /var/lib/portage/world_sets. + If SAVE_WORLD=false, the list is empty. 
+ """ + __slots__ = ("version", "ncats", "overlays", "provide", + "licenses", "keywords", "useflags", "slots", "sets") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + def LE (t): + return LazyElement(t, file) + + self.version = LE(number) + self.ncats = LE(number) + self.overlays = LE(typed_vector(overlay)) + self.provide = LE(typed_vector(string)) + self.licenses = LE(typed_vector(string)) + self.keywords = LE(typed_vector(string)) + self.useflags = LE(typed_vector(string)) + self.slots = LE(typed_vector(string)) + self.sets = LE(typed_vector(string)) + +class package (object): + """ + The representation of one package. + + Currently, version information is not parsed and stored. + So you can gain general infos only. + + :IVariables: + + name : `LazyElement` + The name of the package. + + description : `LazyElement` + Description of the package. + + homepage : `LazyElement` + The homepage of the package. + + provide : `LazyElement` + The indices of `header.provide` representing the PROVIDE value of the package. + + license : `LazyElement` + The index of `header.licenses` representing the license of the package. + + useflags : `LazyElement` + The indices of `header.useflags` representing the IUSE value of the package. + """ + + __slots__ = ("_offset", "name", "description", "provide", + "homepage", "license", "useflags") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. 
+ """ + def LE (t): + return LazyElement(t, file) + + self._offset = number(file) + + after_offset = file.tell() + + self.name = LE(string) + self.description = LE(string) + self.provide = LE(typed_vector(number)) + self.homepage = LE(string) + self.license = LE(number) + self.useflags = LE(typed_vector(number)) + + # self.versions = LE(typed_vector(version)) + # for the moment just skip the versions + file.seek(self._offset - (file.tell() - after_offset), os.SEEK_CUR) + +class category (object): + """ + Represents a whole category. + + :IVariables: + + name : `LazyElement` + The category name. + + packages : `LazyElement` <`package` []> + All the packages of the category. + """ + __slots__ = ("name", "packages") + + def __init__ (self, file, skip = False): + """ + :Parameters: + + file : file + The file to read from. + + skip : bool + Do not return the actual value, but just skip to the next datum. + """ + self.name = LazyElement(string, file) + self.packages = LazyElement(typed_vector(package), file) -- cgit v1.2.3-54-g00ecf