Logo Search packages:      
Sourcecode: yum version File versions  Download package

mdcache.py

#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2004 Duke University 

import os
import sys
import libxml2
import cPickle

import Errors


class RepodataParser:
    def __init__(self, storedir, callback=None):
        self.storedir = storedir
        self.callback = callback
        self.repodata = {
            'metadata': {}, 
            'filelists': {},
            'otherdata': {}
        }
        self.debug = 0
    
        
    def debugprint(self, msg):
        if self.debug:
            print msg
        
    def _piklFileName(self, location, checksum):
        filename = os.path.basename(location)
        piklfile = os.path.join(self.storedir, filename)
        piklfile = '%s.%s.pickle' % (piklfile, checksum)
        self.debugprint('piklfile=%s' % piklfile)
        return piklfile

    def _pickle(self, outfile, obj):
        self.debugprint('Trying to pickle into %s' % outfile)
        try: outfh = open(outfile, 'w')
        except IOError, e: 
            raise cPickle.PicklingError(e)
        try: cPickle.dump(obj, outfh, cPickle.HIGHEST_PROTOCOL)
        except AttributeError: cPickle.dump(obj, outfh, 1)
        self.debugprint('Pickle successful!')
        outfh.close()

    def _unpickle(self, infile):
        self.debugprint('Trying to unpickle from %s' % infile)
        try: infh = open(infile)
        except IOError, e: raise cPickle.UnpicklingError(e)
        obj = cPickle.load(infh)
        infh.close()
        self.debugprint('Unpickle successful!')
        return obj

    def _killold(self, location):
        filename = os.path.basename(location)
        dirfiles = os.listdir(self.storedir)
        for dirfile in dirfiles:
            if dirfile[-7:] == '.pickle':
                if dirfile[:len(filename)] == filename:
                    oldpickle = os.path.join(self.storedir, dirfile)
                    self.debugprint('removing old pickle file %s' % oldpickle)
                    try: os.unlink(oldpickle)
                    except OSError:
                        ## Give an error or something
                        pass
        
    def _getGeneric(self, ident, location, checksum):
        databank = self.repodata[ident]
        if databank: return databank
        if checksum is None:
            ##
            # Pass checksum as None to ignore pickling. This will
            # Go straight to xml files.
            return self.parseDataFromXml(location)
        piklfile = self._piklFileName(location, checksum)
        try: 
            databank = self._unpickle(piklfile)
            self.repodata[ident] = databank
            return databank
        except cPickle.UnpicklingError, e: 
            self.debugprint('Could not unpickle: %s!' % e)
            databank = self.parseDataFromXml(location)
            self._killold(location)
            try: self._pickle(piklfile, databank)
            except cPickle.PicklingError:
                self.debugprint('Could not pickle %s data in %s' % (ident, piklfile))
            return databank
        
    def getPrimary(self, location, checksum):
        return self._getGeneric('metadata', location, checksum)

    def getFilelists(self, location, checksum):
        return self._getGeneric('filelists', location, checksum)

    def getOtherdata(self, location, checksum):
        return self._getGeneric('otherdata', location, checksum)

    def parseDataFromXml(self, fileloc):
        ## TODO: Fail sanely.
        self.debugprint('Parsing data from %s' % fileloc)
        reader = libxml2.newTextReaderFilename(fileloc)
        count = 0
        total = 9999
        mode = None
        databank = None
        while reader.Read():
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name in ('metadata', 'filelists', 'otherdata'):
                mode = name
                databank = self.repodata[mode]
                try: total = int(reader.GetAttribute('packages'))
                except ValueError: pass
            elif name == 'package':
                count += 1
                if mode == 'metadata':
                    obj = PrimaryEntry(reader)
                    pkgid = obj.checksum['value']
                    #if pkgid in databank.keys():
                    #    print 'double detected!'
                    #    print databank[pkgid].nevra, 'vs', obj.nevra
                    if pkgid: databank[pkgid] = obj
                elif mode == 'filelists':
                    pkgid = reader.GetAttribute('pkgid')
                    if pkgid:
                        obj = FilelistsEntry(reader)
                        databank[pkgid] = obj
                elif mode == 'otherdata':
                    pkgid = reader.GetAttribute('pkgid')
                    if pkgid:
                        obj = OtherEntry(reader)
                        databank[pkgid] = obj
                if self.callback: 
                    self.callback.progressbar(count, total, 'MD Read')
        self.debugprint('Parsed %s packages' % count)
        reader.Close()
        del reader
        return databank

class BaseEntry:
    def _props(self, reader):
        if not reader.HasAttributes(): return {}
        propdict = {}
        reader.MoveToFirstAttribute()
        while 1:
            propdict[reader.LocalName()] = reader.Value()
            if not reader.MoveToNextAttribute(): break
        reader.MoveToElement()
        return propdict
        
    def _value(self, reader):
        if reader.IsEmptyElement(): return ''
        val = ''
        while reader.Read():
            if reader.NodeType() == 3: val += reader.Value()
            else: break
        return val

    def _getFileEntry(self, reader):
        type = 'file'
        props = self._props(reader)
        if props.has_key('type'): type = props['type']
        value = self._value(reader)
        return (type, value)

class PrimaryEntry(BaseEntry):
    def __init__(self, reader):
        self.nevra = (None, None, None, None, None)
        self.checksum = {'type': None, 'pkgid': None, 'value': None}
        self.info = {
            'summary': None,
            'description': None,
            'packager': None,
            'url': None,
            'license': None,
            'vendor': None,
            'group': None,
            'buildhost': None,
            'sourcerpm': None
        }
        self.time = {'file': None, 'build': None}
        self.size = {'package': None, 'installed': None, 'archive': None}
        self.location = {'href': None, 'value': None}
        self.hdrange = {'start': None, 'end': None}
        self.prco = {}
        self.files = {}

        n = e = v = r = a = None
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name == 'name': n = self._value(reader)
            elif name == 'arch': a = self._value(reader)
            elif name == 'version': 
                evr = self._props(reader)
                (e, v, r) = (evr['epoch'], evr['ver'], evr['rel'])
            elif name in ('summary', 'description', 'packager', 'url'): 
                self.info[name] = self._value(reader)
            elif name == 'checksum': 
                self.checksum = self._props(reader)
                self.checksum['value'] = self._value(reader)
            elif name == 'location': 
                self.location = self._props(reader)
                self.location['value'] = self._value(reader)
            elif name == 'time':
                self.time = self._props(reader)
            elif name == 'size':
                self.size = self._props(reader)
            elif name == 'format': self.setFormat(reader)
        self.nevra = (n, e, v, r, a)

    def dump(self):
        print 'nevra=%s,%s,%s,%s,%s' % self.nevra
        print 'checksum=%s' % self.checksum
        print 'info=%s' % self.info
        print 'time=%s' % self.time
        print 'size=%s' % self.size
        print 'location=%s' % self.location
        print 'hdrange=%s' % self.hdrange
        print 'prco=%s' % self.prco
        print 'files=%s' % self.files

    def setFormat(self, reader):
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'format':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name in ('license', 'vendor', 'group', 'buildhost',
                        'sourcerpm'):
                self.info[name] = self._value(reader)
            elif name in ('provides', 'requires', 'conflicts', 
                          'obsoletes'):
                self.setPrco(reader)
            elif name == 'header-range':
                self.hdrange = self._props(reader)
            elif name == 'file':
                (type, value) = self._getFileEntry(reader)
                self.files[value] = type

    def setPrco(self, reader):
        members = []
        myname = reader.LocalName()
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == myname:
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            members.append(self._props(reader))
        self.prco[myname] = members

class FilelistsEntry(BaseEntry):
    def __init__(self, reader):
        self.files = {}
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name == 'file':
                (type, value) = self._getFileEntry(reader)
                self.files[value] = type
                
    def dump(self):
        print 'files=%s' % self.files

class OtherEntry(BaseEntry):
    def __init__(self, reader):
        self.changelog = []
        while reader.Read():
            if reader.NodeType() == 15 and reader.LocalName() == 'package':
                break
            if reader.NodeType() != 1: continue
            name = reader.LocalName()
            if name == 'changelog':
                entry = self._props(reader)
                entry['value'] = self._value(reader)
                self.changelog.append(entry)

    def dump(self):
        print 'changelog=%s' % self.changelog

def test(level, repodir, storedir, checksum):
    import time
    primary = os.path.join(repodir, 'primary.xml')
    filelists = os.path.join(repodir, 'filelists.xml')
    otherdata = os.path.join(repodir, 'other.xml')
    tick = time.time()
    bigtick = tick
    rp = RepodataParser(storedir)
    rp.getPrimary(primary, checksum)
    print 'operation took: %d seconds' % (time.time() - tick)
    print 'primary has %s entries' % len(rp.repodata['metadata'].keys())
    tick = time.time()
    if level == 'filelists' or level == 'other':
        rp.getFilelists(filelists, checksum)
        print 'operation took: %d seconds' % (time.time() - tick)
        print 'filelists has %s entries' % len(rp.repodata['filelists'].keys())
        tick = time.time()
    if level == 'other':
        rp.getOtherdata(otherdata, checksum)
        print 'operation took: %d seconds' % (time.time() - tick)
        print 'otherdata has %s entries' % len(rp.repodata['otherdata'].keys())
    print
    print 'total operation time: %d seconds' % (time.time() - bigtick)

def testusage():
    print 'Usage: %s level repodir storedir checksum' % sys.argv[0]
    print 'level can be primary, filelists, other'
    print 'repodir is the location of .xml files'
    print 'storedir is where pickles will be saved'
    print 'checksum can be anything you want it to be'
    sys.exit(1)
    
if __name__ == '__main__':
    try: (level, repodir, storedir, checksum) = sys.argv[1:]
    except ValueError: testusage()
    if level not in ('primary', 'filelists', 'other'): testusage()
    if checksum == 'None': checksum = None
    test(level, repodir, storedir, checksum)

Generated by  Doxygen 1.6.0   Back to index