# /usr/share/doc/python-tables-doc/bench/stress-test3.py

#!/usr/bin/env python

""" This script allows to create arbitrarily large files with the desired
combination of groups, tables per group and rows per table.

Run "python stress-test3.py" without parameters for help on usage.

"""

import sys, time, random, gc
import numarray
from tables import *

class Test(IsDescription):
    # Row layout of every table written by createFile().  The three integer
    # columns record where a row was written so that readFile() can check
    # them; `pos` fixes the column order.
    ngroup = Int32Col(pos=1)  # index of the group holding the table
    ntable = Int32Col(pos=2)  # index of the table inside its group
    nrow = Int32Col(pos=3)    # index of the row inside its table
    # 500-character string column that createFile() never assigns.
    # The size is passed positionally on purpose: per the PyTables docs the
    # keyword is `length` in 1.x but `itemsize` in 2.x, and the positional
    # form is accepted by both.
    string = StringCol(500, pos=4)

def createFileArr(filename, ngroups, ntables, nrows):
    """Create `filename` with `ngroups` empty groups hanging from the root.

    This is the Array flavour of `createFile`, but it only creates the
    groups: no Array node is written, and `ntables` and `nrows` are accepted
    but not used.

    Returns the constant tuple `(0, 4)`, which the caller unpacks as
    (rows written, row size).
    """
    # Open a file in "w"rite mode
    fileh = openFile(filename, mode="w", title="PyTables Stress Test")
    try:
        for k in range(ngroups):
            # Create the group
            fileh.createGroup("/", 'group%04d' % k, "Group %d" % k)
    finally:
        # Release the file even if creating a group fails
        fileh.close()

    return (0, 4)

def readFileArr(filename, ngroups, recsize, verbose):

    rowsread = 0
    for ngroup in range(ngroups):
        fileh = openFile(filename, mode="r", rootUEP='group%04d'% ngroup)
        # Get the group
        group = fileh.root
        ntable = 0
        if verbose:
            print "Group ==>", group
        for table in fileh.listNodes(group, 'Array'):
            if verbose > 1:
                print "Array ==>", table
                print "Rows in", table._v_pathname, ":", table.shape

            nrow = 0
            arr = table.read()

            rowsread += len(arr)
            ntable += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, 4, 0)

def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize):
    """Write the stress-test file: `ngroups` groups of `ntables` tables each.

    The file is first created with one empty group per index.  It is then
    reopened once per group, in append mode with that group as the root entry
    point, and `ntables` tables of `nrows` rows laid out as `Test` are added.
    Every row stores its group, table and row index.

    `complevel` and `complib` are passed to `Filters` for every table.
    `recsize` is accepted but not used.

    Returns `(rowswritten, rowsize)`, where `rowsize` is the `rowsize` of the
    last table created, or 0 when no table was created.
    """
    # First, create the groups

    # Open a file in "w"rite mode
    fileh = openFile(filename, mode="w", title="PyTables Stress Test")
    try:
        for k in range(ngroups):
            # Create the group
            fileh.createGroup("/", 'group%04d' % k, "Group %d" % k)
    finally:
        fileh.close()

    # Now, create the tables
    rowswritten = 0
    # Bound up front so the final return works when ngroups or ntables is 0
    rowsize = 0
    for k in range(ngroups):
        fileh = openFile(filename, mode="a", rootUEP='group%04d' % k)
        try:
            # Get the group
            group = fileh.root
            for j in range(ntables):
                # Create a table; nrows is passed as the expected row count
                table = fileh.createTable(group, 'table%04d' % j, Test,
                                          'Table%04d' % j,
                                          Filters(complevel, complib), nrows)
                rowsize = table.rowsize
                # Get the row object associated with the new table
                row = table.row
                # Fill the table
                for i in xrange(nrows):
                    row['ngroup'] = k
                    row['ntable'] = j
                    row['nrow'] = i
                    row.append()

                rowswritten += nrows
                table.flush()
        finally:
            # Close the file even if writing a table fails
            fileh.close()

    return (rowswritten, rowsize)

def readFile(filename, ngroups, recsize, verbose):
    # Open the HDF5 file in read-only mode

    rowsread = 0
    for ngroup in range(ngroups):
        fileh = openFile(filename, mode="r", rootUEP='group%04d'% ngroup)
        # Get the group
        group = fileh.root
        ntable = 0
        if verbose:
            print "Group ==>", group
        for table in fileh.listNodes(group, 'Table'):
            rowsize = table.rowsize
            buffersize=table.rowsize * table.nrowsinbuf
            if verbose > 1:
                print "Table ==>", table
                print "Max rows in buf:", table.nrowsinbuf
                print "Rows in", table._v_pathname, ":", table.nrows
                print "Buffersize:", table.rowsize * table.nrowsinbuf
                print "MaxTuples:", table.nrowsinbuf

            nrow = 0
            for row in table:
                try:
                    assert row["ngroup"] == ngroup
                    assert row["ntable"] == ntable
                    assert row["nrow"] == nrow
                except:
                    print "Error in group: %d, table: %d, row: %d" % \
                          (ngroup, ntable, nrow)
                    print "Record ==>", row
                nrow += 1

            assert nrow == table.nrows
            rowsread += table.nrows
            ntable += 1

        # Close the file (eventually destroy the extended type)
        fileh.close()

    return (rowsread, rowsize, buffersize)

def dump_garbage():
    """
    show us waht the garbage is about
    """
    # Force collection
    print "\nGARBAGE:"
    gc.collect()

    print "\nGARBAGE OBJECTS:"
    for x in gc.garbage:
        s = str(x)
        #if len(s) > 80: s = s[:77] + "..."
        print type(x),"\n   ", s

if __name__=="__main__":
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except:
        psyco_imported = 0


    usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file
    -d debugging level
    -v verbosity level
    -p use "psyco" if available
    -a use Array objects instead of Table
    -r only read test
    -w only write test
    -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2")
    -c sets a compression level (do not set it or 0 for no compression)
    -g number of groups hanging from "/"
    -t number of tables per group
    -i number of rows per table
"""

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:parwl:c:g:t:i:')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) <> 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    ngroups = 5
    ntables = 5
    nrows = 100
    verbose = 0
    debug = 0
    recsize = "medium"
    testread = 1
    testwrite = 1
    usepsyco = 0
    usearray = 0
    complevel = 0
    complib = "zlib"

    # Get the options
    for option in opts:
        if option[0] == '-d':
            debug = int(option[1])
        if option[0] == '-v':
            verbose = int(option[1])
        if option[0] == '-p':
            usepsyco = 1
        if option[0] == '-a':
            usearray = 1
        elif option[0] == '-r':
            testwrite = 0
        elif option[0] == '-w':
            testread = 0
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-c':
            complevel = int(option[1])
        elif option[0] == '-g':
            ngroups = int(option[1])
        elif option[0] == '-t':
            ntables = int(option[1])
        elif option[0] == '-i':
            nrows = int(option[1])

    if debug:
        gc.enable()
        gc.set_debug(gc.DEBUG_LEAK)

    # Catch the hdf5 file passed as the last argument
    file = pargs[0]

    print "Compression level:", complevel
    if complevel > 0:
        print "Compression library:", complib
    if testwrite:
        t1 = time.time()
        cpu1 = time.clock()
        if psyco_imported and usepsyco:
            psyco.bind(createFile)
        if usearray:
            (rowsw, rowsz) = createFileArr(file, ngroups, ntables, nrows)
        else:
            (rowsw, rowsz) = createFile(file, ngroups, ntables, nrows,
                                        complevel, complib, recsize)
        t2 = time.time()
        cpu2 = time.clock()
        tapprows = round(t2-t1, 3)
        cpuapprows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpuapprows/tapprows, 2)*100)
        print "Rows written:", rowsw, " Row size:", rowsz
        print "Time writing rows: %s s (real) %s s (cpu)  %s%%" % \
              (tapprows, cpuapprows, tpercent)
        print "Write rows/sec: ", int(rowsw / float(tapprows))
        print "Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))

    if testread:
        t1 = time.time()
        cpu1 = time.clock()
        if psyco_imported and usepsyco:
            psyco.bind(readFile)
        if usearray:
            (rowsr, rowsz, bufsz)=readFileArr(file, ngroups, recsize, verbose)
        else:
            (rowsr, rowsz, bufsz) = readFile(file, ngroups, recsize, verbose)
        t2 = time.time()
        cpu2 = time.clock()
        treadrows = round(t2-t1, 3)
        cpureadrows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpureadrows/treadrows, 2)*100)
        print "Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz
        print "Time reading rows: %s s (real) %s s (cpu)  %s%%" % \
              (treadrows, cpureadrows, tpercent)
        print "Read rows/sec: ", int(rowsr / float(treadrows))
        print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))

    # Show the dirt
    if debug > 1:
        dump_garbage()
# python-tables-doc 2.3.1-3 / usr / share / doc / python-tables-doc / bench / stress-test3.py