521 lines
16 KiB
Python
521 lines
16 KiB
Python
"""Contains the functions used to read the input file and print the checkpoint
|
|
files with xml formatting.
|
|
|
|
Copyright (C) 2013, Joshua More and Michele Ceriotti
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
Functions:
|
|
xml_node: Class to handle a particular xml tag.
|
|
xml_handler: Class giving general xml data reading methods.
|
|
xml_parse_string: Parses a string made from a section of a xml input file.
|
|
xml_parse_file: Parses an entire xml input file.
|
|
read_type: Reads a string and outputs data of a specified type.
|
|
read_float: Reads a string and outputs a float.
|
|
read_int: Reads a string and outputs an integer.
|
|
read_bool: Reads a string and outputs a boolean.
|
|
read_list: Reads a string and outputs a list.
|
|
read_array: Reads a string and outputs an array.
|
|
read_tuple: Reads a string and outputs a tuple.
|
|
read_dict: Reads a string and outputs a dictionary.
|
|
write_type: Writes a string from data of a specified type.
|
|
write_list: Writes a string from a list.
|
|
write_tuple: Writes a string from a tuple.
|
|
write_float: Writes a string from a float.
|
|
write_bool: Writes a string from a boolean.
|
|
write_dict: Writes a string from a dictionary.
|
|
"""
|
|
|
|
# Names exported when this module is imported with `from <module> import *`.
__all__ = ['xml_node', 'xml_handler', 'xml_parse_string', 'xml_parse_file',
           'read_type', 'read_float', 'read_int', 'read_bool', 'read_list',
           'read_array', 'read_tuple', 'read_dict', 'write_type', 'write_list',
           'write_tuple', 'write_float', 'write_bool', 'write_dict']
|
|
|
|
from xml.sax import parseString, parse
|
|
from xml.sax.handler import ContentHandler
|
|
import numpy as np
|
|
import string
|
|
|
|
class xml_node(object):
    """Container for the contents of a single xml tag.

    A tag is written as
    <tag_name attribs="attrib_data"> main_data </tag_name>. The tag name, the
    attribute data and the enclosed data are stored separately so that they
    can later be used to build objects with the appropriate names and data.

    Attributes:
        attribs: The attribute data for the tag.
        fields: The rest of the data.
        name: The tag name.
    """

    def __init__(self, attribs=None, name="", fields=None):
        """Initialises xml_node.

        Args:
            attribs: An optional dictionary giving attribute data. A fresh
                empty dictionary is used when omitted.
            name: An optional string giving the tag name. Defaults to ''.
            fields: An optional container holding all the data between the
                start and end tags, including information about other nodes.
                A fresh empty list is used when omitted.
        """

        # None is the sentinel for "not given": a new container is created per
        # instance so that nodes never share a mutable default.
        self.attribs = {} if attribs is None else attribs
        self.name = name
        self.fields = [] if fields is None else fields
|
|
|
|
|
|
class xml_handler(ContentHandler):
    """Class giving general xml reading methods.

    Uses the standard python xml reader to read the different kinds of data.
    Keeps track of the hierarchical nature of an xml file by recording the
    level of nesting, so that the correct data and attributes can be
    associated with the correct tag name.

    Attributes:
        root: An xml_node object for the root node.
        open: The list of the tags that the parser is currently between the
            start and end tags of.
        level: The level of nesting that the parser is currently at.
        buffer: A list of the data found between the tags at the different
            levels of nesting.
    """

    def __init__(self):
        """Initialises xml_handler."""

        # Run the base-class initialiser so that whatever state ContentHandler
        # sets up exists on this instance. The explicit call form is used
        # (rather than super()) so it does not depend on the base class being
        # a new-style class.
        ContentHandler.__init__(self)

        # root xml node with all the data
        self.root = xml_node(name="root", fields=[])
        self.open = [self.root]
        # current level of the hierarchy
        self.level = 0
        # Holds all the data between each of the tags.
        # If level = 1, then buffer[0] holds all the data collected between the
        # root tags, and buffer[1] holds all the data collected between the
        # first child tag.
        self.buffer = [[""]]

    def startElement(self, name, attrs):
        """Reads an opening tag.

        Adds the opening tag to the list of open tags, adds a new space in the
        buffer, reads the appropriate attributes and adds a new level to the
        hierarchy.

        Args:
            name: The tag_name.
            attrs: The attribute data.
        """

        # creates a new node, copying the attributes into a plain dictionary
        newnode = xml_node(attribs=dict((key, attrs[key]) for key in attrs.keys()),
                           name=name, fields=[])
        # adds it to the list of open nodes
        self.open.append(newnode)
        # adds it to the list of fields of the parent tag; self.level still
        # refers to the parent here because it is only incremented below
        self.open[self.level].fields.append((name, newnode))
        # gets ready to read new data
        self.buffer.append([""])
        self.level += 1

    def characters(self, data):
        """Reads data.

        Adds the data to the buffer of the current level of the hierarchy.
        Data is read as a string, and needs to be converted to the required
        type later.

        Args:
            data: The data to be read.
        """

        self.buffer[self.level].append(data)

    def endElement(self, name):
        """Reads a closing tag.

        Once all the data has been read, and the closing tag found, the buffer
        is read into the appropriate field.

        Args:
            name: The tag_name.
        """

        # All the text fragments collected between the tags are joined and
        # stored in the node under the "_text" key. Popping the buffer and the
        # node 'closes' the xml_node: we are no longer within its tags, so
        # nothing more will be added to it. The node itself is still held in
        # the fields of its parent.
        text = ''.join(self.buffer.pop(self.level))
        closed = self.open.pop(self.level)
        closed.fields.append(("_text", text))
        self.level -= 1
|
|
|
|
def xml_parse_string(buf):
    """Parses a string made from a section of a xml input file.

    Args:
        buf: A string in correct xml format.

    Returns:
        A xml_node for the root node of the file.
    """

    # The handler accumulates the node tree while the sax parser walks buf.
    handler = xml_handler()
    parseString(buf, handler)
    root_node = handler.root
    return root_node
|
|
|
|
def xml_parse_file(stream):
    """Parses an entire xml input file.

    Args:
        stream: The xml source, passed unchanged to xml.sax.parse.

    Returns:
        A xml_node for the root node of the file.
    """

    # The handler accumulates the node tree while the sax parser walks stream.
    handler = xml_handler()
    parse(stream, handler)
    root_node = handler.root
    return root_node
|
|
|
|
def read_type(type, data):
    """Reads a string and outputs data of a specified type.

    The conversion function is looked up in readtype_funcs, and its result is
    then passed through type() itself before being returned.

    NOTE(review): the converter is called with the single argument data, while
    read_array takes (dtype, data) -- confirm that np.ndarray is never
    requested through this function.

    Args:
        type: The data type of the target container.
        data: The string to be read in.

    Raises:
        TypeError: Raised if it tries to read into a data type that has not been
            implemented.

    Returns:
        An object of type type.
    """

    # No registered converter is None, so None reliably means "not registered".
    converter = readtype_funcs.get(type)
    if converter is None:
        raise TypeError("Conversion not available for given type")
    return type(converter(data))
|
|
|
|
def read_float(data):
    """Reads a string and outputs a float.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A float.
    """

    value = float(data)
    return value
|
|
|
|
def read_int(data):
    """Reads a string and outputs an integer.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An integer.
    """

    value = int(data)
    return value
|
|
|
|
def read_bool(data):
    """Reads a string and outputs a boolean.

    Takes a string of the form 'true' or 'false' and returns the appropriate
    boolean. The comparison ignores case and surrounding whitespace.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the string is not 'true' or 'false'.

    Returns:
        A boolean.
    """

    # Normalise once so both comparisons see the same text.
    token = data.strip().upper()
    if token == "TRUE":
        return True
    if token == "FALSE":
        return False
    raise ValueError(data + " does not represent a bool value")
|
|
|
|
def read_list(data, delims="[]", split=",", strip=" \n\t'"):
    """Reads a formatted string and outputs a list.

    The string must be formatted in the correct way. The text between the
    first occurrence of delims[0] and the first occurrence of delims[1] is
    split along the character split, and the characters in strip are removed
    from the beginning and end of each element. The standard list format is of
    the form '[array[0], array[1],..., array[n]]', which is used for actual
    lists. Other formats are used for tuples and dictionaries.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last character
            of the list format. '[]' by default.
        split: The character between different elements of the list format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. Whitespace and the single quote by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A list of strings.
    """

    try:
        start = data.index(delims[0]) + 1
        stop = data.index(delims[1])
    except ValueError:
        raise ValueError("Error in list syntax: could not locate delimiters")

    items = [item.strip(strip) for item in data[start:stop].split(split)]

    # Splitting an empty body yields a single empty string; report that case
    # as an empty list instead.
    if items == [""]:
        return []
    return items
|
|
|
|
def read_array(dtype, data):
    """Reads a formatted string and outputs an array.

    The format is as for standard python arrays, which is
    [array[0], array[1], ... , array[n]]. Note the use of comma separators, and
    the use of square brackets.

    Args:
        dtype: The data type of the elements of the target array.
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An array of data type dtype.
    """

    # Each string element is converted individually before the array is built.
    values = [read_type(dtype, item) for item in read_list(data)]
    return np.array(values, dtype)
|
|
|
|
def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
    """Reads a formatted string and outputs a tuple.

    The format is as for standard python tuples, which is
    (tuple[0], tuple[1], ... , tuple[n]). Note the comma
    separators, and the use of brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last character
            of the tuple format. '()' by default.
        split: The character between different elements of the tuple format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. Whitespace and the single quote by default.
        arg_type: The strings in the input will be converted, and a tuple
            of arg_type will be returned. int by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A tuple of elements of the specified data type.
    """

    items = read_list(data, delims=delims, split=split, strip=strip)
    return tuple(arg_type(item) for item in items)
|
|
|
|
def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
    """Reads a formatted string and outputs a dictionary.

    The format is as for standard python dictionaries, which is
    {keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}. Note the
    comma separators, and the use of curly brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last character
            of the dictionary format. '{}' by default.
        split: The character between different elements of the dictionary
            format. ',' by default.
        key_split: The character between the key word and the value.
            ':' by default.
        strip: Characters to be removed from the beginning and end of each
            element. ' \n\t' by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A dictionary of strings.
    """

    rdict = {}
    for item in read_list(data, delims=delims, split=split, strip=strip):
        # A real list is built (not a lazy map object) so that it can be
        # measured and indexed on every Python version.
        pair = [part.strip(strip) for part in item.split(key_split)]
        if len(pair) != 2:
            raise ValueError("Format for a key:value format is wrong for item " + item)
        rdict[pair[0]] = pair[1]

    return rdict
|
|
|
|
# Maps each supported target type to the function that parses it from a string.
# Plain strings are simply stripped of surrounding whitespace; a lambda calling
# the value's own strip() is used because the module-level string.strip helper
# is not available on Python 3.
readtype_funcs = {
    np.ndarray: read_array,
    dict: read_dict,
    float: read_float,
    int: read_int,
    bool: read_bool,
    str: lambda data: data.strip(),
    tuple: read_tuple,
    np.uint: read_int,
}
|
|
|
|
def write_type(type, data):
    """Writes a formatted string from a value of a specified type.

    The formatting function is looked up in writetype_funcs.

    Args:
        type: The data type of the value.
        data: The value to be written out.

    Raises:
        TypeError: Raised if it tries to write from a data type that has not been
            implemented.

    Returns:
        A formatted string.
    """

    # No registered writer is None, so None reliably means "not registered".
    writer = writetype_funcs.get(type)
    if writer is None:
        raise TypeError("Conversion not available for given type")
    return writer(data)
|
|
|
|
def write_list(data, delims="[]"):
    """Writes a formatted string from a list.

    The format of the output is as for a standard python list,
    [list[0], list[1],..., list[n]]. Note the space after the commas, and the
    use of square brackets.

    Args:
        data: The iterable of values to be written out.
        delims: An optional string of two characters giving the first and last
            character to be printed. Defaults to "[]".

    Returns:
        A formatted string.
    """

    # Joining puts the separator only between elements, so nothing has to be
    # trimmed afterwards and trailing commas or spaces that belong to the last
    # element are preserved.
    body = ", ".join(str(v) for v in data)
    return delims[0] + body + delims[1]
|
|
|
|
def write_tuple(data):
    """Writes a formatted string from a tuple.

    The format of the output is as for a standard python tuple,
    (tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and the
    use of brackets.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    # Same layout as a list, only with round brackets as delimiters.
    formatted = write_list(data, delims="()")
    return formatted
|
|
|
|
def write_float(data):
    """Writes a formatted string from a float.

    Floats are printed out in exponential format, to 8 decimal places, padded
    on the left with spaces up to a width of 16 characters.

    For example 1.0 --> '  1.00000000e+00'

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    template = "%16.8e"
    return template % data
|
|
|
|
def write_bool(data):
    """Writes a formatted string from a boolean.

    The string form of the value is cut to at most 5 characters and padded on
    the left with spaces to exactly 5 characters, so booleans are printed as
    either ' True' or 'False'.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    text = str(data)
    return text[:5].rjust(5)
|
|
|
|
def write_dict(data, delims="{}"):
    """Writes a formatted string from a dictionary.

    The format of the output is as for a standard python dictionary,
    {keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note the
    space after the commas, and the use of curly brackets.

    Args:
        data: The dictionary to be written out.
        delims: An optional string of two characters giving the first and last
            character to be printed. Defaults to "{}".

    Returns:
        A formatted string.
    """

    # Joining puts the separator only between entries, so nothing has to be
    # trimmed afterwards and commas or spaces that belong to the first
    # delimiter or to the last value are preserved.
    body = ", ".join(str(key) + ": " + str(data[key]) for key in data)
    return delims[0] + body + delims[1]
|
|
|
|
# Maps each supported value type to the function that formats it as a string.
# Plain strings are simply stripped of surrounding whitespace; a lambda calling
# the value's own strip() is used because the module-level string.strip helper
# is not available on Python 3.
writetype_funcs = {
    float: write_float,
    dict: write_dict,
    int: str,
    bool: write_bool,
    str: lambda data: data.strip(),
    tuple: write_tuple,
    np.uint: str,
}
|