BinaryCifReader
Reader methods for the binary CIF format.
Source code in mmcif/io/BinaryCifReader.py
class BinaryCifReader(object):
"""Reader methods for the binary CIF format."""
def __init__(self, storeStringsAsBytes=False, defaultStringEncoding="utf-8"):
"""Create an instance of the binary CIF reader class.
Args:
storeStringsAsBytes (bool, optional): strings are stored as lists of bytes. Defaults to False.
defaultStringEncoding (str, optional): default encoding for string data. Defaults to "utf-8".
"""
self.__storeStringsAsBytes = storeStringsAsBytes
self.__defaultStringEncoding = defaultStringEncoding
def deserialize(self, locator, timeout=None):
"""Deserialize the input binary CIF file stored in the file/URL locator path.
Args:
locator (str): input file path or URL
timeout (float): timeout for fetching a remote url
Returns:
list: list DataContainer objects
"""
cL = []
try:
if self.__isLocal(locator):
with gzip.open(locator, mode="rb") if locator[-3:] == ".gz" else open(locator, "rb") as fh:
cL = self.__deserialize(fh, storeStringsAsBytes=self.__storeStringsAsBytes)
else:
if locator.endswith(".gz"):
customHeader = {"Accept-Encoding": "gzip"}
with closing(requests.get(locator, headers=customHeader, timeout=timeout)) as fh:
ufh = gzip.GzipFile(fileobj=io.BytesIO(fh.content))
cL = self.__deserialize(ufh, storeStringsAsBytes=self.__storeStringsAsBytes)
else:
with closing(requests.get(locator, timeout=timeout)) as fh:
cL = self.__deserialize(io.BytesIO(fh.content), storeStringsAsBytes=self.__storeStringsAsBytes)
except Exception as e:
logger.exception("Failing with %s", str(e))
return cL
def __deserialize(self, fh, storeStringsAsBytes=False):
cL = []
try:
dec = BinaryCifDecoders(storeStringsAsBytes=storeStringsAsBytes)
bD = msgpack.unpack(fh)
#
logger.debug("bD.keys() %r", bD.keys())
logger.debug("bD['dataBlocks'] %s", bD[self.__toBytes("dataBlocks")])
#
for dataBlock in bD[self.__toBytes("dataBlocks")]:
header = self.__fromBytes(dataBlock[self.__toBytes("header")]) if self.__toBytes("header") in dataBlock else None
logger.debug("header %r", header)
logger.debug("dataBlock %r", dataBlock)
#
dc = DataContainer(header)
categoryList = dataBlock[self.__toBytes("categories")] if self.__toBytes("categories") in dataBlock else []
for category in categoryList:
catName = self.__fromBytes(category[self.__toBytes("name")])[1:]
colList = category[self.__toBytes("columns")]
logger.debug("catName %r columns %r", catName, colList)
colD = OrderedDict()
atNameList = []
for col in colList:
logger.debug("col.keys() %r", col.keys())
atName = self.__fromBytes(col[self.__toBytes("name")])
atData = col[self.__toBytes("data")]
logger.debug("atData encoding (%d) data (%d)", len(atData[self.__toBytes("encoding")]), len(atData[self.__toBytes("data")]))
atMask = col[self.__toBytes("mask")]
logger.debug("catName %r atName %r", catName, atName)
logger.debug(" >atData.data %r", atData[self.__toBytes("data")])
logger.debug(" >atData.encoding (%d) %r", len(atData[self.__toBytes("encoding")]), atData[self.__toBytes("encoding")])
logger.debug(" >mask %r", atMask)
tVal = dec.decode(col[self.__toBytes("data")][self.__toBytes("data")], col[self.__toBytes("data")][self.__toBytes("encoding")])
if col[self.__toBytes("mask")]:
mVal = dec.decode(col[self.__toBytes("mask")][self.__toBytes("data")], col[self.__toBytes("mask")][self.__toBytes("encoding")])
tVal = ["?" if m == 2 else "." if m == 1 else d for d, m in zip(tVal, mVal)]
colD[atName] = tVal
atNameList.append(atName)
#
cObj = DataCategory(catName, attributeNameList=atNameList)
genL = [colGen for colGen in colD.values()]
for rowTup in zip(*genL):
row = list(rowTup)
logger.debug("row %r", row)
cObj.append(row)
#
dc.append(cObj)
cL.append(dc)
except Exception as e:
logger.exception("Failing with %s", str(e))
return cL
def __isLocal(self, locator):
"""Returns true if input string can be interpreted as a local file path.
Args:
locator (str): url or path string
Returns:
bool: True if locator is a local path
"""
try:
locSp = urlsplit(locator)
return locSp.scheme in ["", "file"]
except Exception as e:
logger.exception("For locator %r failing with %s", locator, str(e))
return None
def __toBytes(self, strVal):
"""Optional conversion of the input string to bytes according to the class setting (storeStringsAsBytes).
Args:
strVal (string): input string
Returns:
string or bytes: optionally converted string.
"""
try:
return strVal.encode(self.__defaultStringEncoding) if self.__storeStringsAsBytes else strVal
except (UnicodeDecodeError, AttributeError):
logger.exception("Bad type for %r", strVal)
return strVal
def __fromBytes(self, byteVal):
"""Optional conversion of the input value according to the class setting (storeStringsAsBytes).
Args:
byteVal (string): input byte object
Returns:
string: optionally converted input value
"""
try:
return byteVal.decode(self.__defaultStringEncoding) if self.__storeStringsAsBytes else byteVal
except (UnicodeDecodeError, AttributeError):
logger.exception("Bad type for %r", byteVal)
return byteVal
__init__(self, storeStringsAsBytes=False, defaultStringEncoding='utf-8')
special
Create an instance of the binary CIF reader class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
storeStringsAsBytes |
bool |
strings are stored as lists of bytes. Defaults to False. |
False |
defaultStringEncoding |
str |
default encoding for string data. Defaults to "utf-8". |
'utf-8' |
Source code in mmcif/io/BinaryCifReader.py
def __init__(self, storeStringsAsBytes=False, defaultStringEncoding="utf-8"):
"""Create an instance of the binary CIF reader class.
Args:
storeStringsAsBytes (bool, optional): strings are stored as lists of bytes. Defaults to False.
defaultStringEncoding (str, optional): default encoding for string data. Defaults to "utf-8".
"""
self.__storeStringsAsBytes = storeStringsAsBytes
self.__defaultStringEncoding = defaultStringEncoding
deserialize(self, locator, timeout=None)
Deserialize the input binary CIF file stored in the file/URL locator path.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
locator |
str |
input file path or URL |
required |
timeout |
float |
timeout for fetching a remote url |
None |
Returns:
Type | Description |
---|---|
list |
list DataContainer objects |
Source code in mmcif/io/BinaryCifReader.py
def deserialize(self, locator, timeout=None):
"""Deserialize the input binary CIF file stored in the file/URL locator path.
Args:
locator (str): input file path or URL
timeout (float): timeout for fetching a remote url
Returns:
list: list DataContainer objects
"""
cL = []
try:
if self.__isLocal(locator):
with gzip.open(locator, mode="rb") if locator[-3:] == ".gz" else open(locator, "rb") as fh:
cL = self.__deserialize(fh, storeStringsAsBytes=self.__storeStringsAsBytes)
else:
if locator.endswith(".gz"):
customHeader = {"Accept-Encoding": "gzip"}
with closing(requests.get(locator, headers=customHeader, timeout=timeout)) as fh:
ufh = gzip.GzipFile(fileobj=io.BytesIO(fh.content))
cL = self.__deserialize(ufh, storeStringsAsBytes=self.__storeStringsAsBytes)
else:
with closing(requests.get(locator, timeout=timeout)) as fh:
cL = self.__deserialize(io.BytesIO(fh.content), storeStringsAsBytes=self.__storeStringsAsBytes)
except Exception as e:
logger.exception("Failing with %s", str(e))
return cL
Column oriented Binary CIF decoders implementing StringArray, ByteArray, IntegerPacking, Delta, RunLength, FixedPoint, and IntervalQuantization from the BinaryCIF specification described in:
Sehnal D, Bittrich S, Velankar S, Koca J, Svobodova R, Burley SK, Rose AS. BinaryCIF and CIFTools-Lightweight, efficient and extensible macromolecular data management. PLoS Comput Biol. 2020 Oct 19;16(10):e1008247. doi: 10.1371/journal.pcbi.1008247. PMID: 33075050; PMCID: PMC7595629.
and in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md
and from the I/HM Python implementation at https://github.com/ihmwg/python-ihm[summary]
Source code in mmcif/io/BinaryCifReader.py
class BinaryCifDecoders(object):
"""Column oriented Binary CIF decoders implementing
StringArray, ByteArray, IntegerPacking, Delta, RunLength,
FixedPoint, and IntervalQuantization from the BinaryCIF
specification described in:
Sehnal D, Bittrich S, Velankar S, Koca J, Svobodova R, Burley SK, Rose AS.
BinaryCIF and CIFTools-Lightweight, efficient and extensible macromolecular data management.
PLoS Comput Biol. 2020 Oct 19;16(10):e1008247.
doi: 10.1371/journal.pcbi.1008247. PMID: 33075050; PMCID: PMC7595629.
and in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md
and from the I/HM Python implementation at https://github.com/ihmwg/python-ihm[summary]
"""
bCifCodeTypeD = {1: "integer_8", 2: "integer_16", 3: "integer_32", 4: "unsigned_integer_8", 5: "unsigned_integer_16", 6: "unsigned_integer_32", 32: "float_32", 33: "float_64"}
"""Binary CIF protocol internal data type codes to integer and float types
"""
bCifTypeD = {
"integer_8": {"struct_format_code": "b", "min": -0x7F - 1, "max": 0x7F},
"integer_16": {"struct_format_code": "h", "min": -0x7FFF - 1, "max": 0x7FFF},
"integer_32": {"struct_format_code": "i", "min": -0x7FFFFFFF - 1, "max": 0x7FFFFFFF},
"unsigned_integer_8": {"struct_format_code": "B", "min": 0, "max": 0xFF},
"unsigned_integer_16": {"struct_format_code": "H", "min": 0, "max": 0xFFFF},
"unsigned_integer_32": {"struct_format_code": "I", "min": 0, "max": 0xFFFFFFFF},
"float_32": {"struct_format_code": "f", "min": 1.175494351e-38, "max": 3.402823466e38},
"float_64": {"struct_format_code": "d", "min": 2.2250738585072014e-308, "max": 1.7976931348623158e308},
}
"""Binary CIF data type feature dictionary
"""
def __init__(self, storeStringsAsBytes=False, defaultStringEncoding="utf-8", verbose=False):
"""Create an instance of the binary CIF encoder class.
Args:
storeStringsAsBytes (bool, optional): express keys and strings as byte types otherwise follow the default encoding. Defaults to False.
defaultStringEncoding (str, optional): default encoding for string types. Defaults to "utf-8".
verbose(bool, optional): provide tracking of type conversion issues. Defaults to False.
"""
self.__storeStringsAsBytes = storeStringsAsBytes
self.__defaultStringEncoding = defaultStringEncoding
self.__verbose = verbose
#
self.__encodersMethodD = {
"StringArray": self.stringArrayDecoder,
"ByteArray": self.byteArrayDecoder,
"IntegerPacking": self.integerPackingDecoder,
"Delta": self.deltaDecoder,
"RunLength": self.runLengthDecoder,
"FixedPoint": self.fixedPointDecoder,
"IntervalQuantization": self.intervalQuantizationDecoder,
}
def decode(self, colDataList, encodingDictList):
"""Return the decoded input data column using the input list of encodings
Args:
colDataList (list): column of data to be decoded
encodingDictList (list): list of dictionary holding binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of column data
"""
for encoding in reversed(encodingDictList):
encType = self.__fromBytes(encoding[self.__toBytes("kind")])
colDataList = self.__encodersMethodD[encType](colDataList, encoding)
return colDataList
def stringArrayDecoder(self, colDataList, encodingDict):
"""Decode an array of strings stored as a concatenation of all unique
strings, a list of offsets to construct the unique substrings, and indices into
the offset array.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of string data
"""
offsetList = list(self.decode(encodingDict[self.__toBytes("offsets")], encodingDict[self.__toBytes("offsetEncoding")]))
lookupIndexIt = self.decode(colDataList, encodingDict[self.__toBytes("dataEncoding")])
stringData = self.__fromBytes(encodingDict[self.__toBytes("stringData")])
uniqueStringList = []
for iBegin, iEnd in zip(offsetList, offsetList[1:]):
uniqueStringList.append(stringData[iBegin:iEnd])
logger.debug("iBegin %d iEnd %d %r ", iBegin, iEnd, stringData[iBegin:iEnd])
for ii in lookupIndexIt:
yield uniqueStringList[ii] if ii >= 0 else None
def byteArrayDecoder(self, colDataList, encodingDict):
"""Decode input byte list into a list of integers/floats
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer/float data
"""
structKey = self.bCifCodeTypeD[encodingDict[self.__toBytes("type")]]
structFormatCode = self.bCifTypeD[structKey]["struct_format_code"]
count = len(colDataList) // struct.calcsize(structFormatCode)
# struct.unpack() format string for little-endian = < format_string code * counts
return struct.unpack("<" + structFormatCode * count, colDataList)
def __unsignedDecode(self, colDataList, encodingDict):
upperLimit = self.bCifTypeD["unsigned_integer_8"]["max"] if encodingDict[self.__toBytes("byteCount")] == 1 else self.bCifTypeD["unsigned_integer_16"]["max"]
ii = 0
while ii < len(colDataList):
value = 0
tVal = colDataList[ii]
while tVal == upperLimit:
value += tVal
ii += 1
tVal = colDataList[ii]
yield value + tVal
ii += 1
def __signedDecode(self, colDataList, encodingDict):
upperLimit = self.bCifTypeD["integer_8"]["max"] if encodingDict[self.__toBytes("byteCount")] == 1 else self.bCifTypeD["integer_16"]["max"]
lowerLimit = self.bCifTypeD["integer_8"]["min"] if encodingDict[self.__toBytes("byteCount")] == 1 else self.bCifTypeD["integer_16"]["min"]
ii = 0
while ii < len(colDataList):
value = 0
tVal = colDataList[ii]
while tVal == upperLimit or tVal == lowerLimit:
value += tVal
ii += 1
tVal = colDataList[ii]
yield value + tVal
ii += 1
def integerPackingDecoder(self, colDataList, encodingDict):
"""Decode a (32-bit) integer list packed into 8- or 16-bit values.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
if encodingDict[self.__toBytes("isUnsigned")]:
return self.__unsignedDecode(colDataList, encodingDict)
else:
return self.__signedDecode(colDataList, encodingDict)
def deltaDecoder(self, colDataList, encodingDict):
"""Decode an integer list stored as a list of consecutive differences.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
val = encodingDict[self.__toBytes("origin")]
for diff in colDataList:
val += diff
yield val
def runLengthDecoder(self, colDataList, encodingDict):
"""Decode an integer list stored as pairs of (value, number of repeats).
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
_ = encodingDict
colDataList = list(colDataList)
for ii in range(0, len(colDataList), 2):
for _ in range(colDataList[ii + 1]):
yield colDataList[ii]
def fixedPointDecoder(self, colDataList, encodingDict):
"""Decode a floating point list stored as integers and a multiplicative factor.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of float data
"""
factor = float(encodingDict[self.__toBytes("factor")])
for val in colDataList:
yield float(val) / factor
def intervalQuantizationDecoder(self, colDataList, encodingDict):
"""Decode a list of 32-bit integers quantized within a given interval into a list of floats.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of float data
"""
delta = float(encodingDict[self.__toBytes("max")] - encodingDict[self.__toBytes("min")]) / float(encodingDict[self.__toBytes("numSteps")] - 1.0)
minVal = encodingDict[self.__toBytes("min")]
for val in colDataList:
yield minVal + delta * val
def __toBytes(self, strVal):
"""Optional conversion of the input string to bytes according to the class setting (storeStringsAsBytes).
Args:
strVal (string): input string
Returns:
string or bytes: optionally converted string.
"""
try:
return strVal.encode(self.__defaultStringEncoding) if self.__storeStringsAsBytes else strVal
except (UnicodeDecodeError, AttributeError):
if self.__verbose:
logger.exception("Bad type for %r", strVal)
return strVal
def __fromBytes(self, byteVal):
"""Optional conversion of the input value according to the class setting (storeStringsAsBytes).
Args:
byteVal (string): input byte object
Returns:
string: optionally converted input value
"""
try:
return byteVal.decode(self.__defaultStringEncoding) if self.__storeStringsAsBytes else byteVal
except (UnicodeDecodeError, AttributeError):
if self.__verbose:
logger.exception("Bad type for %r", byteVal)
return byteVal
bCifCodeTypeD
Binary CIF protocol internal data type codes to integer and float types
bCifTypeD
Binary CIF data type feature dictionary
__init__(self, storeStringsAsBytes=False, defaultStringEncoding='utf-8', verbose=False)
special
Create an instance of the binary CIF encoder class.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
storeStringsAsBytes |
bool |
express keys and strings as byte types otherwise follow the default encoding. Defaults to False. |
False |
defaultStringEncoding |
str |
default encoding for string types. Defaults to "utf-8". |
'utf-8' |
verbose(bool, |
optional |
provide tracking of type conversion issues. Defaults to False. |
required |
Source code in mmcif/io/BinaryCifReader.py
def __init__(self, storeStringsAsBytes=False, defaultStringEncoding="utf-8", verbose=False):
"""Create an instance of the binary CIF encoder class.
Args:
storeStringsAsBytes (bool, optional): express keys and strings as byte types otherwise follow the default encoding. Defaults to False.
defaultStringEncoding (str, optional): default encoding for string types. Defaults to "utf-8".
verbose(bool, optional): provide tracking of type conversion issues. Defaults to False.
"""
self.__storeStringsAsBytes = storeStringsAsBytes
self.__defaultStringEncoding = defaultStringEncoding
self.__verbose = verbose
#
self.__encodersMethodD = {
"StringArray": self.stringArrayDecoder,
"ByteArray": self.byteArrayDecoder,
"IntegerPacking": self.integerPackingDecoder,
"Delta": self.deltaDecoder,
"RunLength": self.runLengthDecoder,
"FixedPoint": self.fixedPointDecoder,
"IntervalQuantization": self.intervalQuantizationDecoder,
}
byteArrayDecoder(self, colDataList, encodingDict)
Decode input byte list into a list of integers/floats
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of integer/float data |
Source code in mmcif/io/BinaryCifReader.py
def byteArrayDecoder(self, colDataList, encodingDict):
"""Decode input byte list into a list of integers/floats
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer/float data
"""
structKey = self.bCifCodeTypeD[encodingDict[self.__toBytes("type")]]
structFormatCode = self.bCifTypeD[structKey]["struct_format_code"]
count = len(colDataList) // struct.calcsize(structFormatCode)
# struct.unpack() format string for little-endian = < format_string code * counts
return struct.unpack("<" + structFormatCode * count, colDataList)
decode(self, colDataList, encodingDictList)
Return the decoded input data column using the input list of encodings
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDictList |
list |
list of dictionary holding binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of column data |
Source code in mmcif/io/BinaryCifReader.py
def decode(self, colDataList, encodingDictList):
"""Return the decoded input data column using the input list of encodings
Args:
colDataList (list): column of data to be decoded
encodingDictList (list): list of dictionary holding binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of column data
"""
for encoding in reversed(encodingDictList):
encType = self.__fromBytes(encoding[self.__toBytes("kind")])
colDataList = self.__encodersMethodD[encType](colDataList, encoding)
return colDataList
deltaDecoder(self, colDataList, encodingDict)
Decode an integer list stored as a list of consecutive differences.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of integer data |
Source code in mmcif/io/BinaryCifReader.py
def deltaDecoder(self, colDataList, encodingDict):
"""Decode an integer list stored as a list of consecutive differences.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
val = encodingDict[self.__toBytes("origin")]
for diff in colDataList:
val += diff
yield val
fixedPointDecoder(self, colDataList, encodingDict)
Decode a floating point list stored as integers and a multiplicative factor.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of float data |
Source code in mmcif/io/BinaryCifReader.py
def fixedPointDecoder(self, colDataList, encodingDict):
"""Decode a floating point list stored as integers and a multiplicative factor.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of float data
"""
factor = float(encodingDict[self.__toBytes("factor")])
for val in colDataList:
yield float(val) / factor
integerPackingDecoder(self, colDataList, encodingDict)
Decode a (32-bit) integer list packed into 8- or 16-bit values.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of integer data |
Source code in mmcif/io/BinaryCifReader.py
def integerPackingDecoder(self, colDataList, encodingDict):
"""Decode a (32-bit) integer list packed into 8- or 16-bit values.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
if encodingDict[self.__toBytes("isUnsigned")]:
return self.__unsignedDecode(colDataList, encodingDict)
else:
return self.__signedDecode(colDataList, encodingDict)
intervalQuantizationDecoder(self, colDataList, encodingDict)
Decode a list of 32-bit integers quantized within a given interval into a list of floats.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of float data |
Source code in mmcif/io/BinaryCifReader.py
def intervalQuantizationDecoder(self, colDataList, encodingDict):
"""Decode a list of 32-bit integers quantized within a given interval into a list of floats.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of float data
"""
delta = float(encodingDict[self.__toBytes("max")] - encodingDict[self.__toBytes("min")]) / float(encodingDict[self.__toBytes("numSteps")] - 1.0)
minVal = encodingDict[self.__toBytes("min")]
for val in colDataList:
yield minVal + delta * val
runLengthDecoder(self, colDataList, encodingDict)
Decode an integer list stored as pairs of (value, number of repeats).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of integer data |
Source code in mmcif/io/BinaryCifReader.py
def runLengthDecoder(self, colDataList, encodingDict):
"""Decode an integer list stored as pairs of (value, number of repeats).
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of integer data
"""
_ = encodingDict
colDataList = list(colDataList)
for ii in range(0, len(colDataList), 2):
for _ in range(colDataList[ii + 1]):
yield colDataList[ii]
stringArrayDecoder(self, colDataList, encodingDict)
Decode an array of strings stored as a concatenation of all unique strings, a list of offsets to construct the unique substrings, and indices into the offset array.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
colDataList |
list |
column of data to be decoded |
required |
encodingDict |
dict |
dictionary of binary CIF encoding details elements described in the specification at https://github.com/molstar/BinaryCIF/blob/master/encoding.md |
required |
Yields:
Type | Description |
---|---|
list |
decoded list of string data |
Source code in mmcif/io/BinaryCifReader.py
def stringArrayDecoder(self, colDataList, encodingDict):
"""Decode an array of strings stored as a concatenation of all unique
strings, a list of offsets to construct the unique substrings, and indices into
the offset array.
Args:
colDataList (list): column of data to be decoded
encodingDict (dict): dictionary of binary CIF encoding details
elements described in the specification at
https://github.com/molstar/BinaryCIF/blob/master/encoding.md
Yields:
list: decoded list of string data
"""
offsetList = list(self.decode(encodingDict[self.__toBytes("offsets")], encodingDict[self.__toBytes("offsetEncoding")]))
lookupIndexIt = self.decode(colDataList, encodingDict[self.__toBytes("dataEncoding")])
stringData = self.__fromBytes(encodingDict[self.__toBytes("stringData")])
uniqueStringList = []
for iBegin, iEnd in zip(offsetList, offsetList[1:]):
uniqueStringList.append(stringData[iBegin:iEnd])
logger.debug("iBegin %d iEnd %d %r ", iBegin, iEnd, stringData[iBegin:iEnd])
for ii in lookupIndexIt:
yield uniqueStringList[ii] if ii >= 0 else None