Lichen

Annotated encoders.py

336:8c75cdf1a764
2016-12-06 Paul Boddie Introduced stream classes employing C-level FILE pointers, changing the sys stdin, stdout and stderr objects to be instances of these stream classes. Added fread and fwrite support to the native functions. Added support for raising EOFError.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@0 6
Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@56 22
from common import first
paul@56 23
paul@0 24
# Output encoding and decoding for the summary files.
paul@0 25
paul@0 26
def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string,
    producing "{}" where the joined result would otherwise be empty.
    """

    encoded = ", ".join(attrnames)
    if not encoded:
        return "{}"
    return encoded
paul@0 31
paul@0 32
def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, yielding
    "constrained" for a true status and "deduced" otherwise.
    """

    if constrained:
        return "constrained"
    return "deduced"
paul@0 37
paul@0 38
def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attrname,
    invocation, assignment) tuples. Each name is suffixed with "!" when
    invoked, otherwise with "=" when assigned, and the results are joined
    using ", ". An empty result is represented by "{}".
    """

    encoded = []

    for attrname, invocation, assignment in usage:

        # Invocation takes precedence over assignment.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""

        encoded.append("%s%s" % (attrname, suffix))

    return ", ".join(encoded) or "{}"
paul@0 47
paul@88 48
def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attrname, invocation, assignment) tuples.

    Each ", "-separated term in 's' is an attribute name optionally followed
    by "!" (indicating invocation) or "=" (indicating assignment). The string
    "{}", which encode_usage produces for empty usage, decodes to an empty
    tuple.
    """

    # Empty usage is written out as "{}" and must not be mistaken for an
    # attribute having that name.

    if s == "{}":
        return ()

    all_attrnames = set()

    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))
paul@88 59
paul@0 60
def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attrname, version) tuple,
    substituting "{}" for an absent name.
    """

    path, name, attrname, version = t
    fields = (path, name or "{}", attrname, version)
    return "%s %s %s:%d" % fields
paul@0 66
paul@0 67
def encode_location(t):

    """
    Encode the general location 't', a (path, name, attrname, version) tuple,
    in a concise form: "path name:version" where both name and version are
    given, "path name" where only the name is given, and "path :attrname"
    where no name is given.
    """

    path, name, attrname, version = t

    # Without a name, only the attribute name identifies the location.

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 78
paul@0 79
def encode_modifiers(modifiers):

    """
    Encode assignment details from 'modifiers', concatenating the encoded
    form of each modifier term without any separator.
    """

    return "".join([encode_modifier_term(term) for term in modifiers])
paul@0 87
paul@0 88
def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) pair, as "=" for
    assignment, "!" for invocation, or "_" where neither applies. Assignment
    takes precedence over invocation.
    """

    assignment, invocation = t

    if assignment:
        return "="
    if invocation:
        return "!"
    return "_"
paul@0 94
paul@0 95
def decode_modifier_term(s):

    """
    Decode modifier term 's' representing assignment status, returning an
    (assignment, invocation) pair of booleans.
    """

    assignment = s == "="
    invocation = s == "!"
    return (assignment, invocation)
paul@0 100
paul@56 101
paul@56 102
paul@56 103
# Test generation functions.
paul@56 104
paul@56 105
def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.

    The result is a list containing the value of each reference's get_kind
    method, in the iteration order of 'all_types'.
    """

    # A comprehension always produces a list, whereas map only does so on
    # Python 2.

    return [ref.get_kind() for ref in all_types]
paul@56 113
paul@237 114
def test_label_for_kind(kind):
paul@56 115
paul@237 116
    "Return the label used for 'kind' in test details."
paul@56 117
paul@237 118
    return kind == "<instance>" and "instance" or "type"
paul@56 119
paul@237 120
def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being the label
    for the kind reported by the reference's get_kind method.
    """

    kind = ref.get_kind()
    return test_label_for_kind(kind)
paul@56 125
paul@56 126
paul@56 127
paul@94 128
# Instruction representation encoding.
paul@94 129
paul@94 130
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    An instruction without arguments is encoded as the operation alone. Tuple
    arguments are encoded recursively, and false plain values are written as
    "{}".
    """

    op = instruction[0]
    args = instruction[1:]

    # A bare operation has no argument list.

    if not args:
        return op

    encoded = []

    for arg in args:
        if isinstance(arg, tuple):
            encoded.append(encode_instruction(arg))
        else:
            encoded.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(encoded))
paul@94 152
paul@94 153
paul@94 154
paul@0 155
# Output program encoding.
paul@0 156
paul@153 157
# Operations whose names indicate the loading of attributes.

attribute_loading_ops = (
    "__load_via_class",
    "__load_via_object",
    "__get_class_and_load",
    )

# The loading operations plus the plain storing operation.

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

# Checked counterparts of the loading operations.

checked_loading_ops = (
    "__check_and_load_via_class",
    "__check_and_load_via_object",
    "__check_and_load_via_any",
    )

# The checked loading operations plus the checked storing operations.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class",
    "__check_and_store_via_object",
    "__check_and_store_via_any",
    )

# Operations whose second argument is treated as a type name.

typename_ops = (
    "__test_common_instance",
    "__test_common_object",
    "__test_common_type",
    )

# Operations whose first two arguments are converted to addresses.

static_ops = (
    "__load_static",
    )

# Groupings consulted when encoding the first argument of an operation.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 183
paul@113 184
def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Where the instruction has no arguments, its sole element is either
    replaced using 'subs' or encoded as the address of a path.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        argstr = ""

    else:
        # Encode the arguments. Only the first argument is encoded with
        # knowledge of the operation; the others are given no operation.

        a = []
        converting_op = op
        for arg in args:
            a.append(encode_access_instruction_arg(arg, subs, converting_op))
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Convert type name arguments to position and code symbols.

        elif op in typename_ops:
            arg = encode_type_attribute(a[1])
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        argstr = "(%s)" % ", ".join(a)

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    # Membership testing replaces dict.has_key, which Python 3 does not
    # provide.

    if op in subs:
        op = subs[op]
    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr)
paul@113 249
paul@153 250
def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions.

    The 'op' parameter is the operation for which 'arg' is being encoded, or
    None where the operation should have no influence on the encoding.
    """

    if isinstance(arg, tuple):
        encoded = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded
        else:
            return encoded

    # Special values only need replacing, not encoding.

    # Membership testing replaces dict.has_key, which Python 3 does not
    # provide.

    elif arg in subs:
        return subs[arg]

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg)

    # Other values may need encoding.

    else:
        return encode_path(arg)
paul@113 278
paul@126 279
def encode_bound_reference(path):

    """
    Encode 'path' as a bound method name, prefixing the encoded path with
    "__bound_".
    """

    encoded = encode_path(path)
    return "__bound_%s" % encoded
paul@126 284
paul@0 285
def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, prefixing the
    encoded path with "__fn_".
    """

    encoded = encode_path(path)
    return "__fn_%s" % encoded
paul@0 290
paul@149 291
def encode_initialiser_pointer(path):

    """
    Encode 'path' as a reference to an initialiser function structure, this
    being the encoded form of the path extended with ".__init__".
    """

    initialiser_path = "%s.__init__" % path
    return encode_path(initialiser_path)
paul@149 296
paul@0 297
def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, prefixing
    the encoded path with "__new_".
    """

    encoded = encode_path(path)
    return "__new_%s" % encoded
paul@0 302
paul@136 303
def encode_literal_constant(n):

    """
    Encode a name for the literal constant with the number 'n', giving
    "__const" followed by the number.
    """

    template = "__const%d"
    return template % n
paul@136 308
paul@136 309
def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    type_name = value.__class__.__name__
    return "%svalue" % type_name
paul@136 314
paul@136 315
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floating point numbers are written using their string
    representation. Any other value is converted to a string and written as
    a double-quoted literal with backslashes, double quotes, newlines, tabs
    and carriage returns escaped.
    """

    if isinstance(value, (int, float)):
        return str(value)

    text = str(value)

    # Backslashes must be escaped first: the other replacements introduce
    # backslashes of their own, and these must not be doubled.

    for char, escaped in (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\t", "\\t"),
        ("\r", "\\r"),
        ):
        text = text.replace(char, escaped)

    return '"%s"' % text
paul@136 323
paul@283 324
def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence"), giving "__newdata_" followed
    by the style.
    """

    template = "__newdata_%s"
    return template % style
paul@283 332
paul@159 333
def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given
    'path', prefixing the encoded path with "__newliteral_".
    """

    encoded = encode_path(path)
    return "__newliteral_%s" % encoded
paul@159 340
paul@136 341
def encode_literal_reference(n):

    """
    Encode a reference to a literal constant with the number 'n', giving
    "__constvalue" followed by the number.
    """

    template = "__constvalue%d"
    return template % n
paul@136 346
paul@0 347
def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    A path found in 'reserved_words' is prefixed with "__". Otherwise, "#" and
    "$" are each replaced with "__" and "." is replaced with "_".
    """

    if path in reserved_words:
        return "__%s" % path

    encoded = path
    for symbol, replacement in (("#", "__"), ("$", "__"), (".", "_")):
        encoded = encoded.replace(symbol, replacement)
    return encoded
paul@0 355
paul@136 356
def encode_predefined_reference(path):

    """
    Encode a reference to a predefined constant value for 'path', prefixing
    the encoded path with "__predefined_".
    """

    encoded = encode_path(path)
    return "__predefined_%s" % encoded
paul@136 361
paul@150 362
def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    The 'kind' is translated using 'structure_size_prefixes' where an entry
    exists and is used unchanged otherwise. The encoded 'path' is appended
    after an underscore only where a path is given.
    """

    prefix = structure_size_prefixes.get(kind, kind)

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%ssize%s" % (prefix, suffix)
paul@150 370
paul@0 371
def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path', giving
    "__" followed by the symbol type and, where a path is given, an
    underscore and the encoded path.
    """

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%s%s" % (symbol_type, suffix)
paul@0 376
paul@150 377
def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure,
    indicating a 'path' for the specific object concerned.

    The 'kind' must have an entry in 'table_name_prefixes'.
    """

    prefix = table_name_prefixes[kind]
    encoded = encode_path(path)
    return "__%sTable_%s" % (prefix, encoded)
paul@150 385
paul@131 386
def encode_type_attribute(path):

    """
    Encode the special type attribute for 'path' by prefixing the path with
    "#".
    """

    template = "#%s"
    return template % path
paul@131 391
paul@318 392
def decode_type_attribute(s):

    """
    Decode the special type attribute 's' by discarding its first character.
    """

    marker_length = 1
    return s[marker_length:]
paul@318 397
paul@318 398
def is_type_attribute(s):

    """
    Return whether 's' is a type attribute name, meaning that it starts with
    "#".
    """

    marker = "#"
    return s.startswith(marker)
paul@318 403
paul@56 404
paul@56 405
paul@150 406
# A mapping from kinds to structure size reference prefixes.
paul@150 407
paul@150 408
# Consulted by encode_size, which uses a kind without an entry here unchanged.

structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
    }
paul@150 413
paul@150 414
# A mapping from kinds to table name prefixes.
paul@150 415
paul@150 416
# Consulted by encode_tablename, which requires an entry for each kind given.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
    }
paul@150 422
paul@150 423
paul@150 424
paul@0 425
# Output language reserved words.
paul@0 426
paul@0 427
# Names that encode_path must prefix because they cannot be used as plain
# identifiers in the output program. The list covers the C89 keywords plus
# "inline" and "restrict" from C99, together with NULL.

reserved_words = [
    "auto",
    "break",
    "case", "char", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extern",
    "float", "for",
    "goto",
    "if", "inline", "int",
    "long",
    "NULL",
    "register", "restrict", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile",
    "while",
    ]
paul@0 437
paul@0 438
# vim: tabstop=4 expandtab shiftwidth=4