Lichen

Annotated encoders.py

469:0e7b5712a29b
2017-01-12 Paul Boddie Raise an exception when a module is not found.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@0 6
Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@56 22
from common import first
paul@56 23
paul@0 24
# Output encoding and decoding for the summary files.
paul@0 25
paul@0 26
def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a single string in which the
    names are separated by ", ". An empty collection is encoded as "{}".
    """

    return ", ".join(attrnames) or "{}"
paul@0 31
paul@0 32
def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true status and "deduced" otherwise.
    """

    if constrained:
        return "constrained"
    return "deduced"
paul@0 37
paul@0 38
def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation flag, assignment flag) tuples.

    Each name is given a "!" suffix if invoked, otherwise a "=" suffix if
    assigned, otherwise no suffix. The encoded names are separated by ", " and
    an empty collection is encoded as "{}".
    """

    encoded = []

    for attrname, invocation, assignment in usage:

        # Invocation takes precedence over assignment.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""

        encoded.append("%s%s" % (attrname, suffix))

    return ", ".join(encoded) or "{}"
paul@0 47
paul@88 48
def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attribute name, invocation flag, assignment flag) tuples.

    A trailing "!" on a name sets the invocation flag and a trailing "=" sets
    the assignment flag. The "{}" marker, which encode_usage produces for
    empty usage, is decoded as an empty tuple.
    """

    # The empty marker is not an attribute name.

    if s == "{}":
        return ()

    all_attrnames = set()

    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))
paul@88 59
paul@0 60
def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attribute name, version)
    tuple, in the form "path name attrname:version". An absent or empty name
    is written as "{}".
    """

    path, name, attrname, version = t

    if not name:
        name = "{}"

    return "%s %s %s:%d" % (path, name, attrname, version)
paul@0 66
paul@0 67
def encode_location(t):

    """
    Encode the general location 't', a (path, name, attribute name, version)
    tuple, in a concise form:

     * "path :attrname" where no name is given
     * "path name" where a name but no version is given
     * "path name:version" where both a name and a version are given
    """

    path, name, attrname, version = t

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 78
paul@0 79
def encode_modifiers(modifiers):

    """
    Encode assignment details from 'modifiers', concatenating the encoded form
    of each modifier term without any separator.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])
paul@0 87
paul@0 88
def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment flag, invocation flag) tuple, as "="
    for assignment, "!" for invocation, or "_" where neither applies.
    """

    assignment, invocation = t

    # Assignment takes precedence over invocation.

    if assignment:
        return "="
    if invocation:
        return "!"
    return "_"
paul@0 94
paul@0 95
def decode_modifier_term(s):

    """
    Decode modifier term 's', returning an (assignment flag, invocation flag)
    tuple where "=" indicates assignment and "!" indicates invocation.
    """

    return (s == "=", s == "!")
paul@0 100
paul@56 101
paul@56 102
paul@56 103
# Test generation functions.
paul@56 104
paul@56 105
def get_kinds(all_types):

    """
    Return a list of object kind details for 'all_types', being a collection
    of references for program types, each providing a 'get_kind' method.
    """

    # A list is built explicitly so that the result is a list regardless of the
    # Python version in use.

    return [ref.get_kind() for ref in all_types]
paul@56 113
paul@237 114
def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for any other kind.
    """

    if kind == "<instance>":
        return "instance"
    return "type"
paul@56 119
paul@237 120
def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being determined by
    the kind obtained from the reference's 'get_kind' method.
    """

    return test_label_for_kind(ref.get_kind())
paul@56 125
paul@56 126
paul@56 127
paul@94 128
# Instruction representation encoding.
paul@94 129
paul@94 130
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively as instructions. Plain arguments
    are used directly, with false values being written as "{}". An instruction
    without arguments is encoded as the operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    encoded = []

    for arg in args:
        if isinstance(arg, tuple):
            encoded.append(encode_instruction(arg))
        else:
            encoded.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(encoded))
paul@94 152
paul@94 153
paul@94 154
paul@0 155
# Output program encoding.
paul@0 156
paul@153 157
# Operations loading attributes.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

# Operations whose attribute name argument is converted to a position symbol
# by encode_access_instruction.

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

# Checked operations loading attributes.

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

# Operations whose attribute name argument is converted to position and code
# symbols by encode_access_instruction.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

# Operations whose type name argument is converted to position and code
# symbols by encode_access_instruction.

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

# Operations whose type argument is passed by address.

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

# Operations whose first two arguments are passed by address.

static_ops = (
    "__load_static",
    )

# Operations whose first argument is converted to a reference by
# encode_access_instruction_arg, and operations whose results are treated as
# attributes needing such conversion.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 187
paul@113 188
def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Where the first element of the instruction is found in 'subs', it is
    substituted. Otherwise, where no arguments are given, the element is
    encoded as the address of a path.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        argstr = ""

    else:
        # Encode the arguments, indicating the operation only when encoding
        # the first argument.

        a = []
        converting_op = op
        for arg in args:
            a.append(encode_access_instruction_arg(arg, subs, converting_op))
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            a[1] = encode_symbol("pos", a[1])

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            attrname = a[1]
            a[1] = encode_symbol("pos", attrname)
            a.insert(2, encode_symbol("code", attrname))

        # Convert type name arguments to position and code symbols, using the
        # original argument and not its encoded form.

        elif op in typename_ops:
            typename = encode_type_attribute(args[1])
            a[1] = encode_symbol("pos", typename)
            a.insert(2, encode_symbol("code", typename))

        # Obtain addresses of type arguments.

        elif op in type_ops:
            a[1] = "&%s" % a[1]

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        argstr = "(%s)" % ", ".join(a)

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        op = subs[op]
    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr)
paul@113 258
paul@153 259
def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions. The 'op' parameter
    indicates the operation for which 'arg' is being encoded, or is None where
    the operation should not influence the encoding.
    """

    # Nested instructions are encoded as instructions.

    if isinstance(arg, tuple):
        encoded = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded
        else:
            return encoded

    # Special values only need replacing, not encoding.

    elif arg in subs:
        return subs[arg]

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg)

    # Other values may need encoding.

    else:
        return encode_path(arg)
paul@113 287
paul@126 288
def encode_bound_reference(path):

    """
    Encode 'path' as a bound method name, this being the encoded path with a
    "__bound_" prefix.
    """

    return "__bound_%s" % encode_path(path)
paul@126 293
paul@0 294
def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, this being the
    encoded path with a "__fn_" prefix.
    """

    return "__fn_%s" % encode_path(path)
paul@0 299
paul@149 300
def encode_initialiser_pointer(path):

    """
    Encode 'path' as a reference to an initialiser function structure, this
    being the encoded form of the '__init__' attribute path within 'path'.
    """

    return encode_path("%s.__init__" % path)
paul@149 305
paul@0 306
def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, this being
    the encoded path with a "__new_" prefix.
    """

    return "__new_%s" % encode_path(path)
paul@0 311
paul@136 312
def encode_literal_constant(n):

    """
    Encode a name for the literal constant with the number 'n', which must be
    an integer.
    """

    return "__const%d" % n
paul@136 317
paul@378 318
def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value and zero for any other kind of value.
    """

    # Support Python versions without the basestring type.

    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(value, string_types):
        return len(value)
    return 0
paul@378 326
paul@136 327
def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__
paul@136 332
paul@136 333
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Integers and floating point
    numbers are encoded using their string representations. Other values are
    converted to strings and encoded as double-quoted string literals with
    escaping applied.
    """

    if isinstance(value, (int, float)):
        return str(value)

    # Characters having dedicated escape sequences. The backslash must be
    # escaped to prevent it from introducing an escape sequence itself.

    simple_escapes = {
        '"' : '\\"',
        '\\' : '\\\\',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        }

    l = []

    # Encode characters including non-ASCII ones.
    # NOTE: Character values are assumed not to exceed 0xff.

    for c in str(value):
        if c in simple_escapes:
            l.append(simple_escapes[c])
        elif 0x20 <= ord(c) < 0x7f:
            l.append(c)

        # Use three-digit octal escapes since hexadecimal escapes have no
        # length limit and would absorb any following hexadecimal digits.

        else:
            l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)
paul@136 353
paul@283 354
def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence"), this being the style with a
    "__newdata_" prefix.
    """

    return "__newdata_%s" % style
paul@283 362
paul@159 363
def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given
    'path', this being the encoded path with a "__newliteral_" prefix.
    """

    return "__newliteral_%s" % encode_path(path)
paul@159 370
paul@136 371
def encode_literal_reference(n):

    """
    Encode a reference to a literal constant with the number 'n', which must
    be an integer.
    """

    return "__constvalue%d" % n
paul@136 376
paul@340 377
# Track all encoded paths, detecting and avoiding conflicts.
paul@340 378
paul@340 379
all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are given a "__" prefix. Otherwise, "#" and "$" are each
    replaced by "__" and any "." by "_". Where the encoding of a dotted path
    has already been recorded for a different path, the "." replacement is
    lengthened by one underscore at a time until an unused encoding, or one
    already recorded for this path, is found.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without dots are not recorded or tested for conflicts.

    if "." not in path:
        return part_encoded

    separator = "_"
    encoded = part_encoded.replace(".", separator)

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)

    while previous:
        if path == previous:
            return encoded
        separator += "_"
        encoded = part_encoded.replace(".", separator)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded
paul@0 412
paul@136 413
def encode_predefined_reference(path):

    """
    Encode a reference to a predefined constant value for 'path', this being
    the encoded path with a "__predefined_" prefix.
    """

    return "__predefined_%s" % encode_path(path)
paul@136 418
paul@150 419
def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Where 'kind' has an entry in the structure size prefixes, the prefix is
    used in place of the kind. Where 'path' is given, its encoded form is
    appended after an underscore.
    """

    prefix = structure_size_prefixes.get(kind, kind)
    suffix = ("_%s" % encode_path(path)) if path else ""
    return "__%ssize%s" % (prefix, suffix)
paul@150 427
paul@0 428
def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path'. Where
    'path' is given, its encoded form is appended after an underscore.
    """

    suffix = ("_%s" % encode_path(path)) if path else ""
    return "__%s%s" % (symbol_type, suffix)
paul@0 433
paul@150 434
def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. The 'kind' must have an entry
    in the table name prefixes.
    """

    prefix = table_name_prefixes[kind]
    return "__%sTable_%s" % (prefix, encode_path(path))
paul@150 442
paul@131 443
def encode_type_attribute(path):

    """
    Encode the special type attribute for 'path', this being the path with a
    "#" prefix.
    """

    return "#%s" % path
paul@131 448
paul@318 449
def decode_type_attribute(s):

    """
    Decode the special type attribute 's', returning everything after its
    first character without testing that character.
    """

    return s[1:]
paul@318 454
paul@318 455
def is_type_attribute(s):

    """
    Return whether 's' is a type attribute name, this being indicated by a "#"
    prefix.
    """

    return s.startswith("#")
paul@318 460
paul@56 461
paul@56 462
paul@150 463
# A mapping from kinds to structure size reference prefixes.
paul@150 464
paul@150 465
structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }
paul@150 470
paul@150 471
# A mapping from kinds to table name prefixes.
paul@150 472
paul@150 473
table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }
paul@150 479
paul@150 480
paul@150 481
paul@0 482
# Output language reserved words.
paul@0 483
paul@0 484
# Names appearing here are given a "__" prefix when encoded as paths. The list
# includes C keywords that are valid identifiers in Python programs and would
# otherwise be written unchanged to the output program.

reserved_words = [
    "auto",
    "break",
    "case", "char", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extern",
    "float", "for",
    "goto",
    "if", "inline", "int",
    "long",
    "NULL",
    "register", "restrict", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile",
    "while",
    ]
paul@0 494
paul@0 495
# vim: tabstop=4 expandtab shiftwidth=4