paulb@200 | 1 | #!/usr/bin/env python |
paulb@200 | 2 | |
paulb@200 | 3 | # The Great Computer Language Shootout |
paulb@200 | 4 | # http://shootout.alioth.debian.org/ |
paulb@200 | 5 | # |
paulb@200 | 6 | # modified by Ian Osgood |
paulb@200 | 7 | |
paulb@200 | 8 | import sys |
paulb@200 | 9 | import itertools |
paulb@200 | 10 | import bisect |
paulb@200 | 11 | |
paulb@200 | 12 | def gen_random(max_, last=[42], ia=3877, ic=29573, im=139968): |
paulb@200 | 13 | last[0] = (last[0] * ia + ic) % im |
paulb@200 | 14 | return (max_ * last[0]) / im |
paulb@200 | 15 | |
paulb@200 | 16 | def make_cumulative(table): |
paulb@200 | 17 | l = [] |
paulb@200 | 18 | prob = 0.0 |
paulb@200 | 19 | for char, p in table: |
paulb@200 | 20 | prob += p |
paulb@200 | 21 | l.append((prob, char)) |
paulb@200 | 22 | return l |
paulb@200 | 23 | |
paulb@200 | 24 | alu = ( |
paulb@200 | 25 | "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" |
paulb@200 | 26 | "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" |
paulb@200 | 27 | "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" |
paulb@200 | 28 | "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" |
paulb@200 | 29 | "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" |
paulb@200 | 30 | "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" |
paulb@200 | 31 | "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" |
paulb@200 | 32 | ) |
paulb@200 | 33 | |
paulb@200 | 34 | iub = [ |
paulb@200 | 35 | ("a", 0.27), |
paulb@200 | 36 | ("c", 0.12), |
paulb@200 | 37 | ("g", 0.12), |
paulb@200 | 38 | ("t", 0.27), |
paulb@200 | 39 | |
paulb@200 | 40 | ("B", 0.02), |
paulb@200 | 41 | ("D", 0.02), |
paulb@200 | 42 | ("H", 0.02), |
paulb@200 | 43 | ("K", 0.02), |
paulb@200 | 44 | ("M", 0.02), |
paulb@200 | 45 | ("N", 0.02), |
paulb@200 | 46 | ("R", 0.02), |
paulb@200 | 47 | ("S", 0.02), |
paulb@200 | 48 | ("V", 0.02), |
paulb@200 | 49 | ("W", 0.02), |
paulb@200 | 50 | ("Y", 0.02), |
paulb@200 | 51 | ] |
paulb@200 | 52 | |
paulb@200 | 53 | homosapiens = [ |
paulb@200 | 54 | ("a", 0.3029549426680), |
paulb@200 | 55 | ("c", 0.1979883004921), |
paulb@200 | 56 | ("g", 0.1975473066391), |
paulb@200 | 57 | ("t", 0.3015094502008), |
paulb@200 | 58 | ] |
paulb@200 | 59 | |
paulb@200 | 60 | |
paulb@200 | 61 | def make_repeat_fasta(id_, desc, src, n): |
paulb@200 | 62 | print ">%s %s" % (id_, desc) |
paulb@200 | 63 | width = 60 |
paulb@200 | 64 | l = len(src) |
paulb@200 | 65 | s = src * (n // l) |
paulb@200 | 66 | s += src[:n % l] |
paulb@200 | 67 | #assert len(s) == n |
paulb@200 | 68 | print "\n".join([s[i:i+width] for i in xrange(0, n, width)]) |
paulb@200 | 69 | |
paulb@200 | 70 | def make_random_fasta(id_, desc, table, n): |
paulb@200 | 71 | print ">%s %s" % (id_, desc) |
paulb@200 | 72 | width = 60 |
paulb@200 | 73 | chunk = 1 * width |
paulb@200 | 74 | _gr = gen_random |
paulb@200 | 75 | _table = make_cumulative(table) |
paulb@200 | 76 | probs = [t[0] for t in _table] |
paulb@200 | 77 | chars = [t[1] for t in _table] |
paulb@200 | 78 | _bisect = bisect.bisect |
paulb@200 | 79 | r = range(width) |
paulb@200 | 80 | for j in xrange(n // width): |
paulb@200 | 81 | print "".join([chars[_bisect(probs, _gr(1.0))] for i in r]) |
paulb@200 | 82 | if n % width: |
paulb@200 | 83 | print "".join([chars[_bisect(probs, _gr(1.0))] for i in xrange(n % width)]) |
paulb@200 | 84 | |
paulb@200 | 85 | def main(): |
paulb@200 | 86 | try: |
paulb@200 | 87 | n = int(sys.argv[1]) |
paulb@200 | 88 | except: |
paulb@200 | 89 | print "Usage: %s <N>" % sys.argv[0] |
paulb@200 | 90 | make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2) |
paulb@200 | 91 | make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3) |
paulb@200 | 92 | make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5) |
paulb@200 | 93 | |
paulb@200 | 94 | main() |