1 #!/usr/bin/env python 2 3 # The Great Computer Language Shootout 4 # http://shootout.alioth.debian.org/ 5 # 6 # modified by Ian Osgood 7 8 import sys 9 import itertools 10 import bisect 11 12 def gen_random(max_, last=[42], ia=3877, ic=29573, im=139968): 13 last[0] = (last[0] * ia + ic) % im 14 return (max_ * last[0]) / im 15 16 def make_cumulative(table): 17 l = [] 18 prob = 0.0 19 for char, p in table: 20 prob += p 21 l.append((prob, char)) 22 return l 23 24 alu = ( 25 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" 26 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" 27 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" 28 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" 29 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" 30 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" 31 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" 32 ) 33 34 iub = [ 35 ("a", 0.27), 36 ("c", 0.12), 37 ("g", 0.12), 38 ("t", 0.27), 39 40 ("B", 0.02), 41 ("D", 0.02), 42 ("H", 0.02), 43 ("K", 0.02), 44 ("M", 0.02), 45 ("N", 0.02), 46 ("R", 0.02), 47 ("S", 0.02), 48 ("V", 0.02), 49 ("W", 0.02), 50 ("Y", 0.02), 51 ] 52 53 homosapiens = [ 54 ("a", 0.3029549426680), 55 ("c", 0.1979883004921), 56 ("g", 0.1975473066391), 57 ("t", 0.3015094502008), 58 ] 59 60 61 def make_repeat_fasta(id_, desc, src, n): 62 print ">%s %s" % (id_, desc) 63 width = 60 64 l = len(src) 65 s = src * (n // l) 66 s += src[:n % l] 67 #assert len(s) == n 68 print "\n".join([s[i:i+width] for i in xrange(0, n, width)]) 69 70 def make_random_fasta(id_, desc, table, n): 71 print ">%s %s" % (id_, desc) 72 width = 60 73 chunk = 1 * width 74 _gr = gen_random 75 _table = make_cumulative(table) 76 probs = [t[0] for t in _table] 77 chars = [t[1] for t in _table] 78 _bisect = bisect.bisect 79 r = range(width) 80 for j in xrange(n // width): 81 print "".join([chars[_bisect(probs, _gr(1.0))] for i in r]) 82 if n % width: 83 print "".join([chars[_bisect(probs, _gr(1.0))] for i in xrange(n % width)]) 84 85 def main(): 86 try: 87 n = int(sys.argv[1]) 88 except: 89 print "Usage: %s <N>" % sys.argv[0] 90 make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2) 91 make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3) 92 make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5) 93 94 main()