1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/tests/shootout/fasta.py Sun Feb 18 01:39:11 2007 +0100
1.3 @@ -0,0 +1,94 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +# The Great Computer Language Shootout
1.7 +# http://shootout.alioth.debian.org/
1.8 +#
1.9 +# modified by Ian Osgood
1.10 +
1.11 +import sys
1.12 +import itertools
1.13 +import bisect
1.14 +
1.15 +def gen_random(max_, last=[42], ia=3877, ic=29573, im=139968):
1.16 + last[0] = (last[0] * ia + ic) % im
1.17 + return (max_ * last[0]) / im
1.18 +
1.19 +def make_cumulative(table):
1.20 + l = []
1.21 + prob = 0.0
1.22 + for char, p in table:
1.23 + prob += p
1.24 + l.append((prob, char))
1.25 + return l
1.26 +
1.27 +alu = (
1.28 + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
1.29 + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
1.30 + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
1.31 + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
1.32 + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
1.33 + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
1.34 + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
1.35 +)
1.36 +
1.37 +iub = [
1.38 + ("a", 0.27),
1.39 + ("c", 0.12),
1.40 + ("g", 0.12),
1.41 + ("t", 0.27),
1.42 +
1.43 + ("B", 0.02),
1.44 + ("D", 0.02),
1.45 + ("H", 0.02),
1.46 + ("K", 0.02),
1.47 + ("M", 0.02),
1.48 + ("N", 0.02),
1.49 + ("R", 0.02),
1.50 + ("S", 0.02),
1.51 + ("V", 0.02),
1.52 + ("W", 0.02),
1.53 + ("Y", 0.02),
1.54 +]
1.55 +
1.56 +homosapiens = [
1.57 + ("a", 0.3029549426680),
1.58 + ("c", 0.1979883004921),
1.59 + ("g", 0.1975473066391),
1.60 + ("t", 0.3015094502008),
1.61 +]
1.62 +
1.63 +
1.64 +def make_repeat_fasta(id_, desc, src, n):
1.65 + print ">%s %s" % (id_, desc)
1.66 + width = 60
1.67 + l = len(src)
1.68 + s = src * (n // l)
1.69 + s += src[:n % l]
1.70 + #assert len(s) == n
1.71 + print "\n".join([s[i:i+width] for i in xrange(0, n, width)])
1.72 +
1.73 +def make_random_fasta(id_, desc, table, n):
1.74 + print ">%s %s" % (id_, desc)
1.75 + width = 60
1.76 + chunk = 1 * width
1.77 + _gr = gen_random
1.78 + _table = make_cumulative(table)
1.79 + probs = [t[0] for t in _table]
1.80 + chars = [t[1] for t in _table]
1.81 + _bisect = bisect.bisect
1.82 + r = range(width)
1.83 + for j in xrange(n // width):
1.84 + print "".join([chars[_bisect(probs, _gr(1.0))] for i in r])
1.85 + if n % width:
1.86 + print "".join([chars[_bisect(probs, _gr(1.0))] for i in xrange(n % width)])
1.87 +
1.88 +def main():
1.89 + try:
1.90 + n = int(sys.argv[1])
1.91 + except:
1.92 + print "Usage: %s <N>" % sys.argv[0]
1.93 + make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
1.94 + make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
1.95 + make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
1.96 +
1.97 +main()