Commit 5950b8bd authored by HankG

Straight port of the fasta test

parent e642b019
/*
* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* Based on Java (4) implementation modified by
* Mehmet D. AKIN and Rikard Mustajärvi
*
* Contributed by Hank Grabowski
*/
import java.io.IOException
import java.io.OutputStream
/**
 * Kotlin port of the Benchmarks Game "fasta" program.
 *
 * Writes three sequences to standard output, each as a `>id desc` header line
 * followed by lines of at most [LINE_LENGTH] bytes: one that cycles through
 * [ALU], and two drawn from weighted alphabets with the generator in
 * [FloatProbFreq.random].
 */
internal object fasta {
    // Parameters of the linear congruential generator in FloatProbFreq.random.
    val IM = 139968
    val IA = 3877
    val IC = 29573

    val LINE_LENGTH = 60
    val BUFFER_SIZE = (LINE_LENGTH + 1) * 1024 // add 1 for '\n'

    // Line terminator ('\n') as a raw byte, so no Char-to-Byte conversion is needed.
    private const val NEWLINE: Byte = 10

    // Sequence that main() hands to makeRepeatFasta to be repeated cyclically.
    var ALU =
        "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
            "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
            "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
            "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
            "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
            "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
            "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

    // Weighted selection from alphabet
    private val IUB = FloatProbFreq(
        "acgtBDHKMNRSVWY".toByteArray(Charsets.US_ASCII),
        doubleArrayOf(0.27, 0.12, 0.12, 0.27, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02)
    )

    private val HOMO_SAPIENS = FloatProbFreq(
        "acgt".toByteArray(Charsets.US_ASCII),
        doubleArrayOf(0.3029549426680, 0.1979883004921, 0.1975473066391, 0.3015094502008)
    )

    /**
     * Shared output loop for both generators.
     *
     * Writes the `>id desc` header, then asks [fillLine] to produce one line
     * at a time into a reusable buffer, appending a newline after each line
     * and flushing the buffer to [writer] whenever it is completely full.
     *
     * [fillLine] receives the buffer, the offset to start writing at and the
     * number of bytes to produce, and returns the offset just past the last
     * byte it wrote.
     */
    private inline fun writeSequence(
        id: String,
        desc: String,
        nChars: Int,
        writer: OutputStream,
        fillLine: (ByteArray, Int, Int) -> Int
    ) {
        val buffer = ByteArray(BUFFER_SIZE)
        // The flush test below only fires when the buffer is exactly full, so
        // the buffer must hold a whole number of (line + newline) records.
        check(buffer.size % (LINE_LENGTH + 1) == 0) {
            "buffer size must be a multiple of line length (including line break)"
        }
        writer.write(">$id $desc\n".toByteArray())

        var remaining = nChars
        var used = 0
        while (remaining > 0) {
            // Only the final line may be shorter than LINE_LENGTH.
            val chunkSize = Math.min(remaining, LINE_LENGTH)
            if (used == BUFFER_SIZE) {
                writer.write(buffer, 0, used)
                used = 0
            }
            used = fillLine(buffer, used, chunkSize)
            buffer[used++] = NEWLINE
            remaining -= chunkSize
        }
        // Emit whatever is left in the buffer (possibly nothing).
        writer.write(buffer, 0, used)
    }

    /**
     * Writes a header line followed by [nChars] bytes selected from [fpf],
     * broken into lines of at most [LINE_LENGTH] bytes.
     *
     * @param id sequence identifier placed after `>` in the header
     * @param desc description placed after the identifier in the header
     * @param fpf weighted alphabet the bytes are drawn from
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; it is not closed by this function
     * @throws IOException if writing to [writer] fails
     */
    @Throws(IOException::class)
    fun makeRandomFasta(
        id: String, desc: String,
        fpf: FloatProbFreq, nChars: Int, writer: OutputStream
    ) {
        writeSequence(id, desc, nChars, writer) { buffer, start, count ->
            fpf.selectRandomIntoBuffer(buffer, start, count)
        }
    }

    /**
     * Writes a header line followed by [nChars] bytes obtained by cycling
     * through the bytes of [alu], broken into lines of at most [LINE_LENGTH]
     * bytes.
     *
     * @param id sequence identifier placed after `>` in the header
     * @param desc description placed after the identifier in the header
     * @param alu text whose bytes are repeated, wrapping to the start at the end
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; it is not closed by this function
     * @throws IOException if writing to [writer] fails
     */
    @Throws(IOException::class)
    fun makeRepeatFasta(
        id: String, desc: String, alu: String,
        nChars: Int, writer: OutputStream
    ) {
        val aluBytes = alu.toByteArray()
        // Position in aluBytes; carried across lines so the cycle is continuous.
        var aluIndex = 0
        writeSequence(id, desc, nChars, writer) { buffer, start, count ->
            var pos = start
            repeat(count) {
                if (aluIndex == aluBytes.size) {
                    aluIndex = 0
                }
                buffer[pos++] = aluBytes[aluIndex++]
            }
            pos
        }
    }

    /**
     * Program entry point. The optional first argument is the size `n`
     * (default 1000); the three sequences have 2n, 3n and 5n bytes.
     *
     * @throws IOException if writing to standard output fails
     * @throws NumberFormatException if the first argument is not an integer
     */
    @Throws(IOException::class)
    @JvmStatic
    fun main(args: Array<String>) {
        // Alternative size kept from the Java original: n = 25000000
        val n = if (args.isNotEmpty()) args[0].toInt() else 1000
        val out = System.out
        makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out)
        makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out)
        makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out)
        out.close()
    }

    /**
     * An alphabet with one weight per symbol, stored as cumulative
     * single-precision thresholds.
     *
     * @param chars the symbols, one byte each
     * @param probs the weight of each symbol, in the same order as [chars]
     */
    class FloatProbFreq(internal val chars: ByteArray, probs: DoubleArray) {
        // Cumulative thresholds: probs[i] is the sum of the first i + 1 weights.
        internal val probs: FloatArray = FloatArray(probs.size)

        init {
            // Each weight is narrowed to Float before being added to the
            // double-precision running total, then the total is narrowed again.
            var cumulative = 0.0
            for (i in probs.indices) {
                cumulative += probs[i].toFloat().toDouble()
                this.probs[i] = cumulative.toFloat()
            }
        }

        /**
         * Writes [nRandom] randomly selected symbols into [buffer] starting at
         * [bufferIndex].
         *
         * Each symbol is the first one whose cumulative threshold exceeds the
         * next value from [random]; if none does, the last symbol is used.
         *
         * @return the buffer offset just past the last symbol written
         */
        fun selectRandomIntoBuffer(
            buffer: ByteArray, bufferIndex: Int, nRandom: Int
        ): Int {
            val fallback = probs.size - 1
            var pos = bufferIndex
            repeat(nRandom) {
                val r = random(1.0f)
                val hit = probs.indexOfFirst { r < it }
                buffer[pos++] = chars[if (hit >= 0) hit else fallback]
            }
            return pos
        }

        companion object {
            // Generator state, shared by every FloatProbFreq instance.
            internal var last = 42

            // pseudo-random number generator
            fun random(max: Float): Float {
                val oneOverIM = 1.0f / IM
                last = (last * IA + IC) % IM
                return max * last.toFloat() * oneOverIM
            }
        }
    }
}
...@@ -70,6 +70,7 @@ onlydirs = ...@@ -70,6 +70,7 @@ onlydirs =
binarytrees binarytrees
chameneosredux chameneosredux
fannkuchredux fannkuchredux
fasta
knucleotide knucleotide
mandelbrot mandelbrot
pidigits pidigits
...@@ -134,6 +135,7 @@ make = ...@@ -134,6 +135,7 @@ make =
nbody = 10000 20000 30000 40000 50000 nbody = 10000 20000 30000 40000 50000
regexdna = 10000 regexdna = 10000
binarytrees = 1 2 5 10 15 20 binarytrees = 1 2 5 10 15 20
fasta = 1000 2000 3000 5000 8000
pidigits = 1 100 1000 2500 5000 10000 pidigits = 1 100 1000 2500 5000 10000
spectralnorm = 1000 2000 3000 5000 8000 spectralnorm = 1000 2000 3000 5000 8000
......
Revised BSD license
This is a specific instance of the Open Source Initiative (OSI) BSD license template
http://www.opensource.org/licenses/bsd-license.php
Copyright © 2004-2008 Brent Fulgham, 2005-2017 Isaac Gouy
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of "The Computer Language Benchmarks Game" nor the name of "The Computer Language Shootout Benchmarks" nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* modified by Mehmet D. AKIN
* modified by Rikard Mustajärvi
*/
import java.io.IOException;
import java.io.OutputStream;
/**
 * Benchmarks Game "fasta" program: writes three sequences to standard output,
 * each as a ">id desc" header line followed by lines of at most LINE_LENGTH
 * bytes.
 */
class fasta {
    // Parameters of the linear congruential generator in FloatProbFreq.random.
    static final int IM = 139968;
    static final int IA = 3877;
    static final int IC = 29573;

    static final int LINE_LENGTH = 60;
    static final int BUFFER_SIZE = (LINE_LENGTH + 1)*1024; // add 1 for '\n'

    // Sequence that main() hands to makeRepeatFasta to be repeated cyclically.
    public static String ALU =
              "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
            + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
            + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
            + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
            + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
            + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
            + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";

    // Weighted selection from alphabet
    private static final FloatProbFreq IUB = new FloatProbFreq(
            new byte[]{
                'a', 'c', 'g', 't',
                'B', 'D', 'H', 'K',
                'M', 'N', 'R', 'S',
                'V', 'W', 'Y'},
            new double[]{
                0.27, 0.12, 0.12, 0.27,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02, 0.02,
                0.02, 0.02, 0.02,
            }
    );

    private static final FloatProbFreq HOMO_SAPIENS = new FloatProbFreq(
            new byte[]{'a', 'c', 'g', 't'},
            new double[]{
                0.3029549426680d,
                0.1979883004921d,
                0.1975473066391d,
                0.3015094502008d}
    );

    /**
     * Writes a header line followed by nChars bytes selected from fpf,
     * broken into lines of at most LINE_LENGTH bytes.
     *
     * @param id     sequence identifier placed after '>' in the header
     * @param desc   description placed after the identifier in the header
     * @param fpf    weighted alphabet the bytes are drawn from
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; not closed by this method
     * @throws IOException if writing to the stream fails
     */
    static final void makeRandomFasta(String id, String desc,
            FloatProbFreq fpf, int nChars, OutputStream writer)
            throws IOException
    {
        final int lineLen = fasta.LINE_LENGTH;
        final int capacity = fasta.BUFFER_SIZE;
        final byte[] out = new byte[capacity];
        // The flush test below only fires on an exactly full buffer, so the
        // buffer must hold a whole number of (line + newline) records.
        if (out.length % (lineLen + 1) != 0) {
            throw new IllegalStateException(
                "buffer size must be a multiple of " +
                "line length (including line break)");
        }

        writer.write((">" + id + " " + desc + '\n').getBytes());

        int used = 0;
        for (int remaining = nChars; remaining > 0; ) {
            // Only the final line may be shorter than a full line.
            final int take = Math.min(remaining, lineLen);
            if (used == capacity) {
                writer.write(out, 0, used);
                used = 0;
            }
            used = fpf.selectRandomIntoBuffer(out, used, take);
            out[used++] = '\n';
            remaining -= take;
        }
        // Emit whatever is left in the buffer (possibly nothing).
        writer.write(out, 0, used);
    }

    /**
     * Writes a header line followed by nChars bytes obtained by cycling
     * through the bytes of alu, broken into lines of at most LINE_LENGTH bytes.
     *
     * @param id     sequence identifier placed after '>' in the header
     * @param desc   description placed after the identifier in the header
     * @param alu    text whose bytes are repeated, wrapping at the end
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; not closed by this method
     * @throws IOException if writing to the stream fails
     */
    static final void makeRepeatFasta(
            String id, String desc, String alu,
            int nChars, OutputStream writer) throws IOException
    {
        final byte[] source = alu.getBytes();
        final int lineLen = fasta.LINE_LENGTH;
        final int capacity = fasta.BUFFER_SIZE;
        final byte[] out = new byte[capacity];
        if (out.length % (lineLen + 1) != 0) {
            throw new IllegalStateException(
                "buffer size must be a multiple " +
                "of line length (including line break)");
        }

        writer.write((">" + id + " " + desc + '\n').getBytes());

        // Read position in source; carried across lines so the cycle is continuous.
        int cursor = 0;
        int used = 0;
        for (int remaining = nChars; remaining > 0; ) {
            final int take = Math.min(remaining, lineLen);
            if (used == capacity) {
                writer.write(out, 0, used);
                used = 0;
            }
            int copied = 0;
            while (copied < take) {
                if (cursor == source.length) {
                    cursor = 0;
                }
                out[used++] = source[cursor++];
                copied++;
            }
            out[used++] = '\n';
            remaining -= take;
        }
        writer.write(out, 0, used);
    }

    /**
     * Program entry point. The optional first argument is the size n
     * (default 1000); the three sequences have 2n, 3n and 5n bytes.
     */
    public static void main(String[] args) throws IOException
    {
        // Alternative size: n = 25000000
        final int n = args.length > 0 ? Integer.parseInt(args[0]) : 1000;

        OutputStream out = System.out;
        makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out);
        makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out);
        makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out);
        out.close();
    }

    /**
     * An alphabet with one weight per symbol, stored as cumulative
     * single-precision thresholds.
     */
    public static final class FloatProbFreq {
        // Generator state, shared by every instance.
        static int last = 42;
        final byte[] chars;
        // Cumulative thresholds: probs[i] is the sum of the first i + 1 weights.
        final float[] probs;

        /**
         * @param chars the symbols, one byte each
         * @param probs the weight of each symbol, in the same order as chars
         */
        public FloatProbFreq(byte[] chars, double[] probs) {
            this.chars = chars;
            this.probs = new float[probs.length];
            // Each weight is narrowed to float before joining the
            // double-precision running total, which is then narrowed again.
            double runningTotal = 0.0;
            for (int i = 0; i < probs.length; i++) {
                runningTotal += (float)probs[i];
                this.probs[i] = (float)runningTotal;
            }
        }

        /**
         * Writes nRandom randomly selected symbols into buffer starting at
         * bufferIndex. Each symbol is the first one whose cumulative threshold
         * exceeds the next random value; if none does, the last symbol is used.
         *
         * @return the buffer offset just past the last symbol written
         */
        public final int selectRandomIntoBuffer(
                byte[] buffer, int bufferIndex, final int nRandom) {
            final byte[] symbols = this.chars;
            final float[] thresholds = this.probs;
            final int count = thresholds.length;

            int produced = 0;
            while (produced < nRandom) {
                final float r = random(1.0f);
                int pick = 0;
                while (pick < count && !(r < thresholds[pick])) {
                    pick++;
                }
                if (pick == count) {
                    // No threshold exceeded r: fall back to the final symbol.
                    pick = count - 1;
                }
                buffer[bufferIndex++] = symbols[pick];
                produced++;
            }
            return bufferIndex;
        }

        // pseudo-random number generator
        public static final float random(final float max) {
            last = (last * IA + IC) % IM;
            final float oneOverIM = (1.0f/ IM);
            return max * last * oneOverIM;
        }
    }
}
/*
* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* Based on Java (4) implementation modified by
* Mehmet D. AKIN and Rikard Mustajärvi
*
* Contributed by Hank Grabowski
*/
import java.io.IOException
import java.io.OutputStream
/**
 * Kotlin port of the Benchmarks Game "fasta" program.
 *
 * Writes three sequences to standard output, each as a `>id desc` header line
 * followed by lines of at most [LINE_LENGTH] bytes: one that cycles through
 * [ALU], and two drawn from weighted alphabets with the generator in
 * [FloatProbFreq.random].
 */
internal object fasta {
    // Parameters of the linear congruential generator in FloatProbFreq.random.
    val IM = 139968
    val IA = 3877
    val IC = 29573

    val LINE_LENGTH = 60
    val BUFFER_SIZE = (LINE_LENGTH + 1) * 1024 // add 1 for '\n'

    // Line terminator ('\n') as a raw byte, so no Char-to-Byte conversion is needed.
    private const val NEWLINE: Byte = 10

    // Sequence that main() hands to makeRepeatFasta to be repeated cyclically.
    var ALU =
        "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
            "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
            "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
            "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
            "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
            "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
            "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

    // Weighted selection from alphabet
    private val IUB = FloatProbFreq(
        "acgtBDHKMNRSVWY".toByteArray(Charsets.US_ASCII),
        doubleArrayOf(0.27, 0.12, 0.12, 0.27, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02)
    )

    private val HOMO_SAPIENS = FloatProbFreq(
        "acgt".toByteArray(Charsets.US_ASCII),
        doubleArrayOf(0.3029549426680, 0.1979883004921, 0.1975473066391, 0.3015094502008)
    )

    /**
     * Shared output loop for both generators.
     *
     * Writes the `>id desc` header, then asks [fillLine] to produce one line
     * at a time into a reusable buffer, appending a newline after each line
     * and flushing the buffer to [writer] whenever it is completely full.
     *
     * [fillLine] receives the buffer, the offset to start writing at and the
     * number of bytes to produce, and returns the offset just past the last
     * byte it wrote.
     */
    private inline fun writeSequence(
        id: String,
        desc: String,
        nChars: Int,
        writer: OutputStream,
        fillLine: (ByteArray, Int, Int) -> Int
    ) {
        val buffer = ByteArray(BUFFER_SIZE)
        // The flush test below only fires when the buffer is exactly full, so
        // the buffer must hold a whole number of (line + newline) records.
        check(buffer.size % (LINE_LENGTH + 1) == 0) {
            "buffer size must be a multiple of line length (including line break)"
        }
        writer.write(">$id $desc\n".toByteArray())

        var remaining = nChars
        var used = 0
        while (remaining > 0) {
            // Only the final line may be shorter than LINE_LENGTH.
            val chunkSize = Math.min(remaining, LINE_LENGTH)
            if (used == BUFFER_SIZE) {
                writer.write(buffer, 0, used)
                used = 0
            }
            used = fillLine(buffer, used, chunkSize)
            buffer[used++] = NEWLINE
            remaining -= chunkSize
        }
        // Emit whatever is left in the buffer (possibly nothing).
        writer.write(buffer, 0, used)
    }

    /**
     * Writes a header line followed by [nChars] bytes selected from [fpf],
     * broken into lines of at most [LINE_LENGTH] bytes.
     *
     * @param id sequence identifier placed after `>` in the header
     * @param desc description placed after the identifier in the header
     * @param fpf weighted alphabet the bytes are drawn from
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; it is not closed by this function
     * @throws IOException if writing to [writer] fails
     */
    @Throws(IOException::class)
    fun makeRandomFasta(
        id: String, desc: String,
        fpf: FloatProbFreq, nChars: Int, writer: OutputStream
    ) {
        writeSequence(id, desc, nChars, writer) { buffer, start, count ->
            fpf.selectRandomIntoBuffer(buffer, start, count)
        }
    }

    /**
     * Writes a header line followed by [nChars] bytes obtained by cycling
     * through the bytes of [alu], broken into lines of at most [LINE_LENGTH]
     * bytes.
     *
     * @param id sequence identifier placed after `>` in the header
     * @param desc description placed after the identifier in the header
     * @param alu text whose bytes are repeated, wrapping to the start at the end
     * @param nChars number of sequence bytes to emit (newlines not counted)
     * @param writer destination stream; it is not closed by this function
     * @throws IOException if writing to [writer] fails
     */
    @Throws(IOException::class)
    fun makeRepeatFasta(
        id: String, desc: String, alu: String,
        nChars: Int, writer: OutputStream
    ) {
        val aluBytes = alu.toByteArray()
        // Position in aluBytes; carried across lines so the cycle is continuous.
        var aluIndex = 0
        writeSequence(id, desc, nChars, writer) { buffer, start, count ->
            var pos = start
            repeat(count) {
                if (aluIndex == aluBytes.size) {
                    aluIndex = 0
                }
                buffer[pos++] = aluBytes[aluIndex++]
            }
            pos
        }
    }

    /**
     * Program entry point. The optional first argument is the size `n`
     * (default 1000); the three sequences have 2n, 3n and 5n bytes.
     *
     * @throws IOException if writing to standard output fails
     * @throws NumberFormatException if the first argument is not an integer
     */
    @Throws(IOException::class)
    @JvmStatic
    fun main(args: Array<String>) {
        // Alternative size kept from the Java original: n = 25000000
        val n = if (args.isNotEmpty()) args[0].toInt() else 1000
        val out = System.out
        makeRepeatFasta("ONE", "Homo sapiens alu", ALU, n * 2, out)
        makeRandomFasta("TWO", "IUB ambiguity codes", IUB, n * 3, out)
        makeRandomFasta("THREE", "Homo sapiens frequency", HOMO_SAPIENS, n * 5, out)
        out.close()
    }

    /**
     * An alphabet with one weight per symbol, stored as cumulative
     * single-precision thresholds.
     *
     * @param chars the symbols, one byte each
     * @param probs the weight of each symbol, in the same order as [chars]
     */
    class FloatProbFreq(internal val chars: ByteArray, probs: DoubleArray) {
        // Cumulative thresholds: probs[i] is the sum of the first i + 1 weights.
        internal val probs: FloatArray = FloatArray(probs.size)

        init {
            // Each weight is narrowed to Float before being added to the
            // double-precision running total, then the total is narrowed again.
            var cumulative = 0.0
            for (i in probs.indices) {
                cumulative += probs[i].toFloat().toDouble()
                this.probs[i] = cumulative.toFloat()
            }
        }

        /**
         * Writes [nRandom] randomly selected symbols into [buffer] starting at
         * [bufferIndex].
         *
         * Each symbol is the first one whose cumulative threshold exceeds the
         * next value from [random]; if none does, the last symbol is used.
         *
         * @return the buffer offset just past the last symbol written
         */
        fun selectRandomIntoBuffer(
            buffer: ByteArray, bufferIndex: Int, nRandom: Int
        ): Int {
            val fallback = probs.size - 1
            var pos = bufferIndex
            repeat(nRandom) {
                val r = random(1.0f)
                val hit = probs.indexOfFirst { r < it }
                buffer[pos++] = chars[if (hit >= 0) hit else fallback]
            }
            return pos
        }

        companion object {
            // Generator state, shared by every FloatProbFreq instance.
            internal var last = 42

            // pseudo-random number generator
            fun random(max: Float): Float {
                val oneOverIM = 1.0f / IM
                last = (last * IA + IC) % IM
                return max * last.toFloat() * oneOverIM
            }
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment