...
 
Commits (5)
......@@ -7,24 +7,43 @@ This project contains the different software layers of the Gesall big data platf
2. Data Partitioning schemes (with MapReduce wrappers) are in `program.{alignment|clean|md}.latest` packages.
3. Error Diagnosis programs are in `correctness.*` packages.
### Building
##### Eclipse IDE
### Automated Building
Note the path of the base directory that contains the gesall-core, gesall-htsjdk, gesall-picard and gesall-libs directories.
After going into the gesall-core dir simply run 'build.sh' with this path:
```
$> bash ./build.sh --dir=<path-to-your-base-dir>
```
#### Pre-requisites
Before building the gesall-core code base, make sure the following conditions hold.
1. The gesall-htsjdk, gesall-picard and gesall-libs folders are present alongside gesall-core, in the same base directory.
2. gesall-htsjdk and gesall-picard must have been built first! The build process picks up jar files from the dist/ directory of these two modules.
3. Make sure your JAVA_HOME environment variable is set up, so that the standard command-line tools (e.g. jar, ant) can be used.
#### What it does?
1. Points the Ant build files at your base directory (replacing the placeholder path in them) so that they can pick up the dependencies.
2. Uses ant-build/build_clean.xml and ant-build/build_md.xml to build Runnable jar files for the program.clean.latest and program.md.latest modules, respectively.
3. Uses the precompiled Java classes present in the build/build_alignment/ directory and packages them into a jar file.
Note that this jar is not Runnable, it contains compressed pre-compiled java classes.
#### Output format
The dist/ directory contains the 3 jar files.
These can now be used with the Hadoop Infrastructure.
Note that the jar itself does *not* contain any Hadoop libraries. It has to be used inside an existing Hadoop installation.
### Building using Eclipse IDE
##### Import the code
1. Import the code from `gesall-core` repository into Eclipse.
2. Add `gesall-htsjdk` and `gesall-picard` Eclipse projects to dependencies in `Project->Properties->Java Build Path->Projects`.
3. Add all the external JAR files from `gesall-libs` into `Project->Properties->Java Build Path->Libraries`.
#### Exporting code as JAR files
##### Eclipse IDE
##### Building the jars
1. Use `File->Export->Runnable JAR` option with library handling set to `Extract required libraries into generated JAR`.
2. This will create a self-contained, fat JAR file.
3. Apache Hadoop JAR files in `gesall-libs` should be of the same version as the deployment Hadoop cluster.
##### Command line
1. There are some example `ant` build files in the `ant-build` directory.
2. These build files were generated using `Export->Ant buildfiles` option in Eclipse. But references to specific versions of Hadoop libraries were removed.
### License
Our code is released under the MIT license.
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Ant build file that assembles a single runnable jar, program_clean.jar,
whose entry point is program.clean.latest.CleanMRMain.
The jar contains the compiled classes from gesall-core/bin plus the contents
of every library jar listed below.
-->
<project default="create_run_jar" name="Create Runnable Jar for Project gesall-core">
<!--this file was created by Eclipse Runnable JAR Export Wizard-->
<!--ANT 1.7 is required -->
<!--define folder properties-->
<!-- dir.buildfile and dir.jarfile are defined here but not referenced anywhere else in this file. -->
<property name="dir.buildfile" value="."/>
<property name="dir.jarfile" value="${dir.buildfile}"/>
<!--
Base directory that holds the gesall-core, gesall-libs, gesall-htsjdk and
gesall-picard folders. The value below is only a placeholder: a real path
must be supplied (build.sh takes care of this) before the target can resolve
any of the locations that follow.
-->
<property name="homedir" value="HOMEDIR" />
<!-- Default (and only) target: packages everything into gesall-core/dist/program_clean.jar. -->
<target name="create_run_jar">
<jar destfile="${homedir}/gesall-core/dist/program_clean.jar" filesetmanifest="mergewithoutmain">
<manifest>
<!-- Class launched when the jar is run. -->
<attribute name="Main-Class" value="program.clean.latest.CleanMRMain"/>
<attribute name="Class-Path" value="."/>
</manifest>
<!-- Compiled gesall-core classes; the bin directory must already be populated. -->
<fileset dir="${homedir}/gesall-core/bin"/>
<!--
Third-party libraries from gesall-libs, unpacked into the jar.
Entries matching META-INF/*.SF are left out of every library
(presumably so that stale signature files do not end up in the merged jar).
-->
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/testng/testng-5.5-jdk15.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/kryo/kryo-2.24.0.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/json/json-20140107.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/jopt/jopt-simple-4.8.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/guava-18.0/guava-18.0.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/apache/cmdline/commons-exec-1.2.jar"/>
<!-- Jars taken from the dist/ and lib/ folders of gesall-htsjdk and gesall-picard; the dist/ jars must exist before this target runs. -->
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/dist/htsjdk-1.120.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-picard/dist/picard-1.117.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-picard/lib/ant/bcel-5.2.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/commons-jexl-2.1.1.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/commons-logging-1.1.1.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/snappy-java-1.0.3-rc3.jar"/>
</jar>
</target>
</project>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Ant build file that assembles a single runnable jar, program_md.jar,
whose entry point is program.md.latest.MarkDuplicatesMain.
The jar contains the compiled classes from gesall-core/bin plus the contents
of every library jar listed below.
-->
<project default="create_run_jar" name="Create Runnable Jar for Project gesall-core">
<!--this file was created by Eclipse Runnable JAR Export Wizard-->
<!--ANT 1.7 is required -->
<!--define folder properties-->
<!-- dir.buildfile and dir.jarfile are defined here but not referenced anywhere else in this file. -->
<property name="dir.buildfile" value="."/>
<property name="dir.jarfile" value="${dir.buildfile}"/>
<!--
Base directory that holds the gesall-core, gesall-libs, gesall-htsjdk and
gesall-picard folders. The value below is only a placeholder: a real path
must be supplied (build.sh takes care of this) before the target can resolve
any of the locations that follow.
-->
<property name="homedir" value="HOMEDIR" />
<!-- Default (and only) target: packages everything into gesall-core/dist/program_md.jar. -->
<target name="create_run_jar">
<jar destfile="${homedir}/gesall-core/dist/program_md.jar" filesetmanifest="mergewithoutmain">
<manifest>
<!-- Class launched when the jar is run. -->
<attribute name="Main-Class" value="program.md.latest.MarkDuplicatesMain"/>
<attribute name="Class-Path" value="."/>
</manifest>
<!-- Compiled gesall-core classes; the bin directory must already be populated. -->
<fileset dir="${homedir}/gesall-core/bin"/>
<!--
Third-party libraries from gesall-libs, unpacked into the jar.
Entries matching META-INF/*.SF are left out of every library
(presumably so that stale signature files do not end up in the merged jar).
-->
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/testng/testng-5.5-jdk15.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/kryo/kryo-2.24.0.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/json/json-20140107.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/jopt/jopt-simple-4.8.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/guava-18.0/guava-18.0.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-libs/apache/cmdline/commons-exec-1.2.jar"/>
<!-- Jars taken from the dist/ and lib/ folders of gesall-htsjdk and gesall-picard; the dist/ jars must exist before this target runs. -->
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/dist/htsjdk-1.120.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-picard/dist/picard-1.117.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-picard/lib/ant/bcel-5.2.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/commons-jexl-2.1.1.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/commons-logging-1.1.1.jar"/>
<zipfileset excludes="META-INF/*.SF" src="${homedir}/gesall-htsjdk/lib/snappy-java-1.0.3-rc3.jar"/>
</jar>
</target>
</project>
#!/bin/bash
#
# Builds the three gesall-core jars into <base-dir>/gesall-core/dist:
#   * program_clean.jar    - runnable jar built by ant-build/build_clean.xml
#   * program_md.jar       - runnable jar built by ant-build/build_md.xml
#   * program_aligment.jar - plain archive of build/build_alignment/
#
# Usage: ./build.sh -d=<path-to-base-dir>   (or --dir=<path-to-base-dir>)
#
# <path-to-base-dir> is the directory that contains gesall-core (and the
# sibling gesall-libs, gesall-htsjdk and gesall-picard folders referenced by
# the Ant build files).
#
# Exits with status 1 on a usage error or a missing directory, and with the
# failing command's status if ant, cd or jar fails.

# Stop at the first failing command and treat unset variables as errors, so a
# failed build step can never be followed by packaging from the wrong place.
set -eu

# Print the usage line and abort.
usage() {
    echo "Correct usage ./build.sh -d=<path-to-base-dir>"
    exit 1
}

# Exactly one argument is accepted.
if [[ $# -ne 1 ]]
then
    usage
fi

homedir=""
case "$1" in
    -d=*|--dir=*)
        # Keep everything after the first '='.
        homedir="${1#*=}"
        ;;
    *)
        # Unknown option.
        usage
        ;;
esac

# An option such as "-d=" carries no path at all.
if [[ -z "$homedir" ]]
then
    usage
fi

if [[ ! -d "$homedir/gesall-core" ]]
then
    echo "Directory not found: $homedir/gesall-core" >&2
    exit 1
fi

# Resolve to an absolute path: the script changes directory further down, and
# Ant resolves relative paths against the build file's own directory.
homedir="$(cd "$homedir" && pwd)"
gesallcore="$homedir/gesall-core"

# Hand the base directory to Ant as a user property. A property given with -D
# takes precedence over the placeholder <property name="homedir"> declared in
# the build files, so the files are never edited and nothing has to be
# restored afterwards, even when a build step fails.
ant -Dhomedir="$homedir" -f "$gesallcore/ant-build/build_clean.xml"
ant -Dhomedir="$homedir" -f "$gesallcore/ant-build/build_md.xml"

# Package the precompiled alignment classes. The archive name (including its
# historical spelling) is kept unchanged so existing consumers keep working.
mkdir -p "$gesallcore/dist"
cd "$gesallcore/build/build_alignment/"
jar cvf "$gesallcore/dist/program_aligment.jar" ./
\ No newline at end of file