summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordavor <vorgrimmlerdavid@gmx.de>2011-11-25 09:09:00 +0100
committerdavor <vorgrimmlerdavid@gmx.de>2011-11-25 09:09:00 +0100
commitee226c837a6457e32b63cbbda40424d986b42bf6 (patch)
treed4a8e4215676d7c8fb6a9da6efbb1dbdaf371efa
parent00a79f3fce848587d2fd8bfdde2ac9d597b41214 (diff)
Added scripts for crossvalidation with new param pc_type.
-rw-r--r--5x_cv/5x_crossvalidation.rb60
-rw-r--r--5x_cv/dataset_config13
-rw-r--r--5x_cv/factors_config4
-rw-r--r--5x_cv/logs/cv_am.rb170
-rwxr-xr-x5x_cv/wrapper5cv.sh38
5 files changed, 285 insertions, 0 deletions
diff --git a/5x_cv/5x_crossvalidation.rb b/5x_cv/5x_crossvalidation.rb
new file mode 100644
index 0000000..29ecc28
--- /dev/null
+++ b/5x_cv/5x_crossvalidation.rb
@@ -0,0 +1,60 @@
+# Do a five times 10-fold crossvalidation (the 1..5 loop is currently commented out, so one round with random seed 1 runs)
+# Author: Andreas Maunz, David Vorgrimmler
+# @params: <algorithm_params> in the form p1=v1;p2=v2;...;pn=vn
+
+require 'rubygems'
+require 'opentox-ruby'
+require 'lib/cv_am.rb'
+
+subjectid = nil
+
+if ARGV.size != 1
+ puts
+ puts "Error! Arguments: <algorithm_params> in the form p1=v1;p2=v2;...;pn=vn"
+ exit 1
+end
+
+# Arguments for lib/cv.rb: file_or_dataset_uri feature_generation min_frequency min_chisq_significance backbone stratified random_seed prediction_algorithm local_svm_kernel nr_hits conf_stdev pc_type
+position_mapper={
+ "dataset_uri" => 0,
+ "feature_generation_uri" => 1,
+ "min_frequency" => 2,
+ "min_chisq_significance" => 3,
+ "backbone" => 4,
+ "stratified" => 5,
+ "random_seed" => 6,
+ "prediction_algorithm" => 7,
+ "local_svm_kernel" => 8,
+ "nr_hits" => 9,
+ "conf_stdev" => 10,
+ "pc_type" => 11
+}
+
+param_str=$ARGV[0]
+puts param_str
+params = Array.new(position_mapper.size,"")
+param_str.split(";").each { |param|
+ k,v = param.split("=")
+ params[position_mapper[k]] = v
+}
+params[5]="false" # stratified
+
+exception_config = YAML.load_file("exceptions_config.yaml")
+if ! exception_config[params[0]].nil?
+ exception_config[params[0]].each { |k,v|
+ puts "Setting exception: #{k} => #{v}"
+ params[position_mapper[k]] = v
+ }
+end
+
+i=1
+#for i in 1..5
+ begin
+ puts
+ puts "Round #{i.to_s}."
+ params[6]=i # random seed
+ cv(params)
+ rescue Exception => e
+ puts "Error in 5xCV: #{e.message}: #{e.backtrace}"
+ end
+#end
diff --git a/5x_cv/dataset_config b/5x_cv/dataset_config
new file mode 100644
index 0000000..b8bb45e
--- /dev/null
+++ b/5x_cv/dataset_config
@@ -0,0 +1,13 @@
+#CPDBAS_v5d_20Nov2008_rat_TD50
+http://toxcreate3.in-silico.ch:8080/dataset/1408
+#CPDBAS_v5d_20Nov2008_mouse_TD50
+http://toxcreate3.in-silico.ch:8080/dataset/1384
+#EPA v4b Fathead Minnow Acute Toxicity LC50_mmol
+#http://toxcreate3.in-silico.ch:8080/dataset/2093
+#MultiCellCall: DSSTox Carcinogenic Potency DBS MultiCellCall_no_duplicates.csv
+#http://toxcreate3.in-silico.ch:8080/dataset/130
+#Bloodbarr: bloodbarr_no_duplicate.csv
+#http://toxcreate3.in-silico.ch:8080/dataset/271
+#Salmonella Mutagenicity: DSSTox Carcinogenic Potency DBS Mutagenicity_no_duplicates.csv
+#http://toxcreate3.in-silico.ch:8080/dataset/233
+
diff --git a/5x_cv/factors_config b/5x_cv/factors_config
new file mode 100644
index 0000000..5ea1c57
--- /dev/null
+++ b/5x_cv/factors_config
@@ -0,0 +1,4 @@
+feature_generation_uri=http://toxcreate3.in-silico.ch:8082/algorithm/fminer/bbrc;prediction_algorithm=local_mlr_prop;pc_type=electronic
+feature_generation_uri=http://toxcreate3.in-silico.ch:8082/algorithm/fminer/bbrc;prediction_algorithm=local_mlr_prop;pc_type=topological
+feature_generation_uri=http://toxcreate3.in-silico.ch:8082/algorithm/fminer/bbrc;prediction_algorithm=local_mlr_prop;pc_type=geometrical
+feature_generation_uri=http://toxcreate3.in-silico.ch:8082/algorithm/fminer/bbrc;prediction_algorithm=local_mlr_prop;pc_type=constitutional
diff --git a/5x_cv/logs/cv_am.rb b/5x_cv/logs/cv_am.rb
new file mode 100644
index 0000000..86b1689
--- /dev/null
+++ b/5x_cv/logs/cv_am.rb
@@ -0,0 +1,170 @@
+# Do a 10-fold crossvalidation with multiple datasets
+# Author: Andreas Maunz, David Vorgrimmler
+# @params: array of 12 positional arguments (listed in the usage message printed by cv)
+
+# Runs one crossvalidation for a dataset and creates a crossvalidation report.
+#
+# The request parameters and the resulting URIs are printed to stdout. An
+# invalid argument prints a message and exits with status 1; an error raised
+# while creating the crossvalidation or its report is printed as "cv failed: ...".
+#
+# @param args [Array] exactly 12 positional values. Entries 1 and 5 are always
+#   validated; entries 2-4 and 6-11 are optional, with "" meaning "not set":
+#    0 file_or_dataset_uri     used as the training dataset URI
+#    1 feature_generation      must contain /algorithm/fminer/bbrc or /algorithm/fminer/last
+#    2 min_frequency           integer >= 2
+#    3 min_chisq_significance  number between 0 and 1
+#    4 backbone                "true" or "false"
+#    5 stratified              "true" or "false"
+#    6 random_seed             integer >= 1
+#    7 prediction_algorithm    "local_svm_classification" or "local_mlr_prop"
+#    8 local_svm_kernel        "weighted_tanimoto" or "propositionalized"
+#    9 nr_hits                 "true"
+#   10 conf_stdev              "true"
+#   11 pc_type                 "electronic", "geometrical", "topological" or "constitutional"
+# @return [nil]
+def cv(args)
+
+  #subjectid = OpenTox::Authorization.authenticate("guest","guest")
+  subjectid = nil
+
+  # All 12 positional slots have to be present, even when they are empty.
+  if args.size != 12
+    puts
+    puts "Error! Arguments: file_or_dataset_uri feature_generation min_frequency min_chisq_significance backbone stratified random_seed prediction_algorithm local_svm_kernel nr_hits conf_stdev pc_type"
+    exit 1
+  end
+
+  file = args[0]
+
+  # Tests shared by the flag-like arguments.
+  true_or_false = lambda { |v| %w[true false].include?(v.to_s) }
+  only_true = lambda { |v| v.to_s == "true" }
+
+  # One row per validated argument: [index, optional?, message, test].
+  # Optional arguments are only tested when they differ from "".
+  #
+  # The guards here used to read `if ! args[i] == ""`, which Ruby parses as
+  # `(!args[i]) == ""`; that is always false, so the optional tests never ran.
+  # Two tests had to change now that they run:
+  #  * random_seed requires >= 1 (the old `<= 1` test rejected every seed above 1);
+  #  * prediction_algorithm also accepts "local_mlr_prop", the value used by the
+  #    factors_config that ships with this script.
+  #    NOTE(review): extend the list if other prediction algorithms are in use.
+  checks = [
+    [1, false,
+      "feature_generation_uri must contain '/algorithm/fminer/last' or '/algorithm/fminer/bbrc'",
+      lambda { |v| v.to_s.include?("/algorithm/fminer/bbrc") || v.to_s.include?("/algorithm/fminer/last") }],
+    [2, true,
+      "min_frequency must be at least 2 or \"\"",
+      lambda { |v| v.to_s.to_i >= 2 }],
+    [3, true,
+      "min_chisq_significance must be between 0 and 1 or \"\"",
+      lambda { |v| v.to_s.to_f >= 0.0 && v.to_s.to_f <= 1.0 }],
+    [4, true,
+      "backbone must be 'true' or 'false'.",
+      true_or_false],
+    [5, false,
+      "stratified must be 'true' or 'false'",
+      true_or_false],
+    [6, true,
+      "random_seed must be a natural number or \"\"",
+      lambda { |v| v.to_s.to_i >= 1 }],
+    [7, true,
+      "prediction_algorithm must be \"local_svm_classification\" or \"local_mlr_prop\"",
+      lambda { |v| %w[local_svm_classification local_mlr_prop].include?(v.to_s) }],
+    [8, true,
+      "local_svm_kernel must be \"weighted_tanimoto\" or \"propositionalized\"",
+      lambda { |v| %w[weighted_tanimoto propositionalized].include?(v.to_s) }],
+    [9, true,
+      "nr_hits must be \"true\"",
+      only_true],
+    [10, true,
+      "conf_stdev must be \"true\"",
+      only_true],
+    [11, true,
+      "pc_type must be \"electronic\", \"geometrical\", \"topological\" or \"constitutional\"",
+      lambda { |v| %w[electronic geometrical topological constitutional].include?(v.to_s) }]
+  ]
+
+  # Stop at the first argument that fails its test.
+  checks.each { |index, optional, message, test|
+    value = args[index]
+    next if optional && value == ""
+    next if test.call(value)
+    puts message
+    exit 1
+  }
+
+  # Disabled alternative: upload a local CSV file instead of using a dataset URI.
+  #   training_dataset = OpenTox::Dataset.create_from_csv_file(file, subjectid)
+  #   prediction_feature = training_dataset.features.keys[0]
+  #   training_dataset_uri = training_dataset.uri
+  training_dataset_uri = file
+  puts training_dataset_uri
+  # The first feature key of the dataset is used as the prediction feature.
+  prediction_feature = OpenTox::Dataset.find(training_dataset_uri).features.keys.first
+  puts prediction_feature
+  puts training_dataset_uri
+
+  # Optional algorithm parameters as [name, index in args], in the order they are sent.
+  optional_params = [
+    ["min_frequency", 2],
+    ["min_chisq_significance", 3],
+    ["backbone", 4],
+    ["prediction_algorithm", 7],
+    ["local_svm_kernel", 8],
+    ["nr_hits", 9],
+    ["conf_stdev", 10],
+    ["pc_type", 11]
+  ]
+
+  # Each parameter is gated on its own value. pc_type used to be gated on args[10]
+  # (conf_stdev), so it was dropped whenever conf_stdev was not set.
+  supplied = optional_params.reject { |_name, index| args[index] == "" }
+
+  # Crossvalidation
+  # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,num_folds(10),random_seed(1),stratified(false))
+  alg_params = "feature_generation_uri=#{args[1]}"
+  supplied.each { |name, index| alg_params << ";#{name}=#{args[index]}" }
+
+  stratified_param = args[5]
+  random_seed_param = args[6]
+
+  cv_args = {
+    :dataset_uri => training_dataset_uri,
+    :prediction_feature => prediction_feature,
+    # lazar's URI: everything before "fminer" in the feature generation URI, plus "lazar"
+    :algorithm_uri => args[1].split('fminer')[0] + "lazar",
+    :algorithm_params => alg_params,
+    :stratified => stratified_param
+  }
+  cv_args[:random_seed] = random_seed_param unless random_seed_param == ""
+  # Log the dataset and the complete request before sending it.
+  puts file
+  puts cv_args.to_yaml
+  puts
+
+  # Arguments for a single lazar model; only the disabled Lazar call below reads them.
+  lazar_single_args = { :feature_generation_uri => "#{args[1]}" }
+  supplied.each { |name, index| lazar_single_args[name.to_sym] = args[index] }
+
+  begin
+    #m = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => training_dataset_uri, :subjectid => subjectid}.merge lazar_single_args ).to_s
+    #puts m
+    # Create the crossvalidation, then the report for it.
+    cv_uri = OpenTox::Crossvalidation.create(cv_args).uri
+    puts cv_uri
+    report_uri = OpenTox::CrossvalidationReport.create( cv_uri , subjectid).uri
+    puts report_uri
+    #qmrfr = OpenTox::QMRFReport.create(m).uri
+    #puts qmrfr
+    #cv_stat = OpenTox::Validation.from_cv_statistics( cv_uri, subjectid )
+    #puts cv_stat.metadata.to_yaml
+    #[ cv_stat, training_dataset_uri ]
+  # StandardError instead of Exception: signals and `exit` are no longer swallowed.
+  rescue StandardError => e
+    puts "cv failed: #{e.message} #{e.backtrace}"
+  end
+
+end
diff --git a/5x_cv/wrapper5cv.sh b/5x_cv/wrapper5cv.sh
new file mode 100755
index 0000000..a122869
--- /dev/null
+++ b/5x_cv/wrapper5cv.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Wrapper script for CV: runs 5x_crossvalidation.rb once per dataset/factor pair
+# Set Factors, Datasets, Exceptions in the respective config_files
+# AM,DV 2011
+
+if [ $# -lt 2 ]; then
+  echo "Usage: $0 factors datasets"
+  exit 1
+fi
+
+# Configure basics
+source "$HOME/.bash_aliases"
+otconfig
+THIS_DATE=$(date +%Y%m%d_%H_)
+FACTORS="$1"
+DATASETS="$2"
+
+# Don't start when running ([5] keeps grep from matching its own command line)
+while ps x | grep "[5]x_crossvalidation.rb" >/dev/null 2>&1; do sleep 3; done
+
+LOGFILE="${THIS_DATE}${USER}_wrapper5cv.log"
+rm -f "$LOGFILE"
+
+while read -r dataset_uri; do
+  if [ -n "$dataset_uri" ] && ! [[ "$dataset_uri" =~ "#" ]]; then # skip blank lines, allow comments
+    while read -r factor; do
+      if [ -n "$factor" ] && ! [[ "$factor" =~ "#" ]]; then # skip blank lines, allow comments
+        echo "${THIS_DATE}: $factor" >> "$LOGFILE" 2>&1
+        factor="$factor;dataset_uri=$dataset_uri"
+        echo "ruby 5x_crossvalidation.rb $factor" >> "$LOGFILE" 2>&1
+        ruby 5x_crossvalidation.rb "$factor" >> "$LOGFILE" 2>&1 < /dev/null # keep ruby off the loop's input
+      fi
+    done < "$FACTORS"
+  elif [ -n "$dataset_uri" ]; then # comment line: copied to the log
+    echo >> "$LOGFILE" 2>&1
+    echo "$dataset_uri" >> "$LOGFILE" 2>&1
+  fi
+done < "$DATASETS"