summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.ch> 2017-02-14 12:16:29 +0100
committer: Christoph Helma <helma@in-silico.ch> 2017-02-14 12:16:29 +0100
commit: 03ee143bbcdc6f97533ae9f4d3b47711fdae625b (patch)
tree: 45b5a10a7ccc57943b344a033896e31da1d51d4f /scripts
parent: 04baa2d6ddab1963759f99c87cf8f87cbd435831 (diff)
rf models, similarity 0.1
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/create-median-correlation.rb 6
-rwxr-xr-x scripts/create-test.rb 6
-rwxr-xr-x scripts/create-training.rb 6
-rwxr-xr-x scripts/crossvalidation.rb 5
-rwxr-xr-x scripts/dataset-variability.R 7
-rwxr-xr-x scripts/mazzatorta-unique-smiles.rb 6
-rwxr-xr-x scripts/noael_loael2mmol.rb 6
-rwxr-xr-x scripts/test-validation-results.rb 6
-rwxr-xr-x scripts/test-validation.rb 4
9 files changed, 31 insertions, 21 deletions
diff --git a/scripts/create-median-correlation.rb b/scripts/create-median-correlation.rb
index 2b932d0..5a68be6 100755
--- a/scripts/create-median-correlation.rb
+++ b/scripts/create-median-correlation.rb
@@ -8,7 +8,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv")
common_compounds = (old.compounds & new.compounds).uniq
data = []
-puts ["SMILES","mazzatorta","swiss"].join(",")
common_compounds.each do |c|
old_values = old.values(c,old.features.first)
new_values = new.values(c,new.features.first)
@@ -23,4 +22,7 @@ common_compounds.each do |c|
end
data.sort!{|a,b| a[1] <=> b[1]}
-puts data.collect{|r| r.join ","}.join("\n")
+File.open(File.join("data","median-correlation.csv","w+")) do |f|
+ f.puts ["SMILES","mazzatorta","swiss"].join(",")
+ f.puts data.collect{|r| r.join ","}.join("\n")
+end
diff --git a/scripts/create-test.rb b/scripts/create-test.rb
index 151b5e8..9b3afbb 100755
--- a/scripts/create-test.rb
+++ b/scripts/create-test.rb
@@ -7,7 +7,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv")
common_compounds = (old.compounds & new.compounds).uniq
-puts ["SMILES","-log10(LOAEL)","Dataset"].join ","
data = []
common_compounds.each do |c|
old_values = old.values(c,old.features.first)
@@ -30,4 +29,7 @@ end
data.sort!{|a,b| a[1] <=> b[1]}
-puts data.collect{|r| r.join ","}.join "\n"
+File.open(File.join("data","test_log10.csv","w+")) do |f|
+ f.puts ["SMILES","LOAEL","Dataset"].join ","
+ f.puts data.collect{|r| r.join ","}.join "\n"
+end
diff --git a/scripts/create-training.rb b/scripts/create-training.rb
index 8fca3f4..2542273 100755
--- a/scripts/create-training.rb
+++ b/scripts/create-training.rb
@@ -7,7 +7,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv")
common_compounds = (old.compounds + new.compounds).uniq
-puts ["SMILES","-log10(LOAEL)","Dataset"].join ","
data = []
common_compounds.each do |c|
old_values = old.values(c,old.features.first)
@@ -30,4 +29,7 @@ end
data.sort!{|a,b| a[1] <=> b[1]}
-puts data.collect{|r| r.join ","}.join "\n"
+File.open(File.join("data","training_log10.csv","w+")) do |f|
+ f.puts ["SMILES","LOAEL","Dataset"].join ","
+ f.puts data.collect{|r| r.join ","}.join "\n"
+end
diff --git a/scripts/crossvalidation.rb b/scripts/crossvalidation.rb
index 9657af1..c1653dc 100755
--- a/scripts/crossvalidation.rb
+++ b/scripts/crossvalidation.rb
@@ -5,9 +5,8 @@ require 'yaml'
name = File.basename ARGV[0], ".csv"
file = File.join "data",ARGV[0]
dataset = Dataset.from_csv_file file
-model = Model::LazarRegression.create(training_dataset: dataset)#, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression")
-#model = Model::LazarRegression.create(dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression")
-#model = Model::LazarRegression.create(dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average")
+#model = Model::LazarRegression.create(training_dataset: dataset)#, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression")
+model = Model::LazarRegression.create(training_dataset: dataset, algorithms: { :similarity => { :min => 0.3 }})
csv_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.csv"))
id_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.id"))
cv = Validation::RegressionCrossValidation.create model
diff --git a/scripts/dataset-variability.R b/scripts/dataset-variability.R
index 65234a4..f663b65 100755
--- a/scripts/dataset-variability.R
+++ b/scripts/dataset-variability.R
@@ -3,6 +3,7 @@ library(ggplot2)
library(grid)
library(gridExtra)
+if (FALSE) {
m = read.csv("data/mazzatorta_log10.csv",header=T)
s = read.csv("data/swiss_log10.csv",header=T)
@@ -11,8 +12,6 @@ s.dupsmi = unique(s$SMILES[duplicated(s$SMILES)])
m.dup = m[m$SMILES %in% m.dupsmi,]
s.dup = s[s$SMILES %in% s.dupsmi,]
-#m.dup$LOAEL= -log10(m.dup$LOAEL)
-#s.dup$LOAEL= -log10(s.dup$LOAEL)
m.dup$SMILES <- reorder(m.dup$SMILES,m.dup$LOAEL)
s.dup$SMILES <- reorder(s.dup$SMILES,s.dup$LOAEL)
@@ -22,13 +21,13 @@ p2 <- ggplot(s.dup, aes(SMILES,LOAEL),ymin = min(LOAEL), ymax=max(LOAEL)) + ylab
#pdf('figure/dataset-variability.pdf')
#grid.arrange(p1,p2,ncol=1)
#dev.off()
+}
data <- read.csv("data/test_log10.csv",header=T)
-#data$LOAEL = -log(data$LOAEL)
data$SMILES <- reorder(data$SMILES,data$LOAEL)
img = ggplot(data,aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Dataset)) + geom_point()
img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) + theme(legend.title=element_blank())
img = img + scale_fill_discrete(breaks=c("Mazzatorta", "Both", "Swiss Federal Office"))
-img = img
+#img = img
ggsave(file='figures/dataset-variability.pdf', plot=img, width=12,height=8)
diff --git a/scripts/mazzatorta-unique-smiles.rb b/scripts/mazzatorta-unique-smiles.rb
index 0b6db2a..c4c9275 100755
--- a/scripts/mazzatorta-unique-smiles.rb
+++ b/scripts/mazzatorta-unique-smiles.rb
@@ -13,5 +13,7 @@ csv_in.each do |line|
data << [c.smiles,mmol,"mazzatorta"] #if c
end
data.sort!{|a,b| a[1] <=> b[1]}
-puts ["SMILES","LOAEL","Dataset"].join ","
-puts data.collect{|row| row.join ","}.join "\n"
+File.open(File.join("data","mazzatorta.csv","w+")) do |f|
+ f.puts ["SMILES","LOAEL","Dataset"].join ","
+ f.puts data.collect{|row| row.join ","}.join "\n"
+end
diff --git a/scripts/noael_loael2mmol.rb b/scripts/noael_loael2mmol.rb
index 3d79aae..299434c 100755
--- a/scripts/noael_loael2mmol.rb
+++ b/scripts/noael_loael2mmol.rb
@@ -15,5 +15,7 @@ csv_in.each do |line|
end
end
data.sort!{|a,b| a[1] <=> b[1]}
-puts ["SMILES","LOAEL","Dataset"].join ","
-puts data.collect{|row| row.join ","}.join "\n"
+File.open(File.join("data","swiss.csv","w+")) do |f|
+ f.puts ["SMILES","LOAEL","Dataset"].join ","
+ f.puts data.collect{|row| row.join ","}.join "\n"
+end
diff --git a/scripts/test-validation-results.rb b/scripts/test-validation-results.rb
index 2750019..7ebea89 100755
--- a/scripts/test-validation-results.rb
+++ b/scripts/test-validation-results.rb
@@ -5,10 +5,12 @@ include OpenTox
validation = Validation::TrainTest.find File.read("data/training-test-predictions.id").chomp
data = []
-puts ["SMILES","LOAEL_measured_median","LOAEL_predicted","Error","Dataset"].join(",")
validation.predictions.each do |id,p|
data << [Compound.find(id).smiles, p["measurements"].median, p["value"], (p["measurements"].median-p["value"]).abs,"test-prediction"]
end
data.sort!{|a,b| a[1] <=> b[1]}
-puts data.collect{|r| r.join ","}.join("\n")
+File.open(File.join("data","training-test-predictions.csv","w+")) do |f|
+ f.puts ["SMILES","LOAEL_measured_median","LOAEL_predicted","Error","Dataset"].join(",")
+ f.puts data.collect{|r| r.join ","}.join("\n")
+end
diff --git a/scripts/test-validation.rb b/scripts/test-validation.rb
index 5c07449..8e0d6a9 100755
--- a/scripts/test-validation.rb
+++ b/scripts/test-validation.rb
@@ -5,6 +5,6 @@ include OpenTox
test = Dataset.from_csv_file(File.join("data","test_log10.csv"))
train = Dataset.from_csv_file(File.join("data","training_log10.csv"))
-model = Model::LazarRegression.create(training_dataset: train)
+model = Model::LazarRegression.create(training_dataset: train, algorithms: { :similarity => { :min => 0.3 }})
validation = Validation::TrainTest.create model, train, test
-puts validation.id
+File.open(File.join("data","training-test-predictions.id","w+")) { |f| f.puts validation.id }