diff options
author | Christoph Helma <helma@in-silico.ch> | 2017-02-14 12:16:29 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2017-02-14 12:16:29 +0100 |
commit | 03ee143bbcdc6f97533ae9f4d3b47711fdae625b (patch) | |
tree | 45b5a10a7ccc57943b344a033896e31da1d51d4f /scripts | |
parent | 04baa2d6ddab1963759f99c87cf8f87cbd435831 (diff) |
rf models, similarity 0.1
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/create-median-correlation.rb | 6 | ||||
-rwxr-xr-x | scripts/create-test.rb | 6 | ||||
-rwxr-xr-x | scripts/create-training.rb | 6 | ||||
-rwxr-xr-x | scripts/crossvalidation.rb | 5 | ||||
-rwxr-xr-x | scripts/dataset-variability.R | 7 | ||||
-rwxr-xr-x | scripts/mazzatorta-unique-smiles.rb | 6 | ||||
-rwxr-xr-x | scripts/noael_loael2mmol.rb | 6 | ||||
-rwxr-xr-x | scripts/test-validation-results.rb | 6 | ||||
-rwxr-xr-x | scripts/test-validation.rb | 4 |
9 files changed, 31 insertions, 21 deletions
diff --git a/scripts/create-median-correlation.rb b/scripts/create-median-correlation.rb index 2b932d0..5a68be6 100755 --- a/scripts/create-median-correlation.rb +++ b/scripts/create-median-correlation.rb @@ -8,7 +8,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv") common_compounds = (old.compounds & new.compounds).uniq data = [] -puts ["SMILES","mazzatorta","swiss"].join(",") common_compounds.each do |c| old_values = old.values(c,old.features.first) new_values = new.values(c,new.features.first) @@ -23,4 +22,7 @@ common_compounds.each do |c| end data.sort!{|a,b| a[1] <=> b[1]} -puts data.collect{|r| r.join ","}.join("\n") +File.open(File.join("data","median-correlation.csv","w+")) do |f| + f.puts ["SMILES","mazzatorta","swiss"].join(",") + f.puts data.collect{|r| r.join ","}.join("\n") +end diff --git a/scripts/create-test.rb b/scripts/create-test.rb index 151b5e8..9b3afbb 100755 --- a/scripts/create-test.rb +++ b/scripts/create-test.rb @@ -7,7 +7,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv") common_compounds = (old.compounds & new.compounds).uniq -puts ["SMILES","-log10(LOAEL)","Dataset"].join "," data = [] common_compounds.each do |c| old_values = old.values(c,old.features.first) @@ -30,4 +29,7 @@ end data.sort!{|a,b| a[1] <=> b[1]} -puts data.collect{|r| r.join ","}.join "\n" +File.open(File.join("data","test_log10.csv","w+")) do |f| + f.puts ["SMILES","LOAEL","Dataset"].join "," + f.puts data.collect{|r| r.join ","}.join "\n" +end diff --git a/scripts/create-training.rb b/scripts/create-training.rb index 8fca3f4..2542273 100755 --- a/scripts/create-training.rb +++ b/scripts/create-training.rb @@ -7,7 +7,6 @@ new = Dataset.from_csv_file File.join("data","swiss_log10.csv") common_compounds = (old.compounds + new.compounds).uniq -puts ["SMILES","-log10(LOAEL)","Dataset"].join "," data = [] common_compounds.each do |c| old_values = old.values(c,old.features.first) @@ -30,4 +29,7 @@ end data.sort!{|a,b| a[1] <=> b[1]} -puts data.collect{|r| r.join ","}.join "\n" +File.open(File.join("data","training_log10.csv","w+")) do |f| + f.puts ["SMILES","LOAEL","Dataset"].join "," + f.puts data.collect{|r| r.join ","}.join "\n" +end diff --git a/scripts/crossvalidation.rb b/scripts/crossvalidation.rb index 9657af1..c1653dc 100755 --- a/scripts/crossvalidation.rb +++ b/scripts/crossvalidation.rb @@ -5,9 +5,8 @@ require 'yaml' name = File.basename ARGV[0], ".csv" file = File.join "data",ARGV[0] dataset = Dataset.from_csv_file file -model = Model::LazarRegression.create(training_dataset: dataset)#, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression") -#model = Model::LazarRegression.create(dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") -#model = Model::LazarRegression.create(dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average") +#model = Model::LazarRegression.create(training_dataset: dataset)#, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression") +model = Model::LazarRegression.create(training_dataset: dataset, algorithms: { :similarity => { :min => 0.3 }}) csv_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.csv")) id_file = File.join("data",ARGV[0].sub(/.csv/,"-cv-#{ARGV[1]}.id")) cv = Validation::RegressionCrossValidation.create model diff --git a/scripts/dataset-variability.R b/scripts/dataset-variability.R index 65234a4..f663b65 100755 --- a/scripts/dataset-variability.R +++ b/scripts/dataset-variability.R @@ -3,6 +3,7 @@ library(ggplot2) library(grid) library(gridExtra) +if (FALSE) { m = read.csv("data/mazzatorta_log10.csv",header=T) s = read.csv("data/swiss_log10.csv",header=T) @@ -11,8 +12,6 @@ s.dupsmi = unique(s$SMILES[duplicated(s$SMILES)]) m.dup = m[m$SMILES %in% m.dupsmi,] s.dup = s[s$SMILES %in% s.dupsmi,] -#m.dup$LOAEL= -log10(m.dup$LOAEL) -#s.dup$LOAEL= -log10(s.dup$LOAEL) m.dup$SMILES <- reorder(m.dup$SMILES,m.dup$LOAEL) s.dup$SMILES <- reorder(s.dup$SMILES,s.dup$LOAEL) @@ -22,13 +21,13 @@ p2 <- ggplot(s.dup, aes(SMILES,LOAEL),ymin = min(LOAEL), ymax=max(LOAEL)) + ylab #pdf('figure/dataset-variability.pdf') #grid.arrange(p1,p2,ncol=1) #dev.off() +} data <- read.csv("data/test_log10.csv",header=T) -#data$LOAEL = -log(data$LOAEL) data$SMILES <- reorder(data$SMILES,data$LOAEL) img = ggplot(data,aes(SMILES,LOAEL,ymin = min(LOAEL), ymax=max(LOAEL),color=Dataset)) + geom_point() img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank()) + theme(legend.title=element_blank()) img = img + scale_fill_discrete(breaks=c("Mazzatorta", "Both", "Swiss Federal Office")) -img = img +#img = img ggsave(file='figures/dataset-variability.pdf', plot=img, width=12,height=8) diff --git a/scripts/mazzatorta-unique-smiles.rb b/scripts/mazzatorta-unique-smiles.rb index 0b6db2a..c4c9275 100755 --- a/scripts/mazzatorta-unique-smiles.rb +++ b/scripts/mazzatorta-unique-smiles.rb @@ -13,5 +13,7 @@ csv_in.each do |line| data << [c.smiles,mmol,"mazzatorta"] #if c end data.sort!{|a,b| a[1] <=> b[1]} -puts ["SMILES","LOAEL","Dataset"].join "," -puts data.collect{|row| row.join ","}.join "\n" +File.open(File.join("data","mazzatorta.csv","w+")) do |f| + f.puts ["SMILES","LOAEL","Dataset"].join "," + f.puts data.collect{|row| row.join ","}.join "\n" +end diff --git a/scripts/noael_loael2mmol.rb b/scripts/noael_loael2mmol.rb index 3d79aae..299434c 100755 --- a/scripts/noael_loael2mmol.rb +++ b/scripts/noael_loael2mmol.rb @@ -15,5 +15,7 @@ csv_in.each do |line| end end data.sort!{|a,b| a[1] <=> b[1]} -puts ["SMILES","LOAEL","Dataset"].join "," -puts data.collect{|row| row.join ","}.join "\n" +File.open(File.join("data","swiss.csv","w+")) do |f| + f.puts ["SMILES","LOAEL","Dataset"].join "," + f.puts data.collect{|row| row.join ","}.join "\n" +end diff --git a/scripts/test-validation-results.rb b/scripts/test-validation-results.rb index 2750019..7ebea89 100755 --- a/scripts/test-validation-results.rb +++ b/scripts/test-validation-results.rb @@ -5,10 +5,12 @@ include OpenTox validation = Validation::TrainTest.find File.read("data/training-test-predictions.id").chomp data = [] -puts ["SMILES","LOAEL_measured_median","LOAEL_predicted","Error","Dataset"].join(",") validation.predictions.each do |id,p| data << [Compound.find(id).smiles, p["measurements"].median, p["value"], (p["measurements"].median-p["value"]).abs,"test-prediction"] end data.sort!{|a,b| a[1] <=> b[1]} -puts data.collect{|r| r.join ","}.join("\n") +File.open(File.join("data","training-test-predictions.csv","w+")) do |f| + f.puts ["SMILES","LOAEL_measured_median","LOAEL_predicted","Error","Dataset"].join(",") + f.puts data.collect{|r| r.join ","}.join("\n") +end diff --git a/scripts/test-validation.rb b/scripts/test-validation.rb index 5c07449..8e0d6a9 100755 --- a/scripts/test-validation.rb +++ b/scripts/test-validation.rb @@ -5,6 +5,6 @@ include OpenTox test = Dataset.from_csv_file(File.join("data","test_log10.csv")) train = Dataset.from_csv_file(File.join("data","training_log10.csv")) -model = Model::LazarRegression.create(training_dataset: train) +model = Model::LazarRegression.create(training_dataset: train, algorithms: { :similarity => { :min => 0.3 }}) validation = Validation::TrainTest.create model, train, test -puts validation.id +File.open(File.join("data","training-test-predictions.id","w+")) { |f| f.puts validation.id } |