summary refs log tree commit diff
path: root/paper/loael-dataset-comparison.rb
diff options
context:
space:
mode:
Diffstat (limited to 'paper/loael-dataset-comparison.rb')
-rw-r--r-- paper/loael-dataset-comparison.rb 73
1 files changed, 73 insertions, 0 deletions
diff --git a/paper/loael-dataset-comparison.rb b/paper/loael-dataset-comparison.rb
new file mode 100644
index 0000000..c1602ad
--- /dev/null
+++ b/paper/loael-dataset-comparison.rb
@@ -0,0 +1,89 @@
+# Compare the LOAEL values of compounds shared by two datasets: gather the
+# -ln transformed measurements per compound and dataset, then print r^2,
+# RMSE and MAE between the per-compound -ln(mean) values.
+require_relative '../../lazar/lib/lazar'
+include OpenTox
+#$mongo.database.drop
+#$gridfs = $mongo.database.fs # recreate GridFS indexes
+regression_dir = File.join(File.dirname(__FILE__), "..", "regression")
+old = Dataset.from_csv_file File.join(regression_dir, "LOAEL_mg_corrected_smiles_mmol.csv")
+new = Dataset.from_csv_file File.join(regression_dir, "swissRat_chron_LOAEL_mmol.csv")
+
+# Ids of the compounds that occur in both datasets.
+combined_compounds = old.compound_ids & new.compound_ids
+
+# Parallel arrays with one entry per single measurement; they are only read
+# by the disabled plotting section further down.
+compound_vector = []
+value_vector = []
+dataset_vector = []
+
+# -ln(mean) per shared compound; both arrays share the same index order.
+old_neg_log_means = []
+new_neg_log_means = []
+
+# The first feature of each dataset is used for every lookup.
+old_feature = old.features.first
+new_feature = new.features.first
+
+combined_compounds.each do |cid|
+  c = Compound.find cid
+  old_values = old.values(c, old_feature)
+  old_neg_log_means << -Math.log(old_values.mean)
+  old_values.each do |v|
+    compound_vector << c.smiles
+    value_vector << -Math.log(v.to_f)
+    dataset_vector << old.name
+  end
+  new_values = new.values(c, new_feature)
+  new_neg_log_means << -Math.log(new_values.mean)
+  new_values.each do |v|
+    compound_vector << c.smiles
+    value_vector << -Math.log(v)
+    dataset_vector << new.name
+  end
+end
+
+# Absolute differences of the pairs that are not identical. Both averages
+# below divide by the number of differences that were actually summed.
+# NOTE(review): identical pairs are left out here, while the r^2 printed at
+# the end is computed by R from all pairs - confirm this is intended.
+differing = old_neg_log_means.zip(new_neg_log_means).reject { |o, n| o == n }
+errors = differing.map { |o, n| (o - n).abs }
+if errors.empty?
+  # Nothing to average: report NaN instead of dividing by zero.
+  rmse = Float::NAN
+  mae = Float::NAN
+else
+  rmse = Math.sqrt(errors.inject(0.0) { |sum, e| sum + e**2 } / errors.size)
+  mae = errors.inject(0.0) { |sum, e| sum + e } / errors.size
+end
+
+=begin
+R.assign "smi", compound_vector
+R.assign "values", value_vector
+R.assign "dataset", dataset_vector
+R.eval "df <- data.frame(factor(smi),values,factor(dataset))"
+R.eval "df$smi <- reorder(df$factor.smi,df$values)"
+R.eval "img <- ggplot(df, aes(smi,values,ymin = min(values), ymax=max(values),color=dataset))"
+R.eval "img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())"
+R.eval "img <- img + geom_point()"
+
+R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael-dataset-comparison-mmol_kg_day.svg', plot=img,width=12, height=8)"
+=end
+
+# Hand both vectors to R for the correlation printed below.
+R.assign "old", old_neg_log_means
+R.assign "new", new_neg_log_means
+=begin
+R.eval "df <- data.frame(old,new)"
+R.eval "img <- ggplot(df, aes(old,new))"
+R.eval "img <- img + geom_point()"
+#R.eval "img <- img + geom_smooth(method=lm) "
+R.eval "img <- img + geom_abline(intercept=0.0) "
+R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael-dataset-correlation.svg', plot=img,width=12, height=8)"
+=end
+puts "Correlation old/new:"
+puts "\tr^2: #{R.eval("cor(old,new,use='complete')").to_f**2}"
+puts "\tRMSE: #{rmse}"
+puts "\tMAE: #{mae}"