diff options
Diffstat (limited to 'paper/loael-variability.rb')
-rw-r--r-- | paper/loael-variability.rb | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/paper/loael-variability.rb b/paper/loael-variability.rb new file mode 100644 index 0000000..ae74c3f --- /dev/null +++ b/paper/loael-variability.rb @@ -0,0 +1,61 @@ +require_relative '../lazar/lib/lazar' +include OpenTox +#$mongo.database.drop +#$gridfs = $mongo.database.fs # recreate GridFS indexes +# compare duplicates within datasets +#old = Dataset.from_csv_file File.join(File.dirname(__FILE__),"regression","LOAEL_mmol_corrected_smiles.csv") +old = Dataset.from_csv_file File.join(File.dirname(__FILE__),"regression","LOAEL_mg_corrected_smiles_mmol.csv") +#new = Dataset.from_csv_file File.join(File.dirname(__FILE__),"regression","swissRat_chron_LOAEL.csv") +new = Dataset.from_csv_file File.join(File.dirname(__FILE__),"regression","swissRat_chron_LOAEL_mmol.csv") +#combined = Dataset.from_csv_file File.join(File.dirname(__FILE__),"regression","LOAEL-rat-combined.csv") + +compound_vector = [] +value_vector = [] +dataset_vector = [] + +vars = [] + +[old, new].each do |dataset| + vars << [] + #vars[dataset.name] = [] + p dataset.name + p dataset.compounds.size + p dataset.duplicates(dataset.features.first).size + dataset.duplicates.each do |cid,values| + R.assign "values", values + var = R.eval("var(-log(values))").to_f + vars.last << var + #smi = Compound.find(cid).smiles + smi = cid.to_s + values.each do |val| + compound_vector << smi + value_vector << - Math.log(val) + dataset_vector << dataset.name + end + #vars << { :var => var, :values => values, :smiles => smi } + end + #vars.sort!{|a,b| a[:var] <=> b[:var]} + #vars.each do |dup| + #dup[:values].each do |v| + #compound_vector << dup[:smiles] + #value_vector << v + #end + #end +end +#p vars +# TODO statistical test for variances +R.assign "vars1", vars[0] +R.assign "vars2", vars[1] +print "p-value: #{R.eval("t.test(vars1,vars2)$p.value").to_f}" + +R.assign "smi", compound_vector +R.assign "values", value_vector +R.assign "dataset", dataset_vector +R.eval "df <- data.frame(factor(smi),values,factor(dataset))" +R.eval "df$smi <- reorder(df$factor.smi,df$values)" +R.eval "img <- ggplot(df, aes(smi,values,ymin = min(values), ymax=max(values),color=dataset))" +R.eval "img <- img + ylab('-log(LOAEL mg/kg_bw/day)') + xlab('Compound') + theme(axis.text.x = element_blank())" +R.eval "img <- img + geom_point()" +#R.eval "img <- img + scale_x_discrete(breaks=NULL) + geom_point() + coord_flip()"# + xlab('-log(LOAEL)'), ylab('Compound')" +#R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael_variance.svg', plot=img)" +R.eval "ggsave(file='/home/ch/opentox/lazar-nestec-data/loael-variance.svg', plot=img,width=12, height=8)" |