diff options
author | gebele <gebele@in-silico.ch> | 2017-02-07 14:05:17 +0000 |
---|---|---|
committer | gebele <gebele@in-silico.ch> | 2017-02-07 14:05:17 +0000 |
commit | d8b69f89bf99a62aa15a2b828b63fe4ccfd8e9ad (patch) | |
tree | 28cf8bd689ba43d012cf678fde34ebeafdddf589 /scripts |
re-init
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/bbp2blood-brain-barrier.rb | 18 | ||||
-rw-r--r-- | scripts/modena2lazar.rb | 31 |
2 files changed, 49 insertions, 0 deletions
# ---------------------------------------------------------------------------
# scripts/bbp2blood-brain-barrier.rb (new file in this commit, mode 100644)
#
# Converts the tab-separated file ../classification/bbp2.smi into the
# two-column CSV ../classification/blood-brain-barrier.csv.
# For every input row the first field (presumably a SMILES string, going by
# the .smi extension) is paired with a class label derived from the
# second-to-last field:
#   field matches /p/i        -> "penetrating"
#   otherwise matches /n/i    -> "nonpenetrating"
#   anything else             -> row is skipped and reported on stdout
# Both paths are relative, so they are resolved against the current working
# directory.
# ---------------------------------------------------------------------------
# CSV is used directly below, so load it explicitly instead of relying on
# another library to have required it.
require 'csv'
require_relative '../../lazar/lib/lazar'
include OpenTox

CSV.open("../classification/blood-brain-barrier.csv", "wb") do |csv|
  # Options are passed as keywords: a positional options Hash is rejected by
  # CSV.read on Ruby 3.0+, while the keyword form is accepted by older
  # versions as well.
  CSV.read("../classification/bbp2.smi", col_sep: "\t").each do |line|
    smi = line.first
    act = line[-2]
    p smi, act # progress/debug output for every row
    case act
    when /p/i
      csv << [smi, "penetrating"]
    when /n/i
      csv << [smi, "nonpenetrating"]
    else
      # Also reached when the row has fewer than two fields (act is nil).
      p "unknown act '#{act}'"
    end
  end
end

# ---------------------------------------------------------------------------
# scripts/modena2lazar.rb (new file in this commit, mode 100644)
#
# Exploratory pass over ../src/MODENA-EC50_EC25.csv. It collects the values of
# the experimental-condition columns for every data row and prints:
#   1. the number of rows,
#   2. the number of distinct condition combinations,
#   3. every combination with its row count, most frequent first.
# The descriptor export to ../regression/MODENA-descriptors.csv is currently
# disabled (its statements are commented out).
# Both paths are relative, so they are resolved against the current working
# directory.
# ---------------------------------------------------------------------------
require 'csv'
require_relative '../../lazar/lib/lazar'
include OpenTox

# Column names as they appear in the input header row.
descriptors = ["Material","Coating","Primary size 1st Dimension [nm]","Primary size 2nd Dimension [nm]","Aspect ratio","Surface area [m2/g]","Zeta potential [mV]","Size in situ [nm]"]

conditions = ["Assay","Treatment time","Cell type","Serum concentration","Dispersion protocol"]
# Only referenced by the disabled endpoint export stub below.
endpoints = ["EC25 (ug/ml)","EC50 (ug/ml)","slope EC50 (ug/ml)","EC25 (mm2/ml)","EC50 (mm2/ml)","slope EC50 (surface area)","EC25 (10E12 particles/ml)","EC50 (10E12 particles/ml)","slope EC50 (number)"]

# Block form closes the file when the block exits; previously the handle was
# never closed because the matching `close` call is commented out.
# NOTE(review): the file is still created/truncated here although every write
# is disabled, so it ends up empty -- confirm that this is intended.
CSV.open("../regression/MODENA-descriptors.csv", "wb") do |descriptor_csv|
  #endpoint_csvs
  #endpoints.each do |e|

  #descriptor_csv << ["ID"]+descriptors
  input = CSV.read("../src/MODENA-EC50_EC25.csv")
  header = input.shift
  # Only needed by the disabled descriptor export:
  #descriptor_indices = descriptors.collect{|d| header.index d}
  condition_indices = conditions.collect { |c| header.index c }
  condition_parameters = []
  condition_counts = Hash.new(0) # missing combinations start at zero

  input.each do |line|
    # Disabled together with the descriptor export; `id` had no other use and
    # raised on rows whose first or second cell is empty.
    #id = line[0]+"_"+line[1].gsub(" ","_")
    #descriptor_csv << [id]+descriptor_indices.collect{|i| line[i]}
    conds = condition_indices.collect { |i| line[i] }
    condition_parameters << conds
    condition_counts[conds] += 1
  end

  p condition_parameters.size
  p condition_parameters.uniq.size
  p condition_counts.sort { |a, b| b.last <=> a.last }
end