summary refs log tree commit diff
path: root/models/mutagenicity/download/merge.rb
diff options
context:
space:
mode:
Diffstat (limited to 'models/mutagenicity/download/merge.rb')
-rw-r--r-- models/mutagenicity/download/merge.rb 23
1 files changed, 23 insertions, 0 deletions
diff --git a/models/mutagenicity/download/merge.rb b/models/mutagenicity/download/merge.rb
new file mode 100644
index 0000000..51f2f5e
--- /dev/null
+++ b/models/mutagenicity/download/merge.rb
@@ -0,0 +1,23 @@
+#!/usr/bin/env ruby
+ # Merges the Hansen, EFSA and Kazius mutagenicity datasets into one training CSV plus a JSON metadata file under DATA.
+ # merge datasets -- NOTE(review): parts, DATA and the *_url names are not defined in this file; confirm the caller provides them
+ hansen = Dataset.from_csv_file File.join(parts,"hansen.csv")
+ efsa = Dataset.from_csv_file File.join(parts,"efsa.csv")
+ kazius = Dataset.from_sdf_file File.join(parts,"cas_4337.sdf") # the only input read from SDF rather than CSV
+ datasets = [hansen,efsa,kazius]
+ map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"} # passed as the third value_maps entry below; presumably aligned positionally with kazius -- TODO confirm
+ dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: false, remove_duplicates: true
+ dataset.merged_features.first.name = "Mutagenicity" # rename the first merged feature
+ File.open(File.join(DATA,"Mutagenicity-Salmonella_typhimurium.csv"),"w+"){|f| f.puts dataset.to_training_csv}
+ meta = { # metadata written next to the CSV as JSON
+ :species => "Salmonella typhimurium",
+ :endpoint => "Mutagenicity",
+ :source => [kazius_url,hansen_url,efsa_url].join(", "),
+ :qmrf => { "group": "QMRF 4.10. Mutagenicity", "name": "OECD 471 Bacterial Reverse Mutation Test"}, # "key": syntax yields symbol keys (:group, :name)
+ }
+ File.open(File.join(DATA,"Mutagenicity-Salmonella_typhimurium.json"),"w+"){|f| f.puts meta.to_json}
+
+ # cleanup: call delete on the three source datasets and on the merged one
+ datasets << dataset # append the merged dataset so the loop below deletes it too
+ datasets.each{|d| d.delete } # presumably removes the temporary Dataset records from their backing store -- TODO confirm
+ File.join(DATA,"Mutagenicity-Salmonella_typhimurium.csv") # bare expression: path of the CSV written above; presumably the value handed to whatever evaluates this script -- TODO confirm