#!/usr/bin/env ruby

# Merge the Hansen, EFSA and Kazius mutagenicity datasets into one training
# set, then write that set as CSV together with a JSON metadata file into DATA.
#
# Provenance: models/mutagenicity-mp2d/download/merge.rb, newly created in
# commit a29eb3e38414cd252850c9c4fb356f8b2bef6fb4
# (Christoph Helma, Fri, 12 Feb 2021 19:54:07 +0100,
# "model.rb refactored, mp2d models updated").
#
# NOTE(review): `Dataset`, `parts`, `DATA`, `kazius_url`, `hansen_url` and
# `efsa_url` are not defined in this file; they must be provided by the
# surrounding context. `to_json` presumably relies on the json library having
# been loaded elsewhere -- confirm.

# merge datasets
# Load order matters: value_maps below is positional and lines up with it.
sources = [
  Dataset.from_csv_file(File.join(parts, "hansen.csv")),
  Dataset.from_csv_file(File.join(parts, "efsa.csv")),
  Dataset.from_sdf_file(File.join(parts, "cas_4337.sdf"))
]

# Label translation applied only to the third source (cas_4337.sdf).
kazius_labels = { "mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic" }

merged = Dataset.merge(
  datasets: sources,
  features: sources.map { |ds| ds.bioactivity_features.first },
  value_maps: [nil, nil, kazius_labels],
  keep_original_features: false,
  remove_duplicates: true
)
merged.merged_features.first.name = "Mutagenicity"

# Write the merged training data.
csv_path = File.join(DATA, "Mutagenicity-Salmonella_typhimurium.csv")
File.open(csv_path, "w+") do |out|
  out.puts merged.to_training_csv
end

# Write the accompanying metadata.
meta = {
  species: "Salmonella typhimurium",
  endpoint: "Mutagenicity",
  source: [kazius_url, hansen_url, efsa_url].join(", "),
  qmrf: {
    group: "QMRF 4.10. Mutagenicity",
    name: "OECD 471 Bacterial Reverse Mutation Test"
  }
}
json_path = File.join(DATA, "Mutagenicity-Salmonella_typhimurium.json")
File.open(json_path, "w+") do |out|
  out.puts meta.to_json
end

# cleanup
# Delete the three source datasets and the merged one; only the files written
# above are kept.
sources.push(merged).each { |ds| ds.delete }

# Final expression: path of the training CSV that was written.
csv_path