1 files changed, 0 insertions, 165 deletions
diff --git a/Rakefile b/Rakefile
deleted file mode 100644
index 42ab363..0000000
--- a/Rakefile
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env ruby
-require_relative '../lazar/lib/lazar.rb'
-include OpenTox
-
-#task :default => ["predictions/PA_mutagenicity.id","validations/mutagenicity-merged.id"]
-task :default => ["predictions/PA_mutagenicity.id"]
-
-# summaries
-# predictions
-
-file "predictions/PA_mutagenicity.id" => ["models/mutagenicity-merged.id", "data/PA.id"] do |t| predict t end
-file "predictions/PA_carcinogenicity.id" => ["models/carcinogenicity.id", "data/PA.id"] do |t| predict t end
-
-# validations
-
-file "validations/mutagenicity-merged.id" => "models/mutagenicity-merged.id" do |t| validate_model t end
-file "validations/carcinogenicity.id" => "models/carcinogenicity.id" do |t| validate_model t end
-
-# models
-
-file "models/mutagenicity-merged.id" => "data/mutagenicity-merged.id" do |t| create_model t end
-file "models/carcinogenicity.id" => "data/carcinogenicity.id" do |t| create_model t end
-
-# test data
-
-file "data/PA.id" => "data/PA.sdf" do |t| import_sdf t end
-	
-# training data
-
-file "data/mutagenicity-merged.id" => ["data/hansen.id", "data/kazius.id", "data/efsa.id"] do |t|
-  input = t.prerequisites.collect{|id| Dataset.find(File.read(id).chomp)}
-  source_feature = Feature.where(:name => "Ames test categorisation").first # Kazius
-  target_feature = Feature.where(:name => "Mutagenicity").first
-  merged = Dataset.merge input, {source_feature => target_feature}, {1 => "mutagen", 0 => "nonmutagen"}
-  File.open(t.name,"w+") { |f| f.puts merged.id }
-end
-
-file "data/carcinogenicity.id" do |t|
-  File.open(t.name,"w+") do |f| 
-    d = Dataset.from_pubchem 1205
-    f.puts d.id
-  end
-end
-
-# kazius
-
-file "data/kazius.id" => "data/cas_4337.sdf" do |t| import_sdf t end
-
-file "data/cas_4337.sdf" => "data/cas_4337.zip" do |t|
-	`cd data && unzip cas_4337.zip`
-end
-
-file "data/cas_4337.zip" do
-	`cd data && wget "http://cheminformatics.org/datasets/bursi/cas_4337.zip"`
-end
-
-# efsa
-
-file "data/efsa.id" => "data/efsa.csv" do |t| import_csv t end
-
-file "data/efsa.csv" => "data/GENOTOX_data_and_dictionary.tsv" do |t| efsa2csv t end
-
-file "data/GENOTOX_data_and_dictionary.tsv" => "data/GENOTOX_data_and_dictionary.xls" do |t|
-  `xls2csv -s cp1252 -d utf-8 -x -c "	" #{t.name} > #{t.prerequisites[0]}`
-end
-
-file "data/GENOTOX_data_and_dictionary.xls" do |t|
-  `cd data && wget "https://data.europa.eu/euodp/data/storage/f/2017-07-19T142131/GENOTOX%20data%20and%20dictionary.xls" -o #{t.name}`
-end
-
-# hansen
-
-file "data/hansen.id" => "data/hansen.csv" do |t| import_csv t end
-
-file "data/hansen.csv" => "data/Mutagenicity_N6512.csv" do |t|
-  File.open(t.name,"w+") do |f| 
-    i = 0
-    CSV.foreach(t.prerequisites[0]) do |row|
-      if i == 0
-        f.puts "ID,SMILES,Mutagenicity"
-      else
-        c = OpenTox::Compound.from_smiles(row[5]).smiles
-        f.puts ["hansen_#{row[0]}",c, row[2]].join ","
-      end
-      i += 1
-    end
-  end
-end
-
-file "data/Mutagenicity_N6512.csv" do
-  `cd data && wget "http://doc.ml.tu-berlin.de/toxbenchmark/Mutagenicity_N6512.csv"`
-end
-
-# cleanup
-
-task :clean do `rm data/*id` end
-task :cleanall do `rm data/*` end
-
-# functions
-
-def import_csv t
-  File.open(t.name,"w+") do |f| 
-    d = Dataset.from_csv_file t.prerequisites[0]
-    f.puts d.id
-  end
-end
-
-def import_sdf t
-  File.open(t.name,"w+") do |f| 
-    d = Dataset.from_sdf_file t.prerequisites[0]
-    f.puts d.id
-  end
-end
-
-def efsa2csv t
-  File.open(t.name,"w+") do |f| 
-    i = 0
-    db = {}
-    CSV.foreach(t.prerequisites[0], :encoding => "UTF-8", :col_sep => "\t", :liberal_parsing => true) do |row|
-      if i > 0 and row[11] and !row[11].empty? and row[24].match(/Salmonella/i) and ( row[25].match("TA 98") or row[25].match("TA 100") ) and row[33]
-        begin
-          c = OpenTox::Compound.from_smiles(row[11].gsub('"','')).smiles
-        rescue
-          c = OpenTox::Compound.from_inchi(row[12]).smiles # some smiles (row[11]) contain non-parseable characters
-        end
-        db[c] ||= {}
-        db[c][:id] ||= "efsa_#{row[2]}"
-        if row[33].match(/Positiv/i)
-          db[c][:value] = 1 # at least one positive result in TA 98 or TA 100
-        elsif row[33].match(/Negativ/i)
-          db[c][:value] ||= 0
-        end
-      end
-      i += 1
-    end
-
-    f.puts "ID,SMILES,Mutagenicity"
-    db.each do |s,v|
-      f.puts [v[:id],s,v[:value]].join ","
-    end
-  end
-end
-
-def create_model t
-  File.open(t.name,"w+") do |f| 
-    model = Model::LazarClassification.create(training_dataset: Dataset.find(File.read(t.prerequisites.first).chomp))
-    f.puts model.id
-  end
-end
-
-def validate_model t
-  File.open(t.name,"w+") do |f| 
-    rcv = Validation::RepeatedCrossValidation.create(Model.find(File.read(t.prerequisites.first).chomp))
-    f.puts rcv.id
-  end
-end
-
-def predict t
-  File.open(t.name,"w+") do |f| 
-    model = Model::LazarClassification.find t.prerequisites[0]
-    dataset = Dataset.find t.prerequisites[1]
-    prediction = model.predict dataset
-    f.puts prediction.id
-  end
-end