summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: helma@in-silico.ch <helma@in-silico.ch> 2018-10-30 17:26:59 +0100
committer: helma@in-silico.ch <helma@in-silico.ch> 2018-10-30 17:26:59 +0100
commit: d61f78093f4ddf03c27a2c8ae0bab9c1f10c80f5 (patch)
tree: 5e04969abc61ba1cc0f889ea3a4b2496878741c1 /lib
parent: d7d57cb83dbc78b2aade173bde5fa893ebf634fe (diff)
tests fixed
Diffstat (limited to 'lib')
-rw-r--r-- lib/compound.rb 10
-rw-r--r-- lib/dataset.rb 42
-rw-r--r-- lib/import.rb~ (renamed from lib/import.rb) 0
-rw-r--r-- lib/lazar.rb 2
-rw-r--r-- lib/model.rb 11
-rw-r--r-- lib/opentox.rb 7
-rw-r--r-- lib/train-test-validation.rb 2
7 files changed, 40 insertions, 34 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 9c07626..8b4bb48 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -10,7 +10,6 @@ module OpenTox
field :inchikey, type: String
field :names, type: Array
field :cid, type: String
- #field :chemblid, type: String
field :png_id, type: BSON::ObjectId
field :svg_id, type: BSON::ObjectId
field :sdf_id, type: BSON::ObjectId
@@ -232,15 +231,6 @@ module OpenTox
self["cid"]
end
-=begin
- # Get ChEMBL database compound id, obtained via REST call to ChEMBL
- # @return [String]
- def chemblid
- update(:chemblid => JSON.parse(RestClientWrapper.get(File.join CHEMBL_URI,URI.escape(smiles)+".json"))["molecule_chembl_id"])
- self["chemblid"]
- end
-=end
-
def db_neighbors min_sim: 0.2, dataset_id:
#p fingerprints[DEFAULT_FINGERPRINT]
# from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 78f5633..4543e42 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -41,12 +41,14 @@ module OpenTox
end
# Get all values for a given substance and feature
- # @param [OpenTox::Substance,BSON::ObjectId] substance or substance id
- # @param [OpenTox::Feature,BSON::ObjectId] feature or feature id
+ # @param [OpenTox::Substance,BSON::ObjectId,String] substance or substance id
+ # @param [OpenTox::Feature,BSON::ObjectId,String] feature or feature id
# @return [Array<TrueClass,FalseClass,Float>] values
def values substance,feature
substance = substance.id if substance.is_a? Substance
feature = feature.id if feature.is_a? Feature
+ substance = BSON::ObjectId.from_string(substance) if substance.is_a? String
+ feature = BSON::ObjectId.from_string(feature) if feature.is_a? String
data_entries.select{|row| row[0] == substance and row[1] == feature}.collect{|row| row[2]}
end
@@ -86,6 +88,8 @@ module OpenTox
features.select{|f| f._type.match("SubstanceProperty")}
end
+ # Get nominal and numeric prediction features
+ # @return [Array<OpenTox::NominalLazarPrediction,OpenTox::NumericLazarPrediction>]
def prediction_features
features.select{|f| f._type.match("Prediction")}
end
@@ -377,19 +381,6 @@ module OpenTox
# Dataset operations
- # Merge an array of datasets
- # @param [Array<OpenTox::Dataset>] datasets to be merged
- # @return [OpenTox::Dataset] merged dataset
- def self.merge datasets
- dataset = self.create(:source => datasets.collect{|d| d.id.to_s}.join(", "), :name => datasets.collect{|d| d.name}.uniq.join(", "))
- datasets.each do |d|
- dataset.data_entries += d.data_entries
- dataset.warnings += d.warnings
- end
- dataset.save
- dataset
- end
-
# Copy a dataset
# @return OpenTox::Dataset dataset copy
def copy
@@ -434,6 +425,27 @@ module OpenTox
end
chunks
end
+=begin
+ # Merge an array of datasets
+ # @param [Array<OpenTox::Dataset>] datasets to be merged
+ # @return [OpenTox::Dataset] merged dataset
+ def self.merge datasets: datasets, features: features, value_maps: value_maps, keep_original_features: keep_original_features, remove_duplicates: remove_duplicates
+ dataset = self.create(:source => datasets.collect{|d| d.id.to_s}.join(", "), :name => datasets.collect{|d| d.name}.uniq.join(", ")+" merged")
+ datasets.each_with_index do |d,i|
+ dataset.data_entries += d.data_entries
+ dataset.warnings += d.warnings
+ end
+ feature_classes = features.collect{|f| f.class}.uniq
+ if feature_classes.size == 1
+ if features.first.nominal?
+ merged_feature = MergedNominalBioActivity.find_or_create_by(:name => features.collect{|f| f.name} + " (merged)", :original_feature_id => feature.id, :transformation => map, :accept_values => map.values.sort)
+ compounds.each do |c|
+ values(c,feature).each { |v| dataset.add c, new_feature, map[v] }
+ end
+ dataset.save
+ dataset
+ end
+=end
# Change nominal feature values
# @param [NominalFeature] Original feature
diff --git a/lib/import.rb b/lib/import.rb~
index 0857717..0857717 100644
--- a/lib/import.rb
+++ b/lib/import.rb~
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 7e813e4..69a6f15 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -97,5 +97,5 @@ CLASSES = ["Feature","Substance","Dataset","CrossValidation","LeaveOneOutValidat
"train-test-validation.rb",
"leave-one-out-validation.rb",
"crossvalidation.rb",
- "import.rb",
+ #"import.rb",
].each{ |f| require_relative f }
diff --git a/lib/model.rb b/lib/model.rb
index 7eaa469..6d5cf7b 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -46,6 +46,7 @@ module OpenTox
model.prediction_feature_id = prediction_feature.id
model.training_dataset_id = training_dataset.id
model.name = "#{prediction_feature.name} (#{training_dataset.name})"
+
# git or gem versioning
dir = File.dirname(__FILE__)
path = File.expand_path("../", File.expand_path(dir))
@@ -485,6 +486,8 @@ module OpenTox
model.is_a? LazarClassification
end
+ # TODO from_pubchem_aid
+
# Create and validate a lazar model from a csv file with training data and a json file with metadata
# @param [File] CSV file with two columns. The first line should contain either SMILES or InChI (first column) and the endpoint (second column). The first column should contain either the SMILES or InChI of the training compounds, the second column the training compounds toxic activities (qualitative or quantitative). Use -log10 transformed values for regression datasets. Add metadata to a JSON file with the same basename containing the fields "species", "endpoint", "source" and "unit" (regression only). You can find example training data at https://github.com/opentox/lazar-public-data.
# @return [OpenTox::Model::Validation] lazar model with three independent 10-fold crossvalidations
@@ -533,6 +536,14 @@ module OpenTox
end
+ # TODO
+ def to_json
+ "{\n metadata:#{super},\n model:#{model.to_json}, repeated_crossvalidations:#{repeated_crossvalidations.to_json}\n}"
+ end
+
+ def from_json_file
+ end
+
end
end
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 9cc8260..fb2a579 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -11,13 +11,6 @@ module OpenTox
include Mongoid::Timestamps
store_in collection: klass.downcase.pluralize
field :name, type: String
- #field :source, type: String
- #field :warnings, type: Array, default: []
-
-# def warn warning
- #$logger.warn warning
-# warnings << warning
-# end
end
OpenTox.const_set klass,c
end
diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
index 8231728..bffee8c 100644
--- a/lib/train-test-validation.rb
+++ b/lib/train-test-validation.rb
@@ -28,7 +28,7 @@ module OpenTox
end
predictions.select!{|cid,p| p[:value] and p[:measurements]}
# hack to avoid mongos file size limit error on large datasets
- predictions.each{|cid,p| p[:neighbors] = []} #if model.training_dataset.name.match(/mutagenicity/i)
+ predictions.each{|cid,p| p.delete(:neighbors)} #if model.training_dataset.name.match(/mutagenicity/i)
validation = self.new(
:model_id => validation_model.id,
:test_dataset_id => test_set.id,