summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2014-10-07 16:38:28 +0200
committer mguetlein <martin.guetlein@gmail.com> 2014-10-07 16:38:28 +0200
commit cd2d1a5aabbc0cd1c3a58923845d2642589df67f (patch)
tree ceb402d2966c253e9f600222f36e6350c3363ef1
parent 465448493e6d400d9c67fdbc0e1632c2927c3fd7 (diff)
fix calculation of cdk features for test compounds, fix error with missing values in training data features in last column
-rw-r--r-- lib/lazar.rb 21
1 files changed, 19 insertions, 2 deletions
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 6ce19d8..2416569 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -133,10 +133,27 @@ module OpenTox
end
@training_fingerprints = @feature_dataset.data_entries
+ # fill trailing missing values with nil
+ @training_fingerprints = @training_fingerprints.collect do |values|
+ values << nil while (values.size < @feature_dataset.features.size)
+ values
+ end
@training_compounds = @training_dataset.compounds
- query_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compounds, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )#.collect{|row| row.collect{|val| val ? val.to_f : 0.0 } }
-
+ feature_names = @feature_dataset.features.collect{ |f| f[RDF::DC.title] }
+ # one Cdk descriptor may produce several features, e.g., Cdk.WienerNumbers produces Cdk.WienerNumbers.WPATH and Cdk.WienerNumbers.WPOL
+ # -> strip suffix and use the feature only once
+ feature_names = feature_names.collect do |f|
+ if f=~/Cdk/ and f.count(".")==2
+ f[0..(f.rindex(".")-1)]
+ else
+ f
+ end
+ end
+ feature_names.uniq!
+
+ query_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compounds, feature_names )#.collect{|row| row.collect{|val| val ? val.to_f : 0.0 } }
+
compounds.each do |compound|
database_activities = @training_dataset.values(compound,@prediction_feature)