Fixed confidence for cosine similarity
[opentox-ruby] / lib / model.rb
1 module OpenTox
2
3   module Model
4
5     include OpenTox
6
7     # Run a model with parameters
8     # @param [Hash] params Parameters for OpenTox model
9     # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
10     # @return [text/uri-list] Task or resource URI
11     def run( params, accept_header=nil, waiting_task=nil )
12       unless accept_header
13         if CONFIG[:json_hosts].include?(URI.parse(@uri).host)
14           accept_header = 'application/json' 
15         else
16           accept_header = 'application/rdf+xml'
17         end
18       end
19       LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
20       RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
21     end
22
23     # Generic OpenTox model class for all API compliant services
24     class Generic
25       include Model
26
27       # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error 
28       # @param [String] uri Model URI
29       # @return [OpenTox::Model::Generic] Model instance
30       def self.find(uri,subjectid=nil)
31         return nil unless uri
32         model = Generic.new(uri)
33         model.load_metadata(subjectid)
34         raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
35         model
36       end
37
38       # provides feature type, possible types are "regression" or "classification"
39       # @return [String] feature type, "unknown" if type could not be estimated
40       def feature_type(subjectid=nil)
41         unless @feature_type
42           load_predicted_variables( subjectid ) unless @predicted_variable
43           @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
44         end
45         @feature_type
46       end
47     
48       def predicted_variable( subjectid )
49         load_predicted_variables( subjectid ) unless @predicted_variable
50         @predicted_variable
51       end
52
53       def predicted_variables( subjectid )
54         load_predicted_variables( subjectid, false ) unless @predicted_variables
55         @predicted_variables
56       end
57
58       def predicted_confidence( subjectid )
59         load_predicted_variables( subjectid ) unless @predicted_confidence
60         @predicted_confidence
61       end
62   
63       private
64       def load_predicted_variables( subjectid=nil, use_confidence=true )
65         load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
66         if @metadata[OT.predictedVariables]
67           predictedVariables = @metadata[OT.predictedVariables]
68           if predictedVariables.is_a?(Array)
69             if (predictedVariables.size==1)
70               @predicted_variable = predictedVariables[0]
71             elsif (predictedVariables.size>=2)
72               # PENDING identify confidence
73               if use_confidence
74                 conf_index = -1
75                 predictedVariables.size.times do |i|
76                   f = OpenTox::Feature.find(predictedVariables[i], subjectid)
77                   conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
78                 end
79                 raise "could not estimate predicted variable from model: '"+uri.to_s+
80                   "', number of predicted-variables==2, but no confidence found" if conf_index==-1
81               end
82               if (predictedVariables.size==2) && use_confidence
83                 @predicted_variable = predictedVariables[1-conf_index]
84                 @predicted_confidence = predictedVariables[conf_index]
85               else
86                 @predicted_variables = predictedVariables
87               end
88             else
89               raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"  
90             end
91           else
92             raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
93           end        
94         end
95         raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
96       end
97     end
98
99     # Lazy Structure Activity Relationship class
100     class Lazar
101
102       include Algorithm
103       include Model
104
105
106       attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :subjectid, :value_map, :compound_fingerprints, :feature_calculation_algorithm, :neighbors, :compounds
107       def initialize(uri=nil)
108
109         if uri
110           super uri
111         else
112           super CONFIG[:services]["opentox-model"]
113         end
114
115         @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
116
117         @features = []
118         @effects = {}
119         @activities = {}
120         @p_values = {}
121         @fingerprints = {}
122         @value_map = {}
123
124         @feature_calculation_algorithm = "Substructure.match"
125         @similarity_algorithm = "Similarity.tanimoto"
126         @prediction_algorithm = "Neighbors.weighted_majority_vote"
127         
128       end
129
130       # Get URIs of all lazar models
131       # @return [Array] List of lazar model URIs
132       def self.all(subjectid=nil)
133         RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n")
134       end
135
136       # Find a lazar model
137       # @param [String] uri Model URI
138       # @return [OpenTox::Model::Lazar] lazar model
139       def self.find(uri, subjectid=nil)
140         OpenTox::Model::Lazar.from_json RestClientWrapper.get(uri,{:accept => 'application/json', :subjectid => subjectid})
141       end
142
143       # Create a new lazar model
144       # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
145       # @return [OpenTox::Model::Lazar] lazar model
146       def self.create(params, waiting_task=nil )
147         subjectid = params[:subjectid]
148         lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
149         model_uri = lazar_algorithm.run(params, waiting_task)
150         OpenTox::Model::Lazar.find(model_uri, subjectid)      
151       end
152
153       def self.from_json(json)
154         hash = Yajl::Parser.parse(json)
155         #LOGGER.debug hash.to_yaml
156         lazar = OpenTox::Model::Lazar.new
157         #hash.each { |k,v| eval("lazar.#{k} = #{v}") }
158         lazar.uri = hash["uri"] if hash["uri"]
159         lazar.metadata = hash["metadata"] if hash["metadata"]
160         lazar.compound = hash["compound"] if hash["compound"]
161         lazar.prediction_dataset = hash["prediction_dataset"] if hash["prediction_dataset"]
162         lazar.features = hash["features"] if hash["features"]
163         lazar.effects = hash["effects"] if hash["effects"]
164         lazar.activities = hash["activities"] if hash["activities"]
165         lazar.p_values = hash["p_values"] if hash["p_values"]
166         lazar.fingerprints = hash["fingerprints"] if hash["fingerprints"]
167         lazar.feature_calculation_algorithm = hash["feature_calculation_algorithm"] if hash["feature_calculation_algorithm"]
168         lazar.similarity_algorithm = hash["similarity_algorithm"] if hash["similarity_algorithm"]
169         lazar.prediction_algorithm = hash["prediction_algorithm"] if hash["prediction_algorithm"]
170         lazar.subjectid = hash["subjectid"] if hash["subjectid"]
171         lazar.value_map = hash["value_map"] if hash["value_map"]
172         lazar.compounds = hash["compounds"] if hash["compounds"]
173
174         lazar
175       end
176
177       def to_json
178         Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :subjectid => @subjectid, :value_map => @value_map, :compounds => @compounds})
179       end
180
181       def run( params, accept_header=nil, waiting_task=nil )
182       unless accept_header
183         if CONFIG[:json_hosts].include?(URI.parse(@uri).host)
184           accept_header = 'application/json' 
185         else
186           accept_header = 'application/rdf+xml'
187         end
188       end
189       LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
190       RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
191       end
192
193       # Get a parameter value
194       # @param [String] param Parameter name
195       # @return [String] Parameter value
196       def parameter(param)
197         @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
198       end
199
200       # Predict a dataset
201       # @param [String] dataset_uri Dataset URI
202       # @param [optional,subjectid] 
203       # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
204       # @return [OpenTox::Dataset] Dataset with predictions
205       def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
206       
207         @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
208         @prediction_dataset.add_metadata({
209           OT.hasSource => @uri,
210           DC.creator => @uri,
211           DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
212           OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}]
213         })
214         d = Dataset.new(dataset_uri,subjectid)
215         d.load_compounds(subjectid)
216         count = 0
217         d.compounds.each do |compound_uri|
218           begin
219             predict(compound_uri,false,subjectid)
220             count += 1
221             waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task
222           rescue => e
223             LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+e.message+" subjectid: #{subjectid}"
224             #LOGGER.debug "#{e.class}: #{e.message}"
225             #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
226
227           end
228         end
229         #@prediction_dataset.save(subjectid)
230         @prediction_dataset
231       end
232
233       # Predict a compound
234       # @param [String] compound_uri Compound URI
235       # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features)
236       # @return [OpenTox::Dataset] Dataset with prediction
237       def predict(compound_uri,verbose=false,subjectid=nil)
238
239         @compound = Compound.new compound_uri
240         features = {}
241
242         #LOGGER.debug self.to_yaml
243         unless @prediction_dataset
244           @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
245           @prediction_dataset.add_metadata( {
246             OT.hasSource => @uri,
247             DC.creator => @uri,
248             DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
249             OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
250           } )
251         end
252
253         unless database_activity(subjectid) # adds database activity to @prediction_dataset
254
255           # Calculation of needed values for query compound
256           @compound_features = eval("#{@feature_calculation_algorithm}({
257                                     :compound => @compound, 
258                                     :features => @features, 
259                                     :feature_dataset_uri => @metadata[OT.featureDataset],
260                                     :pc_type => self.parameter(\"pc_type\"),
261                                     :lib => self.parameter(\"lib\"),
262                                     :subjectid => subjectid
263                                     })")
264           
265           # Adding fingerprint of query compound with features and values(p_value*nr_hits)
266           @compound_fingerprints = {}
267           @compound_features.each do |feature, value| # value is nil if "Substructure.match"
268             if @feature_calculation_algorithm == "Substructure.match_hits" 
269               @compound_fingerprints[feature] = @p_values[feature] * value
270             elsif @feature_calculation_algorithm == "Substructure.match"
271               @compound_fingerprints[feature] = @p_values[feature]
272             elsif @feature_calculation_algorithm == "Substructure.lookup"
273               @compound_fingerprints[feature] = value
274             end
275           end
276
277           # Transform model data to machine learning scheme (tables of data)
278           mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
279           mtf.transform
280
281           # Make a prediction
282           prediction = eval("#{@prediction_algorithm}( { :props => mtf.props,
283                                                           :acts => mtf.acts,
284                                                           :sims => mtf.sims,
285                                                           :value_map => @value_map,
286                                                           :min_train_performance => self.parameter(\"min_train_performance\")
287                                                         } ) ")
288
289           value_feature_uri = File.join( @uri, "predicted", "value")
290           confidence_feature_uri = File.join( @uri, "predicted", "confidence")
291
292           @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] 
293           @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] 
294
295           if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
296             @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction].to_s]
297           else
298             @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
299           end
300           confidence=prediction[:confidence]
301           if @similarity_algorithm.to_s =~ /cosine/
302             confidence=((confidence+1.0)/2.0).abs
303           end
304           @prediction_dataset.add @compound.uri, confidence_feature_uri, confidence
305
306           @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
307           @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
308
309           if verbose
310             if @feature_calculation_algorithm == "Substructure.match"
311               f = 0
312               @compound_features.each do |feature|
313                 feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
314                 features[feature] = feature_uri
315                 @prediction_dataset.add_feature(feature_uri, {
316                   RDF.type => [OT.Substructure],
317                   OT.smarts => feature,
318                   OT.pValue => @p_values[feature],
319                   OT.effect => @effects[feature]
320                 })
321                 @prediction_dataset.add @compound.uri, feature_uri, true
322                 f+=1
323               end
324             elsif @feature_calculation_algorithm == "Substructure.lookup"
325               f = 0
326               @compound_features.each do |feature, value|
327                 features[feature] = feature
328                 @prediction_dataset.add_feature(feature, {
329                   RDF.type => [OT.NumericFeature]
330                 })
331                 @prediction_dataset.add @compound.uri, feature, value
332                 f+=1
333               end
334             else
335               @compound_features.each do |feature|
336                 features[feature] = feature
337                 @prediction_dataset.add @compound.uri, feature, true
338               end
339             end
340             n = 0
341             @neighbors.each do |neighbor|
342               neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
343               @prediction_dataset.add_feature(neighbor_uri, {
344                 OT.compound => neighbor[:compound],
345                 OT.similarity => neighbor[:similarity],
346                 OT.measuredActivity => neighbor[:activity],
347                 RDF.type => [OT.Neighbor]
348               })
349               @prediction_dataset.add @compound.uri, neighbor_uri, true
350               f = 0 unless f
351               neighbor[:features].each do |feature|
352                 if @feature_calculation_algorithm == "Substructure.match"
353                   feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
354                 else
355                   feature_uri = feature
356                 end
357                 if @feature_calculation_algorithm == "Substructure.lookup"
358                   @prediction_dataset.add neighbor[:compound], feature_uri, @fingerprints[neighbor[:compound]][feature_uri]
359                 else
360                   @prediction_dataset.add neighbor[:compound], feature_uri, true
361                 end
362
363                 unless features.has_key? feature
364                   features[feature] = feature_uri
365                   if @feature_calculation_algorithm == "Substructure.lookup"
366                     @prediction_dataset.add_feature(feature_uri, {
367                       RDF.type => [OT.NumericFeature]
368                     })
369                   else
370                     @prediction_dataset.add_feature(feature_uri, {
371                       RDF.type => [OT.Substructure],
372                       OT.smarts => feature,
373                       OT.pValue => @p_values[feature],
374                       OT.effect => @effects[feature]
375                     })
376                   end
377                   f+=1
378                 end
379               end
380               n+=1
381             end
382           end
383         end
384
385         @prediction_dataset.save(subjectid)
386         @prediction_dataset
387       end
388
389       # Find database activities and store them in @prediction_dataset
390       # @return [Boolean] true if compound has databasse activities, false if not
391       def database_activity(subjectid)
392         if @activities[@compound.uri]
393           if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
394             @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act.to_s] }
395           else
396             @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
397           end
398           @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
399           @prediction_dataset.save(subjectid)
400           true
401         else
402           false
403         end
404       end
405
406       def prediction_features
407         [prediction_value_feature,prediction_confidence_feature]
408       end
409
410       def prediction_value_feature
411         dependent_uri = @metadata[OT.dependentVariables].first
412         feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
413         feature.add_metadata( {
414           RDF.type => OT.ModelPrediction,
415           OT.hasSource => @uri,
416           DC.creator => @uri,
417           DC.title => URI.decode(File.basename( dependent_uri )),
418           OWL.sameAs => dependent_uri
419         })
420         feature
421       end
422
423       def prediction_confidence_feature
424         dependent_uri = @metadata[OT.dependentVariables].first
425         feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
426         feature.add_metadata( {
427           RDF.type => OT.ModelPrediction,
428           OT.hasSource => @uri,
429           DC.creator => @uri,
430           DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
431         })
432         feature
433       end
434
435       # Save model at model service
436       def save(subjectid)
437         self.uri = RestClientWrapper.post(@uri,self.to_json,{:content_type =>  "application/json", :subjectid => subjectid})
438       end
439
440       # Delete model at model service
441       def delete(subjectid)
442         RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"]
443       end
444
445     end
446   end
447 end