c9d367e17f79f6fa75c06b638bbbf8e63156d9bf
[opentox-ruby] / lib / model.rb
1 module OpenTox
2
3   module Model
4
5     include OpenTox
6
7     # Run a model with parameters
8     # @param [Hash] params Parameters for OpenTox model
9     # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
10     # @return [text/uri-list] Task or resource URI
11     def run( params, accept_header=nil, waiting_task=nil )
12       unless accept_header
13         if CONFIG[:json_hosts].include?(URI.parse(@uri).host)
14           accept_header = 'application/json' 
15         else
16           accept_header = 'application/rdf+xml'
17         end
18       end
19       LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
20       RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
21     end
22
23     # Generic OpenTox model class for all API compliant services
24     class Generic
25       include Model
26
27       # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error 
28       # @param [String] uri Model URI
29       # @return [OpenTox::Model::Generic] Model instance
30       def self.find(uri,subjectid=nil)
31         return nil unless uri
32         model = Generic.new(uri)
33         model.load_metadata(subjectid)
34         raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
35         model
36       end
37
38       # provides feature type, possible types are "regression" or "classification"
39       # @return [String] feature type, "unknown" if type could not be estimated
40       def feature_type(subjectid=nil)
41         unless @feature_type
42           load_predicted_variables( subjectid ) unless @predicted_variable
43           @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
44         end
45         @feature_type
46       end
47     
48       def predicted_variable( subjectid )
49         load_predicted_variables( subjectid ) unless @predicted_variable
50         @predicted_variable
51       end
52
53       def predicted_variables( subjectid )
54         load_predicted_variables( subjectid, false ) unless @predicted_variables
55         @predicted_variables
56       end
57
58       def predicted_confidence( subjectid )
59         load_predicted_variables( subjectid ) unless @predicted_confidence
60         @predicted_confidence
61       end
62   
63       private
64       def load_predicted_variables( subjectid=nil, use_confidence=true )
65         load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
66         if @metadata[OT.predictedVariables]
67           predictedVariables = @metadata[OT.predictedVariables]
68           if predictedVariables.is_a?(Array)
69             if (predictedVariables.size==1)
70               @predicted_variable = predictedVariables[0]
71             elsif (predictedVariables.size>=2)
72               # PENDING identify confidence
73               if use_confidence
74                 conf_index = -1
75                 predictedVariables.size.times do |i|
76                   f = OpenTox::Feature.find(predictedVariables[i], subjectid)
77                   conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
78                 end
79                 raise "could not estimate predicted variable from model: '"+uri.to_s+
80                   "', number of predicted-variables==2, but no confidence found" if conf_index==-1
81               end
82               if (predictedVariables.size==2) && use_confidence
83                 @predicted_variable = predictedVariables[1-conf_index]
84                 @predicted_confidence = predictedVariables[conf_index]
85               else
86                 @predicted_variables = predictedVariables
87               end
88             else
89               raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"  
90             end
91           else
92             raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
93           end        
94         end
95         raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
96       end
97     end
98
99     # Lazy Structure Activity Relationship class
100     class Lazar
101
102       include Algorithm
103       include Model
104
105
106       attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :subjectid, :value_map, :compound_fingerprints, :feature_calculation_algorithm, :neighbors, :compounds
107       def initialize(uri=nil)
108
109         if uri
110           super uri
111         else
112           super CONFIG[:services]["opentox-model"]
113         end
114
115         @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
116
117         @features = []
118         @effects = {}
119         @activities = {}
120         @p_values = {}
121         @fingerprints = {}
122         @value_map = {}
123
124         @feature_calculation_algorithm = "Substructure.match"
125         @similarity_algorithm = "Similarity.tanimoto"
126         @prediction_algorithm = "Neighbors.weighted_majority_vote"
127         
128       end
129
130       # Get URIs of all lazar models
131       # @return [Array] List of lazar model URIs
132       def self.all(subjectid=nil)
133         RestClientWrapper.get(CONFIG[:services]["opentox-model"], :subjectid => subjectid).to_s.split("\n")
134       end
135
136       # Find a lazar model
137       # @param [String] uri Model URI
138       # @return [OpenTox::Model::Lazar] lazar model
139       def self.find(uri, subjectid=nil)
140         OpenTox::Model::Lazar.from_json RestClientWrapper.get(uri,{:accept => 'application/json', :subjectid => subjectid})
141       end
142
143       # Create a new lazar model
144       # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
145       # @return [OpenTox::Model::Lazar] lazar model
146       def self.create(params, waiting_task=nil )
147         subjectid = params[:subjectid]
148         lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
149         model_uri = lazar_algorithm.run(params, waiting_task)
150         OpenTox::Model::Lazar.find(model_uri, subjectid)      
151       end
152
153       def self.from_json(json)
154         hash = Yajl::Parser.parse(json)
155         #LOGGER.debug hash.to_yaml
156         lazar = OpenTox::Model::Lazar.new
157         #hash.each { |k,v| eval("lazar.#{k} = #{v}") }
158         lazar.uri = hash["uri"] if hash["uri"]
159         lazar.metadata = hash["metadata"] if hash["metadata"]
160         lazar.compound = hash["compound"] if hash["compound"]
161         lazar.prediction_dataset = hash["prediction_dataset"] if hash["prediction_dataset"]
162         lazar.features = hash["features"] if hash["features"]
163         lazar.effects = hash["effects"] if hash["effects"]
164         lazar.activities = hash["activities"] if hash["activities"]
165         lazar.p_values = hash["p_values"] if hash["p_values"]
166         lazar.fingerprints = hash["fingerprints"] if hash["fingerprints"]
167         lazar.feature_calculation_algorithm = hash["feature_calculation_algorithm"] if hash["feature_calculation_algorithm"]
168         lazar.similarity_algorithm = hash["similarity_algorithm"] if hash["similarity_algorithm"]
169         lazar.prediction_algorithm = hash["prediction_algorithm"] if hash["prediction_algorithm"]
170         lazar.subjectid = hash["subjectid"] if hash["subjectid"]
171         lazar.value_map = hash["value_map"] if hash["value_map"]
172         lazar.compounds = hash["compounds"] if hash["compounds"]
173
174         lazar
175       end
176
177       def to_json
178         Yajl::Encoder.encode({:uri => @uri,:metadata => @metadata, :compound => @compound, :prediction_dataset => @prediction_dataset, :features => @features, :effects => @effects, :activities => @activities, :p_values => @p_values, :fingerprints => @fingerprints, :feature_calculation_algorithm => @feature_calculation_algorithm, :similarity_algorithm => @similarity_algorithm, :prediction_algorithm => @prediction_algorithm, :subjectid => @subjectid, :value_map => @value_map, :compounds => @compounds})
179       end
180
181       def run( params, accept_header=nil, waiting_task=nil )
182       unless accept_header
183         if CONFIG[:json_hosts].include?(URI.parse(@uri).host)
184           accept_header = 'application/json' 
185         else
186           accept_header = 'application/rdf+xml'
187         end
188       end
189       LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
190       RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
191       end
192
193       # Get a parameter value
194       # @param [String] param Parameter name
195       # @return [String] Parameter value
196       def parameter(param)
197         @metadata[OT.parameters].collect{|p| p[OT.paramValue] if p[DC.title] == param}.compact.first
198       end
199
200       # Predict a dataset
201       # @param [String] dataset_uri Dataset URI
202       # @param [optional,subjectid] 
203       # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
204       # @return [OpenTox::Dataset] Dataset with predictions
205       def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
206       
207         @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
208         @prediction_dataset.add_metadata({
209           OT.hasSource => @uri,
210           DC.creator => @uri,
211           DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
212           OT.parameters => [{DC.title => "dataset_uri", OT.paramValue => dataset_uri}]
213         })
214         d = Dataset.new(dataset_uri,subjectid)
215         d.load_compounds(subjectid)
216         count = 0
217         d.compounds.each do |compound_uri|
218           begin
219             predict(compound_uri,false,subjectid)
220             count += 1
221             waiting_task.progress( count/d.compounds.size.to_f*100.0 ) if waiting_task
222           rescue => e
223             LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+e.message+" subjectid: #{subjectid}"
224             #LOGGER.debug "#{e.class}: #{e.message}"
225             #LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
226
227           end
228         end
229         #@prediction_dataset.save(subjectid)
230         @prediction_dataset
231       end
232
233       # Predict a compound
234       # @param [String] compound_uri Compound URI
235       # @param [optinal,Boolean] verbose Verbose prediction (output includes neighbors and features)
236       # @return [OpenTox::Dataset] Dataset with prediction
237       def predict(compound_uri,verbose=false,subjectid=nil)
238
239         @compound = Compound.new compound_uri
240         features = {}
241
242         #LOGGER.debug self.to_yaml
243         unless @prediction_dataset
244           @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
245           @prediction_dataset.add_metadata( {
246             OT.hasSource => @uri,
247             DC.creator => @uri,
248             DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
249             OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
250           } )
251         end
252
253         unless database_activity(subjectid) # adds database activity to @prediction_dataset
254
255           # Calculation of needed values for query compound
256           @compound_features = eval("#{@feature_calculation_algorithm}({
257                                     :compound => @compound, 
258                                     :features => @features, 
259                                     :feature_dataset_uri => @metadata[OT.featureDataset],
260                                     :pc_type => self.parameter(\"pc_type\"),
261                                     :lib => self.parameter(\"lib\"),
262                                     :subjectid => subjectid
263                                     })")
264           
265           # Adding fingerprint of query compound with features and values(p_value*nr_hits)
266           @compound_fingerprints = {}
267           @compound_features.each do |feature, value| # value is nil if "Substructure.match"
268             if @feature_calculation_algorithm == "Substructure.match_hits" 
269               @compound_fingerprints[feature] = @p_values[feature] * value
270             elsif @feature_calculation_algorithm == "Substructure.match"
271               @compound_fingerprints[feature] = @p_values[feature]
272             elsif @feature_calculation_algorithm == "Substructure.lookup"
273               @compound_fingerprints[feature] = value
274             end
275           end
276
277           # Transform model data to machine learning scheme (tables of data)
278           mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
279           mtf.transform
280
281           # Make a prediction
282           prediction = eval("#{@prediction_algorithm}( { :props => mtf.props,
283                                                           :acts => mtf.acts,
284                                                           :sims => mtf.sims,
285                                                           :value_map => @value_map,
286                                                           :min_train_performance => self.parameter(\"min_train_performance\")
287                                                         } ) ")
288
289           value_feature_uri = File.join( @uri, "predicted", "value")
290           confidence_feature_uri = File.join( @uri, "predicted", "confidence")
291
292           @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables] 
293           @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables] 
294
295           if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
296             @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction].to_s]
297           else
298             @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
299           end
300           @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
301           @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
302           @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
303
304           if verbose
305             if @feature_calculation_algorithm == "Substructure.match"
306               f = 0
307               @compound_features.each do |feature|
308                 feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
309                 features[feature] = feature_uri
310                 @prediction_dataset.add_feature(feature_uri, {
311                   RDF.type => [OT.Substructure],
312                   OT.smarts => feature,
313                   OT.pValue => @p_values[feature],
314                   OT.effect => @effects[feature]
315                 })
316                 @prediction_dataset.add @compound.uri, feature_uri, true
317                 f+=1
318               end
319             elsif @feature_calculation_algorithm == "Substructure.lookup"
320               f = 0
321               @compound_features.each do |feature, value|
322                 features[feature] = feature
323                 @prediction_dataset.add_feature(feature, {
324                   RDF.type => [OT.NumericFeature]
325                 })
326                 @prediction_dataset.add @compound.uri, feature, value
327                 f+=1
328               end
329             else
330               @compound_features.each do |feature|
331                 features[feature] = feature
332                 @prediction_dataset.add @compound.uri, feature, true
333               end
334             end
335             n = 0
336             @neighbors.each do |neighbor|
337               neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
338               @prediction_dataset.add_feature(neighbor_uri, {
339                 OT.compound => neighbor[:compound],
340                 OT.similarity => neighbor[:similarity],
341                 OT.measuredActivity => neighbor[:activity],
342                 RDF.type => [OT.Neighbor]
343               })
344               @prediction_dataset.add @compound.uri, neighbor_uri, true
345               f = 0 unless f
346               neighbor[:features].each do |feature|
347                 if @feature_calculation_algorithm == "Substructure.match"
348                   feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
349                 else
350                   feature_uri = feature
351                 end
352                 if @feature_calculation_algorithm == "Substructure.lookup"
353                   @prediction_dataset.add neighbor[:compound], feature_uri, @fingerprints[neighbor[:compound]][feature_uri]
354                 else
355                   @prediction_dataset.add neighbor[:compound], feature_uri, true
356                 end
357
358                 unless features.has_key? feature
359                   features[feature] = feature_uri
360                   if @feature_calculation_algorithm == "Substructure.lookup"
361                     @prediction_dataset.add_feature(feature_uri, {
362                       RDF.type => [OT.NumericFeature]
363                     })
364                   else
365                     @prediction_dataset.add_feature(feature_uri, {
366                       RDF.type => [OT.Substructure],
367                       OT.smarts => feature,
368                       OT.pValue => @p_values[feature],
369                       OT.effect => @effects[feature]
370                     })
371                   end
372                   f+=1
373                 end
374               end
375               n+=1
376             end
377           end
378         end
379
380         @prediction_dataset.save(subjectid)
381         @prediction_dataset
382       end
383
384       # Find database activities and store them in @prediction_dataset
385       # @return [Boolean] true if compound has databasse activities, false if not
386       def database_activity(subjectid)
387         if @activities[@compound.uri]
388           if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
389             @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act.to_s] }
390           else
391             @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
392           end
393           @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
394           @prediction_dataset.save(subjectid)
395           true
396         else
397           false
398         end
399       end
400
401       def prediction_features
402         [prediction_value_feature,prediction_confidence_feature]
403       end
404
405       def prediction_value_feature
406         dependent_uri = @metadata[OT.dependentVariables].first
407         feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
408         feature.add_metadata( {
409           RDF.type => OT.ModelPrediction,
410           OT.hasSource => @uri,
411           DC.creator => @uri,
412           DC.title => URI.decode(File.basename( dependent_uri )),
413           OWL.sameAs => dependent_uri
414         })
415         feature
416       end
417
418       def prediction_confidence_feature
419         dependent_uri = @metadata[OT.dependentVariables].first
420         feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
421         feature.add_metadata( {
422           RDF.type => OT.ModelPrediction,
423           OT.hasSource => @uri,
424           DC.creator => @uri,
425           DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
426         })
427         feature
428       end
429
430       # Save model at model service
431       def save(subjectid)
432         self.uri = RestClientWrapper.post(@uri,self.to_json,{:content_type =>  "application/json", :subjectid => subjectid})
433       end
434
435       # Delete model at model service
436       def delete(subjectid)
437         RestClientWrapper.delete(@uri, :subjectid => subjectid) unless @uri == CONFIG[:services]["opentox-model"]
438       end
439
440     end
441   end
442 end