summary | refs | log | tree | commit | diff
path: root/lazar.rb
diff options
context:
space:
mode:
author Martin Gütlein <martin.guetlein@gmail.com> 2010-03-23 17:17:38 +0100
committer Martin Gütlein <martin.guetlein@gmail.com> 2010-03-23 17:17:38 +0100
commit 8bf1f2deb2fc91100bf6f20d19b4619b19a1e094 (patch)
tree 80e859e3c2ea5f43f8a38f4a28ad080ba39e7549 /lazar.rb
parent c2241eaa829a0a1d4ec0c735c475d8a4512c2a39 (diff)
parent 3c847e5555ac0af41e62f11e6c92d915d8618794 (diff)
resolved conflict
Diffstat (limited to 'lazar.rb')
-rw-r--r-- lazar.rb 198
1 files changed, 132 insertions, 66 deletions
diff --git a/lazar.rb b/lazar.rb
index dc16a7e..b3992da 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -1,27 +1,25 @@
class Lazar < Model
- attr_accessor :dataset, :predictions
-
- def classify(compound_uri)
-
- unless @dataset
- @dataset = OpenTox::Dataset.new
- @predictions = {}
- end
- lazar = YAML.load yaml
+ attr_accessor :prediction_dataset
+
+ def classify(compound_uri,prediction)
+
+ prediction.title += " lazar classification"
+
+ lazar = YAML.load self.yaml
compound = OpenTox::Compound.new(:uri => compound_uri)
- compound_matches = compound.match lazar[:features]
+ compound_matches = compound.match lazar.features
conf = 0.0
- neighbors = []
+ similarities = {}
classification = nil
- lazar[:fingerprints].each do |uri,matches|
+ lazar.fingerprints.each do |uri,matches|
- sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar[:p_values])
+ sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,lazar.p_values)
if sim > 0.3
- neighbors << uri
- lazar[:activities][uri].each do |act|
+ similarities[uri] = sim
+ lazar.activities[uri].each do |act|
case act.to_s
when 'true'
conf += OpenTox::Utils.gauss(sim)
@@ -32,71 +30,125 @@ class Lazar < Model
end
end
- conf = conf/neighbors.size
+ conf = conf/similarities.size
if conf > 0.0
classification = true
elsif conf < 0.0
classification = false
end
-
- compound = @dataset.find_or_create_compound(compound_uri)
- feature = @dataset.find_or_create_feature(lazar[:endpoint]+OpenTox::Model::Lazar::PREDICTION_FEATURE_MODIFIER)
-
if (classification != nil)
- tuple = @dataset.create_tuple(feature,{ 'lazar#classification' => classification, 'lazar#confidence' => conf})
- @dataset.add_tuple compound,tuple
- @predictions[compound_uri] = { lazar[:endpoint] => { :lazar_prediction => {
+ feature_uri = lazar.dependent_variables + "_lazar_classification"
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ tuple = {
:classification => classification,
:confidence => conf,
- :neighbors => neighbors,
+ :similarities => similarities,
:features => compound_matches
- } } }
+ }
+ prediction.data[compound_uri] << {feature_uri => tuple}
end
end
- def database_activity?(compound_uri)
+ def database_activity?(compound_uri,prediction)
# find database activities
lazar = YAML.load self.yaml
- db_activities = lazar[:activities][compound_uri]
+ db_activities = lazar.activities[compound_uri]
if db_activities
- @dataset = OpenTox::Dataset.new
- @predictions = {}
- c = @dataset.find_or_create_compound(compound_uri)
- f = @dataset.find_or_create_feature(lazar[:endpoint])
- v = db_activities.join(',')
- @dataset.add c,f,v
- @predictions[compound_uri] = { lazar[:endpoint] => {:measured_activities => db_activities}}
+ prediction.source = lazar.activity_dataset_uri
+ feature_uri = lazar.dependent_variables
+ prediction.compounds << compound_uri
+ prediction.features << feature_uri
+ prediction.data[compound_uri] = [] unless prediction.data[compound_uri]
+ db_activities.each do |act|
+ prediction.data[compound_uri] << {feature_uri => act}
+ end
true
else
false
end
end
+ def to_owl
+ data = YAML.load(yaml)
+ activity_dataset = YAML.load(RestClient.get(data.activity_dataset_uri, :accept => 'application/x-yaml').to_s)
+ feature_dataset = YAML.load(RestClient.get(data.feature_dataset_uri, :accept => 'application/x-yaml').to_s)
+ owl = OpenTox::Owl.create 'Model', uri
+ owl.source = "http://github.com/helma/opentox-model"
+ owl.title = "#{URI.decode(activity_dataset.title)} lazar classification"
+ owl.date = created_at.to_s
+ owl.algorithm = data.algorithm
+ owl.dependentVariables = activity_dataset.features.join(', ')
+ owl.independentVariables = feature_dataset.features.join(', ')
+ owl.predictedVariables = activity_dataset.features.join(', ') + "_lazar_classification"
+ owl.parameters = {
+ "Dataset URI" =>
+ { :scope => "mandatory", :value => data.activity_dataset_uri },
+ "Feature URI for dependent variable" =>
+ { :scope => "mandatory", :value => activity_dataset.features.join(', ')},
+ "Feature generation URI" =>
+ { :scope => "mandatory", :value => feature_dataset.source }
+ }
+ owl.trainingDataset = data.activity_dataset_uri
+ owl.rdf
+ end
+
+end
+
+get '/:id/?' do
+ accept = request.env['HTTP_ACCEPT']
+ accept = "application/rdf+xml" if accept == '*/*' or accept == '' or accept.nil?
+ # workaround for browser links
+ case params[:id]
+ when /.yaml$/
+ params[:id].sub!(/.yaml$/,'')
+ accept = 'application/x-yaml'
+ when /.rdf$/
+ params[:id].sub!(/.rdf$/,'')
+ accept = 'application/rdf+xml'
+ end
+ model = Lazar.get(params[:id])
+ halt 404, "Model #{params[:id]} not found." unless model
+ case accept
+ when "application/rdf+xml"
+ response['Content-Type'] = 'application/rdf+xml'
+ unless model.owl # lazy owl creation
+ model.owl = model.to_owl
+ model.save
+ end
+ model.owl
+ when /yaml/
+ response['Content-Type'] = 'application/x-yaml'
+ model.yaml
+ else
+ halt 400, "Unsupported MIME type '#{accept}'"
+ end
+end
+
+get '/:id/algorithm/?' do
+ response['Content-Type'] = 'text/plain'
+ YAML.load(Lazar.get(params[:id]).yaml).algorithm
+end
+
+get '/:id/training_dataset/?' do
+ response['Content-Type'] = 'text/plain'
+ YAML.load(Lazar.get(params[:id]).yaml).activity_dataset_uri
+end
+
+get '/:id/feature_dataset/?' do
+ response['Content-Type'] = 'text/plain'
+ YAML.load(Lazar.get(params[:id]).yaml).feature_dataset_uri
end
post '/?' do # create model
- #model = Lazar.new(:task_uri => params[:task_uri])
- #model.uri = url_for("/#{model.id}", :full)
+ halt 400, "MIME type \"#{request.content_type}\" not supported." unless request.content_type.match(/yaml/)
model = Lazar.new
model.save
model.uri = url_for("/#{model.id}", :full)
-# model.uri
-#end
-#
-#put '/:id/?' do # create model from yaml representation
-# model = Lazar.first(params[:id])
- case request.content_type
- when /yaml/
- input = request.env["rack.input"].read
- model.yaml = input
- lazar = OpenTox::Model::Lazar.from_yaml(input)
- lazar.uri = model.uri
- model.owl = lazar.rdf
- model.save
- else
- halt 400, "MIME type \"#{request.content_type}\" not supported."
- end
+ model.yaml = request.env["rack.input"].read
+ model.save
model.uri
end
@@ -106,24 +158,38 @@ post '/:id/?' do # create prediction
halt 404, "Model #{params[:id]} does not exist." unless lazar
halt 404, "No compound_uri or dataset_uri parameter." unless compound_uri = params[:compound_uri] or dataset_uri = params[:dataset_uri]
+ prediction = OpenTox::Dataset.new
+ prediction.source = lazar.uri
+ prediction.title = URI.decode YAML.load(lazar.yaml).dependent_variables.split(/#/).last
+
if compound_uri
- lazar.classify(compound_uri) unless lazar.database_activity?(compound_uri) # FEHLER
- elsif dataset_uri
- input_dataset = OpenTox::Dataset.find(dataset_uri)
- input_dataset.compounds.each do |compound_uri|
- lazar.classify(compound_uri) unless lazar.database_activity?(compound_uri)
+ lazar.classify(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)
+ LOGGER.debug prediction.to_yaml
+ case request.env['HTTP_ACCEPT']
+ when /yaml/
+ prediction.to_yaml
+ when 'application/rdf+xml'
+ prediction.to_owl
+ else
+ halt 404, "Content type #{request.env['HTTP_ACCEPT']} not available."
end
- end
- case request.env['HTTP_ACCEPT']
- when /yaml/
- lazar.predictions.to_yaml
- else
- if params[:compound_uri]
- lazar.dataset.rdf
- elsif params[:dataset_uri]
- lazar.dataset.save
+ elsif dataset_uri
+ task = OpenTox::Task.create
+ pid = Spork.spork(:logger => LOGGER) do
+ task.started
+ input_dataset = OpenTox::Dataset.find(dataset_uri)
+ input_dataset.compounds.each do |compound_uri|
+ lazar.classify(compound_uri,prediction) unless lazar.database_activity?(compound_uri,prediction)
+ end
+ uri = prediction.save.chomp
+ task.completed(uri)
end
+ task.pid = pid
+ LOGGER.debug "Prediction task PID: " + pid.to_s
+ #status 303
+ response['Content-Type'] = 'text/uri-list'
+ task.uri + "\n"
end
end