author    | Andreas Maunz <andreas@maunz.de> | 2012-02-10 09:31:31 +0100
committer | Andreas Maunz <andreas@maunz.de> | 2012-02-10 09:31:31 +0100
commit    | c4d79f636827def2b6ac288275570ecfc7187bf1 (patch)
tree      | d74f64ad68b9fe8a24789974a1255fe03cdf743f
parent    | a56315499d714a783078d4a02c8982ccdb510cff (diff)
parent    | 771514f7a6be11b87def56577ea09327ef328246 (diff)
Merge branch 'pc_new_1' of github.com:opentox/opentox-ruby into pc_new_1
-rw-r--r-- | lib/algorithm.rb     |   2
-rw-r--r-- | lib/dataset.rb       |  40
-rw-r--r-- | lib/opentox-ruby.rb  |   2
-rw-r--r-- | lib/parser.rb        |   2
-rw-r--r-- | lib/r-util.rb        | 354
-rw-r--r-- | lib/stratification.R | 201
-rw-r--r-- | lib/task.rb          |   8
7 files changed, 602 insertions, 7 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 9e9e62d..db21c46 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -92,7 +92,7 @@ module OpenTox
         LOGGER.warn "No #{feature} activity for #{compound.to_s}."
       else
         if @prediction_feature.feature_type == "classification"
-          activity= value_map.invert[value].to_i # activities are mapped to 1..n
+          activity= value_map.invert[value.to_s].to_i # activities are mapped to 1..n
           @db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
         elsif @prediction_feature.feature_type == "regression"
           activity= value.to_f
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 8f76ee7..95c1918 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -373,7 +373,45 @@ module OpenTox
       dataset.save(subjectid)
       dataset
     end
-
+
+    # merges two dataset into a new dataset (by default uses all compounds and features)
+    # precondition: both datasets are fully loaded
+    # @param [OpenTox::Dataset] dataset1 to merge
+    # @param [OpenTox::Dataset] dataset2 to merge
+    # @param [Hash] metadata
+    # @param [optional,String] subjectid
+    # @param [optional,Array] features1, if specified only this features of dataset1 are used
+    # @param [optional,Array] features2, if specified only this features of dataset2 are used
+    # @param [optional,Array] compounds1, if specified only this compounds of dataset1 are used
+    # @param [optional,Array] compounds2, if specified only this compounds of dataset2 are used
+    # example: if you want no features from dataset2, give empty array as features2
+    def self.merge( dataset1, dataset2, metadata, subjectid=nil, features1=nil, features2=nil, compounds1=nil, compounds2=nil )
+      features1 = dataset1.features.keys unless features1
+      features2 = dataset2.features.keys unless features2
+      compounds1 = dataset1.compounds unless compounds1
+      compounds2 = dataset2.compounds unless compounds2
+      data_combined = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid)
+      LOGGER.debug("merging datasets #{dataset1.uri} and #{dataset2.uri} to #{data_combined.uri}")
+      [[dataset1, features1, compounds1], [dataset2, features2, compounds2]].each do |dataset,features,compounds|
+        compounds.each{|c| data_combined.add_compound(c)}
+        features.each do |f|
+          m = dataset.features[f]
+          m[OT.hasSource] = dataset.uri unless m[OT.hasSource]
+          data_combined.add_feature(f,m)
+          compounds.each do |c|
+            dataset.data_entries[c][f].each do |v|
+              data_combined.add(c,f,v)
+            end if dataset.data_entries[c] and dataset.data_entries[c][f]
+          end
+        end
+      end
+      metadata = {} unless metadata
+      metadata[OT.hasSource] = "Merge from #{dataset1.uri} and #{dataset2.uri}" unless metadata[OT.hasSource]
+      data_combined.add_metadata(metadata)
+      data_combined.save(subjectid)
+      data_combined
+    end
+
     # Save dataset at the dataset service
     # - creates a new dataset if uri is not set
     # - overwrites dataset if uri exists
diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb
index 1fa2a86..d25632c 100644
--- a/lib/opentox-ruby.rb
+++ b/lib/opentox-ruby.rb
@@ -9,6 +9,6 @@ rescue LoadError
 end
 
 ['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature',
-  'rest_client_wrapper', 'authorization', 'policy', 'helper', 'to-html', 'ontology' ].each do |lib|
+  'rest_client_wrapper', 'authorization', 'policy', 'helper', 'to-html', 'ontology', 'r-util' ].each do |lib|
   require lib
 end
diff --git a/lib/parser.rb b/lib/parser.rb
index ae8ada6..18c0ba7 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -57,7 +57,7 @@ module OpenTox
       `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
         triple = line.to_triple
         if triple[0] == @uri
-          if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types
+          if triple[1] == RDF.type || triple[1]==OT.predictedVariables || triple[1]==OT.independentVariables # allow multiple types
             @metadata[triple[1]] = [] unless @metadata[triple[1]]
             @metadata[triple[1]] << triple[2].split('^^').first
           else
diff --git a/lib/r-util.rb b/lib/r-util.rb
new file mode 100644
index 0000000..7163c46
--- /dev/null
+++ b/lib/r-util.rb
@@ -0,0 +1,354 @@
+# pending: package dir hack ---------
+# CONFIG[:base_dir] = "/home/<user>/opentox-ruby/www"
+# PACKAGE_DIR = "/home/<user>/opentox-ruby/r-packages"
+package_dir = CONFIG[:base_dir].split("/")
+package_dir[-1] = "r-packages"
+package_dir = package_dir.join("/")
+PACKAGE_DIR = package_dir
+
+require "tempfile"
+
+module OpenTox
+
+  class RUtil
+
+    @@feats = {}
+
+    def initialize
+      @r = RinRuby.new(true,false) unless defined?(@r) and @r
+      @r.eval ".libPaths('#{PACKAGE_DIR}')"
+      @r_packages = @r.pull "installed.packages()[,1]"
+      ["sampling","gam","vegan"].each{|l| install_package(l)} #"caret", "smacof", "TunePareto"
+      @r.eval "source('#{File.join(Gem.loaded_specs['opentox-ruby'].full_gem_path,'lib/stratification.R')}')"
+    end
+
+    def quit_r
+      begin
+        @r.quit
+        @r = nil
+      rescue
+      end
+    end
+
+    def r
+      @r
+    end
+
+    def package_installed?( package )
+      @r_packages.include?(package)
+    end
+
+    def install_package( package )
+      unless package_installed?(package)
+        LOGGER.debug "r-util> installing r-package #{package} to #{PACKAGE_DIR}"
+        @r.eval "install.packages('#{package}', repos='http://cran.r-project.org', lib='#{PACKAGE_DIR}')"
+      end
+    end
+
+    # <0 -> array1 << array2
+    # 0 -> no significant difference
+    # >0 -> array2 >> array1
+    def paired_ttest(array1, array2, significance_level=0.95)
+      @r.assign "v1",array1
+      @r.assign "v2",array2
+      @r.eval "ttest = t.test(as.numeric(v1),as.numeric(v2),paired=T)"
+      t = @r.pull "ttest$statistic"
+      p = @r.pull "ttest$p.value"
+      if (1-significance_level > p)
+        t
+      else
+        0
+      end
+    end
+
+    # example:
+    # files = ["/tmp/box.svg","/tmp/box.png"]
+    # data = [ [ :method, [4,4,5,5,4,3,2] ], [ :method2, [1,2,3,4,5,4,6] ], [ :asdf, [9,1,8,0,7,1,6] ] ]
+    # boxplot(files, data, "comparison1" )
+    #
+    def boxplot(files, data, title="")
+      LOGGER.debug("r-util> create boxplot")
+      assign_dataframe("boxdata",data.collect{|e| e[1]}.transpose,nil,data.collect{|e| e[0].to_s})
+      plot_to_files(files) do |file|
+        @r.eval "boxplot(boxdata,main='#{title}',col=rep(2:#{data.size+1}))"
+      end
+    end
+
+    # embedds feature values of two datasets into 2D and plots it
+    # fast_plot = true -> PCA, fast_plot = false -> SMACOF (iterative optimisation method)
+    #
+    def feature_value_plot(files, dataset_uri1, dataset_uri2, dataset_name1, dataset_name2,
+        features=nil, fast_plot=true, subjectid=nil, waiting_task=nil)
+
+      raise "r-package smacof missing" if fast_plot==false and !package_installed?("smacof")
+      LOGGER.debug("r-util> create feature value plot")
+      d1 = OpenTox::Dataset.find(dataset_uri1,subjectid)
+      d2 = OpenTox::Dataset.find(dataset_uri2,subjectid)
+      if features
+        [d1, d2].each{|d| features.each{|f| raise "feature not included" unless d.features.keys.include?(f)}}
+      else
+        raise "different\n#{d1.features.keys.sort.to_yaml}\n#{d2.features.keys.sort.to_yaml}" if
+          (d1.features.keys.sort != d2.features.keys.sort)
+        features = d1.features.keys
+      end
+      raise "at least two features needed" if d1.features.keys.size<2
+      waiting_task.progress(25) if waiting_task
+
+      df1 = dataset_to_dataframe(d1,0,subjectid,features)
+      df2 = dataset_to_dataframe(d2,0,subjectid,features)
+      waiting_task.progress(50) if waiting_task
+
+      @r.eval "df <- rbind(#{df1},#{df2})"
+      @r.eval "split <- c(rep(0,nrow(#{df1})),rep(1,nrow(#{df2})))"
+      @r.names = [dataset_name1, dataset_name2]
+      LOGGER.debug("r-util> - convert data to 2d")
+      @r.eval "df.2d <- plot_pre_process(df, method='#{(fast_plot ? "pca" : "smacof")}')"
+      waiting_task.progress(75) if waiting_task
+
+      if fast_plot
+        info = "main='PCA-Embedding of #{features.size} features',xlab='PC1',ylab='PC2'"
+      else
+        info = "main='SMACOF-Embedding of #{features.size} features',xlab='x',ylab='y'"
+      end
+      LOGGER.debug("r-util> - plot data")
+      plot_to_files(files) do |file|
+        @r.eval "plot_split( df.2d, split, names, #{info})"
+      end
+    end
+
+    # plots a double histogram
+    # data1 and data2 are arrays with values, either numerical or categorial (string values)
+    # is_numerical, boolean flag indicating value types
+    # log (only for numerical), plot logarithm of values
+    def double_hist_plot(files, data1, data2, is_numerical, log=false, name1="first", name2="second", title="title", xaxis="x-values")
+      LOGGER.debug("r-util> create double hist plot")
+      all = data1 + data2
+      if (is_numerical)
+        @r.eval "double_plot <- function(data1, data2, log=FALSE, names=c('data1','data2'), title='title', xlab='x-values')
+          {
+            if (log)
+            {
+              data1 <- log(data1)
+              data2 <- log(data2)
+              xlab = paste('logarithm of',xlab,sep=' ')
+            }
+            xlims <- round(c(min(c(min(data1),min(data2))),max(c(max(data1),max(data2)))))
+            h <- hist(rbind(data1,data2),plot=F)
+            h1 <- hist(data1,plot=F,breaks=h$breaks)
+            h2 <- hist(data2,plot=F,breaks=h$breaks)
+            xlims = c(min(h$breaks),max(h$breaks))
+            ylims = c(0,max(h1$counts,h2$counts))
+            xaxps = c(min(h$breaks),max(h$breaks),(length(h$breaks)-1))
+            plot(h1, col=rgb(1,0,0,2/4), xlim=xlims, xaxp=xaxps, ylim=ylims,
+              main=title, xlab=xlab, ylab='counts' )
+            plot(h2, col=rgb(0,1,0,2/4), add=T )
+            legend('topleft',names,lty=c(1,1),col=c('red','green'))
+          }"
+        @r.assign("data1",data1)
+        @r.assign("data2",data2)
+        @r.legend = [name1, name2]
+      else
+        raise "log not valid for categorial" if log
+        vals = all.uniq.sort!
+        counts1 = vals.collect{|e| data1.count(e)}
+        counts2 = vals.collect{|e| data2.count(e)}
+        @r.data1 = counts1
+        @r.data2 = counts2
+        @r.value_names = [name1, name2]
+        @r.legend = vals
+        @r.eval("data <- cbind(data1,data2)")
+      end
+
+      plot_to_files(files) do |file|
+        if (is_numerical)
+          @r.eval "double_plot(data1,data2,log=#{log ? "T":"F"},names=legend,title='#{title}',xlab='#{xaxis}')"
+        else
+          @r.eval("bp <- barplot(data, beside=T, names.arg=value_names,
+            main='#{title}', col=sort(rep(2:3,length(legend))))") #legend.text=c(legend),
+          @r.eval "text(bp, 0, round(data, 1),cex=1,pos=3)"
+        end
+      end
+    end
+
+    # stratified splits a dataset into two dataset the feature values
+    # all features are taken into account unless <split_features> is given
+    def stratified_split( dataset, missing_values="NA", pct=0.3, subjectid=nil, seed=42, split_features=nil )
+      raise "not a loaded ot-dataset" unless dataset.is_a?(OpenTox::Dataset) and dataset.compounds.size>0 and dataset.features.size>0
+      LOGGER.debug("r-util> apply stratified split to #{dataset.uri}")
+
+      df = dataset_to_dataframe( dataset, missing_values, subjectid, split_features )
+      @r.eval "set.seed(#{seed})"
+      @r.eval "split <- stratified_split(#{df}, ratio=#{pct})"
+      split = @r.pull 'split'
+      split = split.collect{|s| 1-s.to_i} # reverse 1s and 0s, as 1 means selected, but 0 will be first set
+      split_to_datasets( df, split, subjectid )
+    end
+
+    # dataset should be loaded completely (use Dataset.find)
+    # takes duplicates into account
+    # replaces missing values with param <missing_value>
+    # returns dataframe-variable-name in R
+    def dataset_to_dataframe( dataset, missing_value="NA", subjectid=nil, features=nil )
+      LOGGER.debug "r-util> convert dataset to dataframe #{dataset.uri}"
+
+      # count duplicates
+      num_compounds = {}
+      dataset.features.keys.each do |f|
+        dataset.compounds.each do |c|
+          if dataset.data_entries[c]
+            val = dataset.data_entries[c][f]
+            size = val==nil ? 1 : val.size
+            num_compounds[c] = num_compounds[c]==nil ? size : [num_compounds[c],size].max
+          else
+            num_compounds[c] = 1
+          end
+        end
+      end
+
+      # use either all, or the provided features, sorting is important as col-index := features
+      if features
+        features.sort!
+      else
+        features = dataset.features.keys.sort
+      end
+      compounds = []
+      dataset.compounds.each do |c|
+        num_compounds[c].times do |i|
+          compounds << c
+        end
+      end
+
+      # values into 2D array, then to dataframe
+      d_values = []
+      dataset.compounds.each do |c|
+        num_compounds[c].times do |i|
+          c_values = []
+          features.each do |f|
+            if dataset.data_entries[c]
+              val = dataset.data_entries[c][f]
+              v = val==nil ? "" : val[i].to_s
+            else
+              raise "wtf" if i>0
+              v = ""
+            end
+            v = missing_value if v.size()==0
+            c_values << v
+          end
+          d_values << c_values
+        end
+      end
+      df_name = "df_#{dataset.uri.split("/")[-1].split("?")[0]}"
+      assign_dataframe(df_name,d_values,compounds,features)
+
+      # set dataframe column types accordingly
+      f_count = 1 #R starts at 1
+      features.each do |f|
+        feat = OpenTox::Feature.find(f,subjectid)
+        nominal = feat.metadata[RDF.type].to_a.flatten.include?(OT.NominalFeature)
+        if nominal
+          @r.eval "#{df_name}[,#{f_count}] <- as.character(#{df_name}[,#{f_count}])"
+        else
+          @r.eval "#{df_name}[,#{f_count}] <- as.numeric(#{df_name}[,#{f_count}])"
+        end
+        f_count += 1
+      end
+      #@r.eval "head(#{df_name})"
+
+      # store compounds, and features (including metainformation)
+      @@feats[df_name] = {}
+      features.each do |f|
+        @@feats[df_name][f] = dataset.features[f]
+      end
+      df_name
+    end
+
+    # converts a dataframe into a dataset (a new dataset is created at the dataset webservice)
+    # this is only possible if a superset of the dataframe was created by dataset_to_dataframe (metadata and URIs!)
+    def dataframe_to_dataset( df, subjectid=nil )
+      dataframe_to_dataset_indices( df, subjectid, nil)
+    end
+
+    private
+    def dataframe_to_dataset_indices( df, subjectid=nil, compound_indices=nil )
+      raise unless @@feats[df].size>0
+      values, compounds, features = pull_dataframe(df)
+      features.each{|f| raise unless @@feats[df][f]}
+      dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid)
+      LOGGER.debug "r-util> convert dataframe to dataset #{dataset.uri}"
+      compounds.size.times{|i| dataset.add_compound(compounds[i]) if compound_indices==nil or compound_indices.include?(i)}
+      features.each{|f| dataset.add_feature(f,@@feats[df][f])}
+      features.size.times do |c|
+        feat = OpenTox::Feature.find(features[c],subjectid)
+        nominal = feat.metadata[RDF.type].to_a.flatten.include?(OT.NominalFeature)
+        compounds.size.times do |r|
+          if compound_indices==nil or compound_indices.include?(r)
+            dataset.add(compounds[r],features[c],nominal ? values[r][c] : values[r][c].to_f) if values[r][c]!="NA"
+          end
+        end
+      end
+      dataset.save(subjectid)
+      dataset
+    end
+
+    def split_to_datasets( df, split, subjectid=nil )
+      sets = []
+      (split.min.to_i .. split.max.to_i).each do |i|
+        indices = []
+        split.size.times{|j| indices<<j if split[j]==i}
+        dataset = dataframe_to_dataset_indices( df, subjectid, indices )
+        LOGGER.debug("r-util> split into #{dataset.uri}, c:#{dataset.compounds.size}, f:#{dataset.features.size}")
+        sets << dataset
+      end
+      sets
+    end
+
+    def pull_dataframe(df)
+      tmp = File.join(Dir.tmpdir,Time.new.to_f.to_s+"_"+rand(10000).to_s+".csv")
+      @r.eval "write.table(#{df},file='#{tmp}',sep='#')"
+      res = []; compounds = []; features = []
+      first = true
+      file = File.new(tmp, 'r')
+      file.each_line("\n") do |row|
+        if first
+          features = row.chomp.split("#").collect{|e| e.gsub("\"","")}
+          first = false
+        else
+          vals = row.chomp.split("#").collect{|e| e.gsub("\"","")}
+          compounds << vals[0]
+          res << vals[1..-1]
+        end
+      end
+      begin File.delete(tmp); rescue; end
+      return res, compounds, features
+    end
+
+    def assign_dataframe(df,input,rownames,colnames)
+      tmp = File.join(Dir.tmpdir,Time.new.to_f.to_s+"_"+rand(10000).to_s+".csv")
+      file = File.new(tmp, 'w')
+      input.each{|i| file.puts(i.collect{|e| "\"#{e}\""}.join("#")+"\n")}
+      file.flush
+      @r.rownames = rownames if rownames
+      @r.colnames = colnames
+      @r.eval "#{df} <- read.table(file='#{tmp}',sep='#',"+
+        "#{rownames ? "row.names=rownames" : ""},col.names=colnames,check.names=F)"
+      begin File.delete(tmp); rescue; end
+    end
+
+    def plot_to_files(files)
+      files.each do |file|
+        if file=~/(?i)\.svg/
+          @r.eval("svg('#{file}',10,8)")
+        elsif file=~/(?i)\.png/
+          @r.eval("png('#{file}')")
+        else
+          raise "invalid format: "+file.to_s
+        end
+        yield file
+        LOGGER.debug "r-util> plotted to #{file}"
+        @r.eval("dev.off()")
+      end
+    end
+  end
+end
+
diff --git a/lib/stratification.R b/lib/stratification.R
new file mode 100644
index 0000000..76ff2d8
--- /dev/null
+++ b/lib/stratification.R
@@ -0,0 +1,201 @@
+
+nominal_to_binary <- function( data )
+{
+  result = NULL
+  for (i in 1:ncol(data))
+  {
+    #print(i)
+    if (is.numeric( data[,i] ) )
+    {
+      if (is.null(result))
+        result = data.frame(data[,i])
+      else
+        result = data.frame(result, data[,i])
+      colnames(result)[ncol(result)] <- colnames(data)[i]
+    }
+    else
+    {
+      vals = unique(data[,i])
+      for (j in 1:length(vals))
+      {
+        #print(j)
+        bins = c()
+        for (k in 1:nrow(data))
+        {
+          if(data[,i][k] == vals[j])
+            bins = c(bins,1)
+          else
+            bins = c(bins,0)
+        }
+        #print(bins)
+        if (is.null(result))
+          result = data.frame(bins)
+        else
+          result = data.frame(result, bins)
+        colnames(result)[ncol(result)] <- paste(colnames(data)[i],"is",vals[j])
+        if (length(vals)==2) break
+      }
+    }
+  }
+  #print(head(result))
+  result
+}
+
+process_data <- function( data )
+{
+  data.num <- as.data.frame(data)
+  if (!is.numeric(data.num))
+  {
+    data.num = nominal_to_binary(data.num)
+  }
+  if(any(is.na(data.num)))
+  {
+    require("gam")
+    data.repl = na.gam.replace(data.num)
+  }
+  else
+    data.repl = data.num
+  data.repl
+}
+
+cluster <- function( data, min=10, max=15 )
+{
+  require("vegan")
+  max <- min(max,nrow(unique(data)))
+  max <- min(max,nrow(data)-1)
+  if (min>max)
+    min=max
+  print(paste("cascade k-means ",min," - ",max))
+  s = cascadeKM(data,min,max,iter=30)
+  m = max.col(s$results)[2]
+  print(paste("best k-means clustering result: ",((m-1)+min)," num clusters"))
+  cbind(s$partition[,m])
+}
+
+stratified_split <- function( data, ratio=0.3, method="cluster" )
+{
+  data.processed = as.matrix(process_data( data ))
+  if (method == "samplecube")
+  {
+    require("sampling")
+    # adjust ratio to make samplecube return exact number of samples
+    ratio = round(nrow(data.processed)*ratio)/nrow(data.processed)
+    pik = rep(ratio,times=nrow(data.processed))
+    data.strat = cbind(pik,data.processed)
+    samplecube(data.strat,pik,order=2,comment=F)
+  }
+  else if (method == "cluster")
+  {
+    cl = cluster(data.processed)
+#    require("caret")
+#    res = createDataPartition(cl,p=ratio)
+#    split = rep(1, times=nrow(data))
+#    for (j in 1:nrow(data))
+#      if ( is.na(match(j,res$Resample1)) )
+#        split[j]=0
+#    split
+    require("sampling")
+    stratified_split(cl,ratio,"samplecube")
+  }
+  else
+    stop("unknown method")
+}
+
+stratified_k_fold_split <- function( data, num_folds=10, method="cluster" )
+{
+  print(paste(num_folds,"-fold-split, data-size",nrow(data)))
+  data.processed = as.matrix(process_data( data ))
+  if (method == "samplecube")
+  {
+    folds = rep(0, times=nrow(data))
+    for (i in 1:(num_folds-1))
+    {
+      require("sampling")
+      prop = 1/(num_folds-(i-1))
+      print(paste("fold",i,"/",num_folds," prop",prop))
+      pik = rep(prop,times=nrow(data))
+      for (j in 1:nrow(data))
+        if(folds[j]!=0)
+          pik[j]=0
+      data.strat = cbind(pik,data.processed)
+      s<-samplecube(data.strat,pik,order=2,comment=F)
+      print(paste("fold size: ",sum(s)))
+      for (j in 1:nrow(data))
+        if (s[j] == 1)
+          folds[j]=i
+    }
+    for (j in 1:nrow(data))
+      if (folds[j] == 0)
+        folds[j]=num_folds
+    folds
+  }
+  else if (method == "cluster")
+  {
+    require("TunePareto")
+    cl = cluster(data.processed)
+    res = generateCVRuns(cl,ntimes=1,nfold=3)
+    folds = rep(0, times=nrow(data))
+    for (i in 1:num_folds)
+      for(j in 1:length(res[[1]][[i]]))
+        folds[res[[1]][[i]][j]]=i
+    folds
+  }
+  else
+    stop("unknown method")
+}
+
+plot_pre_process <- function( data, method="pca" )
+{
+  data.processed = process_data( data )
+  if (method == "pca")
+  {
+    data.pca <- prcomp(data.processed, scale=TRUE)
+    as.data.frame(data.pca$x)[1:2]
+  }
+  else if (method == "smacof")
+  {
+    require("smacof")
+    data.emb <- smacofSym(dist(data.processed, method = "euclidean"), ndim=2, verbose=T)
+    data.emb$conf
+  }
+  else
+    stop("unknown method")
+}
+
+plot_split <- function( data, split, names=NULL, ... )
+{
+  if (ncol(data)!=2 || !is.numeric(data[,1]) || !is.numeric(data[,2]))
+    stop("data not suitable for plotting, plot_pre_process() first")
+
+  plot( NULL, xlim = extendrange(data[,1]), ylim = extendrange(data[,2]), ... )
+  if (is.null(names))
+    names <- c("split 1","split 2")
+  colos = as.double(rep(2:(max(split)+2)))
+  legend("topleft",names,pch=2,col=colos)
+
+  for (j in max(split):0)
+  {
+    set = c()
+    for (i in 1:nrow(data))
+      if (split[i] == j)
+        set = c(set,i)
+    points(data[set,], pch = 2, col=(j+2))
+  }
+}
+
+#a<-matrix(rnorm(100, mean=50, sd=4), ncol=5)
+#b<-matrix(rnorm(5000, mean=0, sd=10), ncol=5)
+#data<-rbind(a,b)
+#c<-matrix(rnorm(50, mean=-50, sd=2), ncol=5)
+#data<-rbind(data,c)
+#data=iris
+#split = stratified_k_fold_split(data, num_folds=3)
+#split = stratified_split(data, ratio=0.33, method="cluster")
+#print(sum(split))
+#plot_split(plot_pre_process(data),split,c("training","test"))
+
+#cl = cluster(data)
+
+
+
diff --git a/lib/task.rb b/lib/task.rb
index 66825cd..102f4dc 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -242,18 +242,20 @@ module OpenTox
     # waits for a task, unless time exceeds or state is no longer running
     # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
     # @param [optional,Numeric] dur seconds pausing before cheking again for completion
-    def wait_for_completion( waiting_task=nil, dur=0.3)
+    def wait_for_completion( waiting_task=nil)
       waiting_task.waiting_for(self.uri) if waiting_task
       due_to_time = Time.new + DEFAULT_TASK_MAX_DURATION
+      start_time = Time.new
+      dur = 0
       LOGGER.debug "start waiting for task "+@uri.to_s+" at: "+Time.new.to_s+", waiting at least until "+due_to_time.to_s
       load_metadata # for extremely fast tasks
       check_state
       while self.running? or self.queued?
         sleep dur
-        #LOGGER.debug "dv ---------------- dur: '#{dur}'"
-        dur = dur*2 unless dur>=30.0
+        dur = [[(Time.new - start_time)/20.0,0.3].max,300.0].min
+        #LOGGER.debug "task-object-id: #{self.object_id} - wait: #{"%.2f"%(Time.new - start_time)} - dur: #{"%.2f"%dur}"
         load_metadata
         # if another (sub)task is waiting for self, set progress accordingly
         waiting_task.progress(@metadata[OT.percentageCompleted].to_f) if waiting_task
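
The commit adds two user-facing entry points: Dataset.merge (lib/dataset.rb) and the RUtil helper around stratification.R (lib/r-util.rb). The following sketch is illustrative only and not part of the commit; it assumes a configured opentox-ruby environment, a reachable dataset service, and uses hypothetical dataset URIs.

    require 'opentox-ruby'

    # hypothetical URIs of two fully loaded datasets on a dataset service
    d1 = OpenTox::Dataset.find("http://example.org/dataset/1")
    d2 = OpenTox::Dataset.find("http://example.org/dataset/2")

    # merge both datasets into a new dataset; per the method comment,
    # passing an empty array as features2 would take no features from d2
    merged = OpenTox::Dataset.merge(d1, d2, {})

    # stratified split via the new R bridge (pct=0.3, missing values as "NA");
    # returns two new datasets, ordering/fractions follow the r-util.rb convention
    rutil = OpenTox::RUtil.new
    split_a, split_b = rutil.stratified_split(merged, "NA", 0.3)
    rutil.quit_r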
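The task.rb change replaces the doubling back-off in wait_for_completion with an interval of one twentieth of the elapsed waiting time, clamped to 0.3–300 seconds. A minimal standalone sketch of that rule (assumed helper name, not part of the commit):

    # mirrors the new expression: dur = [[(Time.new - start_time)/20.0,0.3].max,300.0].min
    def poll_interval(elapsed_seconds)
      [[elapsed_seconds / 20.0, 0.3].max, 300.0].min
    end

    [1, 60, 600, 10_000].each do |t|
      puts "elapsed %6d s -> sleep %.1f s" % [t, poll_interval(t)]
    end
    # elapsed      1 s -> sleep 0.3 s
    # elapsed     60 s -> sleep 3.0 s
    # elapsed    600 s -> sleep 30.0 s
    # elapsed  10000 s -> sleep 300.0 s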