summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2012-07-04 15:16:58 +0200
committer mguetlein <martin.guetlein@gmail.com> 2012-07-04 15:16:58 +0200
commit c3865b1887979933494778703e469c549335a6c3 (patch)
tree 8caeca53533a17505165c8c801beaa6a95027645
parent 411f35653986227ddc314526043854f3c36e014e (diff)
dirty speedup hack for parse csv
-rw-r--r-- lib/parser.rb 15
-rw-r--r-- lib/r-util.rb 22
2 files changed, 22 insertions, 15 deletions
diff --git a/lib/parser.rb b/lib/parser.rb
index 2193cf4..7bf46cf 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -373,7 +373,8 @@ module OpenTox
}
}
- input.each_with_index { |row, i|
+ first_data_row = true
+ input.each_with_index { |row, i|
drop=false
row = split_row(row)
raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
@@ -382,7 +383,10 @@ module OpenTox
drop=true
drop_missing=true if (row.count("") == row.size-1)
end
- add_values(row, regression_features) unless (drop_missing && drop)
+ unless (drop_missing && drop)
+ add_values(row, regression_features, first_data_row)
+ first_data_row = false
+ end
if (drop_missing && drop)
@format_errors << "Row #{i} not added"
end
@@ -458,7 +462,7 @@ module OpenTox
# @param Array A row split up as an array
# @param Array Indicator for regression for each field
# @param Array Indices for duplicate features
- def add_values(row, regression_features)
+ def add_values(row, regression_features, add_features=true)
id = row.shift
case id
@@ -475,6 +479,8 @@ module OpenTox
@duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
@duplicates[compound.inchi] << id+", "+row.join(", ")
+ @dataset.add_compound(compound.uri)
+
feature_idx = 0
row.each_index do |i|
@@ -502,7 +508,8 @@ module OpenTox
feature_idx += 1
if val != nil
- @dataset.add(compound.uri, feature, val)
+ @dataset.add_feature(feature) if add_features
+ @dataset.add(compound.uri, feature, val, true)
if @feature_types[feature].include? OT.NominalFeature
@dataset.features[feature][OT.acceptValue] = [] unless @dataset.features[feature][OT.acceptValue]
@dataset.features[feature][OT.acceptValue] << val unless @dataset.features[feature][OT.acceptValue].include?(val)
diff --git a/lib/r-util.rb b/lib/r-util.rb
index 8357d0f..bbf98f3 100644
--- a/lib/r-util.rb
+++ b/lib/r-util.rb
@@ -61,11 +61,11 @@ module OpenTox
# <0 -> array1 << array2
# 0 -> no significant difference
# >0 -> array2 >> array1
- def paired_ttest(array1, array2, significance_level=0.95)
+ def ttest(array1, array2, paired, significance_level=0.95)
@r.assign "v1",array1
@r.assign "v2",array2
- paired = array1.size==array2.size ? "T" : "F"
- @r.eval "ttest = t.test(as.numeric(v1),as.numeric(v2),paired=#{paired})"
+ raise if paired && array1.size!=array2.size
+ @r.eval "ttest = t.test(as.numeric(v1),as.numeric(v2),paired=#{paired ? "T" : "F"})"
t = @r.pull "ttest$statistic"
p = @r.pull "ttest$p.value"
if (1-significance_level > p)
@@ -83,7 +83,7 @@ module OpenTox
end
- def ttest(array1, value2, significance_level=0.95)
+ def ttest_single_value(array1, value2, significance_level=0.95)
@r.assign "v1",array1
@r.eval "ttest = t.test(as.numeric(v1),conf.level=#{significance_level})"
min = @r.pull "ttest$conf.int[1]"
@@ -151,7 +151,7 @@ module OpenTox
hlines << [max_median,2+max_median_idx]
hlines << [min_median,2+min_median_idx]
plot_to_files(files, hlines) do |file|
- @r.eval "boxplot(boxdata,main='#{title}',col=rep(2:#{data.size+1})#{param_str})"
+ @r.eval "superboxplot(boxdata,main='#{title}',col=rep(2:#{data.size+1})#{param_str})"
end
end
@@ -522,20 +522,20 @@ module OpenTox
begin File.delete(tmp); rescue; end
end
- @svg_plot_width = 14
- @svg_plot_height = 10
-
+ @@svg_plot_width = 12
+ @@svg_plot_height = 8
+
public
def set_svg_plot_size(width,height)
- @svg_plot_width = width
- @svg_plot_height = height
+ @@svg_plot_width = width
+ @@svg_plot_height = height
end
private
def plot_to_files(files,hlines=nil)
files.each do |file|
if file=~/(?i)\.svg/
- @r.eval("svg('#{file}',#{@svg_plot_width},#{@svg_plot_height})")
+ @r.eval("svg('#{file}',#{@@svg_plot_width},#{@@svg_plot_height})")
elsif file=~/(?i)\.png/
@r.eval("png('#{file}')")
else