require "rinruby"
require 'statsample'

# Uncomment (and adapt) one of these if R cannot find its package library:
#R.eval(".libPaths(\"C:/Program Files/R/R-3.5.0/library\")")
#R.eval(".libPaths(\"\\\\home.gu.gu.se/home-XB$/xbalek/Documents/R/win-library/3.5\")")
#R.eval(".libPaths(\"U:\\Documents\\R\\win-library\\3.5\")")
#R.eval(".libPaths(\"U:/Documents/R/win-library/4.0\")")
#R.eval(".libPaths(\"C:/Users/xbalek/Work Folders/Documents/R/win-library/3.3\")")
R.eval("library(\"irr\")")
R.eval("library(\"effectsize\")")

# Index of the first per-coder column in the input files.
RATINGS_OFFSET = 5

# Coder set for which the pairwise results are also echoed to STDOUT.
VERBOSE_SOURCE = [6, 7, 8, 9, 10, 11].freeze

# Column names of alpha_rho_analysis.tsv, in output order.
OUTPUT_COLUMNS = %w[
  ncoders ntexts
  mean_alpha_max mean_alpha_min alpha_diff alpha_wilcox_p alpha_r_effect_size
  mean_rho_max mean_rho_min rho_diff rho_wilcox_p rho_r_effect_size
  n_max n_min coders alphas_max alphas_min rhos_max rhos_min
].freeze

# Computes the mean and the population standard deviation (divisor n, not
# n - 1) of a collection of numbers.
#
# @param input [Hash, Array] numbers to summarise; for a Hash the values are used
# @param type [String] "hash" or "array", telling how to read +input+
# @return [Array(Float, Float)] mean and standard deviation (both NaN when
#   the collection is empty)
# @raise [ArgumentError] if +type+ is neither "hash" nor "array"
def stats(input, type)
  values =
    case type
    when "hash" then input.values
    when "array" then input
    else raise ArgumentError, "unknown type #{type.inspect} (expected \"hash\" or \"array\")"
    end

  # Plain left-to-right accumulation is kept on purpose: Array#sum uses
  # compensated summation for floats and could change the last digits.
  mean = values.reduce(0.0) { |acc, value| acc + value } / values.length
  sumsq = values.reduce(0.0) { |acc, value| acc + (mean - value) * (mean - value) }
  [mean, Math.sqrt(sumsq / values.length)]
end

# Returns every distinct unordered selection of +length+ elements of +source+.
#
# The previous implementation enumerated all permutations and compared the
# sorted candidate against a list of unsorted ones, which only de-duplicated
# correctly when +source+ was already ascending (and took factorial time).
# For ascending input the result, including its order, is unchanged.
#
# @param source [Array] elements to choose from
# @param length [Integer] number of elements per selection
# @return [Array<Array>] the selections, each in +source+ order
def construct_combinations_length(source, length)
  # uniq on the sorted form keeps value-level de-duplication when +source+
  # contains repeated elements.
  source.combination(length).uniq(&:sort)
end

# Reads one judgments file into a hash mapping coder id to that coder's
# list of judgments, in file order.
#
# The first line is a tab-separated header whose columns from
# RATINGS_OFFSET on hold the coder ids; every following line is a data row.
#
# @param path [String] input file path
# @param ncoders [Integer] number of coder columns to read
# @return [Hash{Integer => Array<Float>}]
def read_judgments(path, ncoders)
  users = Hash.new { |hash, key| hash[key] = [] }
  userids = []
  File.open(path, "r:utf-8") do |f|
    f.each_line.with_index do |line, index|
      if index.zero?
        userids = line.strip.split("\t")[RATINGS_OFFSET, ncoders]
        next
      end

      # NOTE(review): data rows are split on any whitespace while the header
      # is split on tabs. The columns only line up if none of the first five
      # columns is empty or contains spaces - confirm against the input files.
      fields = line.strip.split
      # A blank line used to add a spurious 0.0 judgment for every coder.
      next if fields.empty?

      ncoders.times do |k|
        users[userids[k].to_i] << fields[RATINGS_OFFSET + k].to_f
      end
    end
  end
  users
end

# Krippendorff's alpha (interval metric) between two rating series, computed
# in R through irr::kripp.alpha on a 2-row matrix.
def krippendorff_alpha(base, other)
  R.assign "base", base
  R.assign "ave", other
  R.eval "m1 <- matrix(c(base,ave),nrow=2,byrow=TRUE)"
  R.pull "kripp.alpha(m1,\"interval\")$value"
end

# Spearman's rho between two rating series.
def spearman_rho(base, other)
  Statsample::Bivariate.spearman(Daru::Vector.new(base), Daru::Vector.new(other))
end

sources = [[1,6,7,8,9,10,11],[6,7,8,9,10,11],[6,7,8,9,10],[6,7,8,11],[7,8,11]]
#sources = [[3,6,7,8,9,10,11]]
#sources = [[7,8,11]]

# Block form guarantees the results file is flushed and closed even if a
# source fails half-way through.
File.open("alpha_rho_analysis.tsv", "w:utf-8") do |o|
  o.puts OUTPUT_COLUMNS.join("\t")
  STDOUT.puts "baseuser\tanotheruser\talpha\trho\tcomb"

  sources.each do |source|
    ncoders = source.length
    path = "sd_source#{source.join(";")}.tsv"
    users = read_judgments(path, ncoders)
    raise "no judgments found in #{path}" if users.empty?

    nvalues = users.values.first.length

    # "max" condition: each coder against the average of all other coders.
    alphas_max = []
    rhos_max = []
    construct_combinations_length(source, ncoders - 1).each do |comb|
      baseuser = source.find { |user| !comb.include?(user) }
      basevalues = users[baseuser]

      sum_values = []
      comb.each do |user|
        users[user].each_with_index do |value, index|
          sum_values[index] = sum_values[index].to_f + value
        end
      end
      ave_values = sum_values.map { |value| value / (ncoders - 1).to_f }

      alphas_max << krippendorff_alpha(basevalues, ave_values)
      rhos_max << spearman_rho(basevalues, ave_values)
      # The Pearson correlation computed here before was never used in any
      # output and has been dropped.
    end

    # "min" condition: every pair of individual coders.
    alphas_min = []
    rhos_min = []
    construct_combinations_length(source, 2).each do |comb|
      baseuser, anotheruser = comb
      alpha = krippendorff_alpha(users[baseuser], users[anotheruser])
      rho = spearman_rho(users[baseuser], users[anotheruser])
      alphas_min << alpha
      rhos_min << rho
      if source == VERBOSE_SOURCE
        STDOUT.puts "#{baseuser}\t#{anotheruser}\t#{alpha}\t#{rho}\t#{comb}"
      end
    end

    # Wilcoxon rank-sum test and rank-biserial effect size, max vs. min.
    R.assign "alphas_max", alphas_max
    R.assign "alphas_min", alphas_min
    alpha_p = R.pull "wilcox.test(alphas_max,alphas_min)$p.value"
    alpha_r = R.pull "rank_biserial(alphas_max,alphas_min)$r"
    R.assign "rhos_max", rhos_max
    R.assign "rhos_min", rhos_min
    rho_p = R.pull "wilcox.test(rhos_max,rhos_min)$p.value"
    rho_r = R.pull "rank_biserial(rhos_max,rhos_min)$r"

    mean_alpha_max = stats(alphas_max, "array")[0]
    mean_alpha_min = stats(alphas_min, "array")[0]
    mean_rho_max = stats(rhos_max, "array")[0]
    mean_rho_min = stats(rhos_min, "array")[0]

    row = [
      ncoders, nvalues,
      mean_alpha_max, mean_alpha_min, mean_alpha_max - mean_alpha_min, alpha_p, alpha_r,
      mean_rho_max, mean_rho_min, mean_rho_max - mean_rho_min, rho_p, rho_r,
      alphas_max.length, alphas_min.length, source.join(","),
      alphas_max.join(","), alphas_min.join(","), rhos_max.join(","), rhos_min.join(",")
    ]
    # map(&:to_s) keeps each cell formatted exactly as string interpolation would.
    o.puts row.map(&:to_s).join("\t")
  end
end