Evaluation of user story voting¶
In [13]:
import os
import pandas
import glob
import numpy as np
import matplotlib.pyplot as plt
from container import Container
Input parameters¶
In [14]:
# --- Input ------------------------------------------------------------------
# Glob pattern matching the xlsx voting tables to process
pattern = "./voting_03-16-20/voting_table_*.xlsx"

# Spreadsheet columns to load
usecols = "A:I"
# Rows to skip while loading: 0-7 and 14-99, which leaves rows 8-13 (0-based)
# of the first 100 rows
skiprows = [*range(8), *range(14, 100)]

# Mapping from the column headers found in the xlsx to unambiguous metric names.
# NOTE(review): the ".1" keys are presumably the second "Quality"/"Efficiency"
# headers as de-duplicated on load -- confirm against the spreadsheets.
column_rename = {
    "Quality": "Product_quality",
    "Efficiency": "Product_efficiency",
    "Scalability": "Product_scalability",
    "Quality.1": "Production_quality",
    "Efficiency.1": "Production_efficiency",
    "Work effort": "Work_effort",
    "Technical effort": "Technical_effort",
    "Financial effort": "Financial_effort",
}

# --- Output -----------------------------------------------------------------
# Directory receiving the csv and png results
outputdir = "./results"
# Resolution of the saved figures
output_dpi = 250
# Figure size handed to matplotlib (width, height)
figsize = (12, 8)
Read all xlsx files¶
In [15]:
# Sort the matches: glob.glob() returns paths in arbitrary, OS-dependent order,
# while the manually assigned "label_no" list in the aggregation step is
# positional and only lines up with the rows if the files are always read in
# the same order.
# NOTE(review): confirm that the manual labels were written against this
# (alphabetical) file order.
files = sorted(glob.glob(pattern))
if not files:
    # Fail with a clear message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError(f"No xlsx files match pattern {pattern!r}")

# Load the selected cell range of every voting table and harmonise its headers
frames = [
    pandas.read_excel(file, usecols=usecols, skiprows=skiprows)
          .rename(columns=column_rename)
    for file in files
]

# Stack the votes of all files row-wise (the per-file row index is kept as is)
df = pandas.concat(frames, axis=0)

# Every column but the first one is treated as a voting metric; the first
# column is used further down as the user story title.
metrics = df.columns[1:]
Aggregate indices¶
This step requires some manual processing, because the voters did not name the user stories exactly as in the initial formulation. Here, an index is therefore assigned to every row to identify its user story, and rows with the same index are aggregated under one common label.
In [16]:
# Manually assigned user story id for every row of df, in read order.
# Rows sharing an id refer to the same user story even if the voters worded
# its title differently.
df["label_no"] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7, 8, 1, 7, 5, 12, 3, 10, 12, 2, 7, 8, 6]

# Aggregate: the first title encountered for an id becomes the common label of
# all rows with that id. A lookup table plus map() replaces the chained
# assignment df["label"][idx] = ..., which raises SettingWithCopyWarning and
# does not write back to df under pandas copy-on-write. It also avoids filling
# a NaN float column with strings and the hard-coded number of ids.
story_column = df.columns[0]
canonical_title = (
    df.drop_duplicates(subset="label_no", keep="first")
      .set_index("label_no")[story_column]
)
df["label"] = df["label_no"].map(canonical_title)

# The raw titles are not needed any more once the common label is set
df = df.drop(columns=[story_column])

# Make sure all voting scores are floats
df = df.astype({column: float for column in metrics})
Apply some preprocessing¶
Keep only labels that occur at least twice.
In [17]:
# Number of votes per label, most frequently chosen label first
counts = (
    df.groupby("label")["label_no"]
      .count()
      .sort_values(ascending=False)
)

# Labels that were voted on at least twice
label = counts[counts >= 2].index

# Drop the votes for all other labels
df = df.loc[df["label"].isin(label)]
Compute statistics over individual labels.
In [18]:
# Group the metric columns by label once, then derive the three statistics
scores_by_label = df.groupby("label")[metrics]
df_mean = scores_by_label.mean()
df_min = scores_by_label.min()
df_max = scores_by_label.max()
Save the mean scores as a CSV file.
In [19]:
# Create the output directory on first use: neither to_csv nor the figure
# exports further down create missing directories, so a fresh checkout
# without "./results" would fail here.
os.makedirs(outputdir, exist_ok=True)
df_mean.to_csv(os.path.join(outputdir, "mean_score.csv"))
Plot total user choices¶
In [20]:
# Bar chart of the number of votes per label. `counts` was computed before the
# "at least twice" filter, so every label is shown.
X = np.arange(len(counts))

fig = plt.figure(figsize=figsize)
fig.tight_layout()
ax = fig.gca()

# One black bar per label, in the order of `counts`
ax.bar(X, counts, width=0.5, color="k", label="counts")

# Axis annotation: label names as vertical tick labels
ax.set_ylabel("# User choices")
ax.set_title("# votes related to user stories")
ax.set_xticks(X)
ax.set_xticklabels(list(counts.index), rotation="vertical")
ax.grid()

# Fit the long tick labels into the figure before exporting it
fig.tight_layout()
target = os.path.join(outputdir, "number_of_votes.png")
fig.savefig(target, dpi=output_dpi)
Plot voting results vs. domain¶
In [21]:
# Grouped bar chart: per label, one bar group for each domain
# (product, production, effort)
X = np.arange(df_mean.shape[0])
fig = plt.figure(figsize=figsize)
fig.tight_layout()
ax = plt.gca()
ax.set_ylim(0, 6.5)

# Column-name fragment -> horizontal offset, colour and legend entry of a group
filterterms = {
    "Product_": {"shift": -0.25, "c": "g", "label": "Impact on Product"},
    "Production_": {"shift": 0, "c": "b", "label": "Impact on Production"},
    "_effort": {"shift": 0.25, "c": "r", "label": "Effort of Realization"},
}

for filterterm, Xi in filterterms.items():
    # Average the mean / min / max tables over all metric columns whose name
    # contains the fragment
    mean_, min_, max_ = (
        table[table.columns[table.columns.str.contains(filterterm)]].mean(axis=1)
        for table in (df_mean, df_min, df_max)
    )
    # The three bars of a group are drawn on top of each other at the same
    # position; only the first one carries the legend entry of the group
    bar_x = X + Xi["shift"]
    bar_style = {"color": Xi["c"], "width": 0.25}
    ax.bar(bar_x, min_.values, alpha=0.7, label=Xi["label"], **bar_style)
    ax.bar(bar_x, mean_.values, alpha=0.3, **bar_style)
    ax.bar(bar_x, max_.values, alpha=0.3, **bar_style)

# Zero-height bars that only add the min / mean / max entries to the legend
for alpha, name in ((1., "min"), (0.6, "mean"), (0.3, "max")):
    ax.bar([0], [0], color="g", alpha=alpha, label=name)
ax.legend()

# Axis annotation: label names as vertical tick labels
ax.set_ylabel("Voting score")
ax.grid()
ax.set_xticks(X)
ax.set_xticklabels(list(mean_.index), rotation="vertical")
ax.set_title("Voting scores vs. domain")

fig.tight_layout()
fig.savefig(os.path.join(outputdir, "scores_vs_domain.png"), dpi=output_dpi)
Plot voting results vs. benefit¶
In [22]:
# Grouped bar chart: per label, one bar group for each benefit
# (quality, efficiency, scalability)
X = np.arange(df_mean.shape[0])
fig = plt.figure(figsize=figsize)
fig.tight_layout()
ax = plt.gca()
ax.set_ylim(0, 6.5)

# Column-name fragment -> horizontal offset, colour and legend entry of a group
filterterms = {
    "_quality": {"shift": -0.25, "c": "g", "label": "Quality"},
    "_efficiency": {"shift": 0, "c": "b", "label": "Efficiency"},
    "_scalability": {"shift": 0.25, "c": "k", "label": "Scalability"},
}

for filterterm, Xi in filterterms.items():
    # Row-wise average over the metric columns whose name contains the
    # fragment, computed separately for the mean, min and max tables
    mean_ = df_mean.loc[:, df_mean.columns.str.contains(filterterm)].mean(axis=1)
    min_ = df_min.loc[:, df_min.columns.str.contains(filterterm)].mean(axis=1)
    max_ = df_max.loc[:, df_max.columns.str.contains(filterterm)].mean(axis=1)

    # Three bars at the same position; only the min bar is listed in the
    # legend under the name of the group
    offset_x = X + Xi["shift"]
    colour = Xi["c"]
    ax.bar(offset_x, min_.values, width=0.25, color=colour, alpha=0.7, label=Xi["label"])
    ax.bar(offset_x, mean_.values, width=0.25, color=colour, alpha=0.3)
    ax.bar(offset_x, max_.values, width=0.25, color=colour, alpha=0.3)

# Zero-height bars that only add the min / mean / max entries to the legend
legend_entries = {"min": 1., "mean": 0.6, "max": 0.3}
for name, alpha in legend_entries.items():
    ax.bar([0], [0], color="g", alpha=alpha, label=name)
ax.legend()

# Axis annotation: label names as vertical tick labels
ax.set_ylabel("Voting score")
ax.grid()
ax.set_xticks(X)
ax.set_xticklabels(list(mean_.index), rotation="vertical")
ax.set_title("Voting scores vs. Benefits")

fig.tight_layout()
fig.savefig(os.path.join(outputdir, "scores_vs_benefits.png"), dpi=output_dpi)
Visualize the top-n user stories for individual performance metrics¶
In [23]:
# Top-n user stories per metric, as {metric: {label: mean score}}
n = 3
topn = {}
for metric in metrics:
    data = df_mean[metric]
    # keep="all" retains every story tied with the n-th ranked one, so the
    # result can hold more than n entries. The previous sort_values().head()
    # only kept ties at rank 1 and cut ties at the n-th rank arbitrarily.
    if "_effort" not in metric:
        # Non-effort metrics are ranked from the highest mean score down
        best = data.nlargest(n, keep="all")
    else:
        # Effort metrics are ranked from the lowest mean score up
        best = data.nsmallest(n, keep="all")
    topn[metric] = best.to_dict()
# Last expression of the cell: its result is shown as the cell output
Container.contains(topn)
Out[23]:
In [ ]: