User Story Voting Eval

Last updated: 2020-03-30

Evaluation of user story voting

In [13]:
import os
import pandas
import glob
import numpy as np
import matplotlib.pyplot as plt

from container import Container

Input parameters

In [14]:
# Xlsx file pattern to process
pattern = "./voting_03-16-20/voting_table_*.xlsx"

# Selection of rows and columns from the xlsx
usecols = "A:I"
skiprows = list(range(8)) + list(range(14, 100))

# Rename columns of the input xlsx where present
column_rename = {"Quality" : "Product_quality",
                 "Efficiency" : "Product_efficiency",
                 "Scalability" : "Product_scalability",
                 "Quality.1" : "Production_quality",
                 "Efficiency.1" : "Production_efficiency",
                 "Work effort" : "Work_effort",
                 "Technical effort" : "Technical_effort",
                 "Financial effort" : "Financial_effort"}

# Output parameters
outputdir = "./results"
output_dpi = 250
figsize = (12, 8)
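
Both `to_csv` and `savefig` below write into `outputdir`, but the notebook never creates it. A minimal guard, assuming the directory may not exist yet:

In [ ]:
# Not part of the original run: create ./results if it is missing,
# so the to_csv/savefig calls below do not fail.
os.makedirs(outputdir, exist_ok=True)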

Read all xlsx files

In [15]:
files = glob.glob(pattern)

frames = []

for file in files:

    # Read one voting workbook, restricted to the voting table,
    # and normalize the column names.
    data = pandas.read_excel(file,
                             usecols=usecols,
                             skiprows=skiprows) \
                 .rename(columns=column_rename)

    frames.append(data)

# ignore_index avoids duplicate row indices after stacking the per-file frames
df = pandas.concat(frames, axis=0, ignore_index=True)

metrics = df.columns[1:]
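
pandas only renames columns that actually occur in the sheet, so a misspelled header would slip through silently. A small check (not in the original notebook) confirms that every expected metric column is present:

In [ ]:
# Fail early if any renamed metric column is missing from the input files.
missing = set(column_rename.values()) - set(df.columns)
assert not missing, f"Missing metric columns: {missing}"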

Aggregate indices

This step requires a bit of manual processing, because the voters did not name the user stories exactly as in their initial formulation. So, here, we define indices associated with the individual user stories and aggregate matching rows under a common label.

In [16]:
# Manually assigned label indices (one per row, in file order)
df["label_no"] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7, 8, 1, 7, 5, 12, 3, 10, 12, 2, 7, 8, 6]

# Aggregate
df["label"] = np.nan
for i in range(13):
    idx = (df["label_no"] == i)
    df["label"][idx] = df[df.columns[0]][idx].iloc[0]
df.pop(df.columns[0])
for column in metrics:
    df[column] = df[column].astype(float)
<ipython-input-16-09be0d632176>:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label"][idx] = df[df.columns[0]][idx].iloc[0]

Apply some preprocessing

Keep only labels that occur at least twice.

In [17]:
counts = df.groupby("label")["label_no"].count().sort_values(ascending=False)
keep = counts[counts >= 2].index
df = df[df["label"].isin(keep)]

Compute statistics over individual labels.

In [18]:
df_mean = df.groupby("label")[metrics].mean()
df_min = df.groupby("label")[metrics].min()
df_max = df.groupby("label")[metrics].max()

Save the mean scores as CSV

In [19]:
df_mean.to_csv(os.path.join(outputdir, "mean_score.csv"))

Plot total user choices

In [20]:
X = np.arange(counts.shape[0])
fig = plt.figure(figsize=figsize)
ax = plt.gca()

ax.bar(X, counts, color="k", width=0.5, label="counts")
ax.set_ylabel("# User choices")
ax.set_xticks(X)
ax.set_xticklabels(tuple(counts.index), rotation='vertical')
ax.set_title("# votes related to user stories")

ax.grid()

fig.tight_layout()

fig.savefig(os.path.join(outputdir, "number_of_votes.png"), dpi=output_dpi)

Plot voting results vs. domain

In [21]:
X = np.arange(df_mean.shape[0])
fig = plt.figure(figsize=figsize)
ax = plt.gca()

ax.set_ylim(0, 6.5)

filterterms = {"Product_" : {"shift" : -0.25, "c" : "g", "label" : "Impact on Product"},
               "Production_" : {"shift" : 0, "c" : "b", "label" : "Impact on Production"},
               "_effort" : {"shift" : 0.25, "c" : "r", "label" : "Effort of Realization"}}
               
for filterterm, Xi in filterterms.items():

    mean_ = df_mean[df_mean.columns[df_mean.columns.str.contains(filterterm)]].mean(axis=1)
    min_ = df_min[df_min.columns[df_min.columns.str.contains(filterterm)]].mean(axis=1)
    max_ = df_max[df_max.columns[df_max.columns.str.contains(filterterm)]].mean(axis=1)

    ax.bar(X + Xi["shift"], min_.values, color=Xi["c"], width=0.25, alpha=0.7, label=Xi["label"])
    ax.bar(X + Xi["shift"], mean_.values, color=Xi["c"], width=0.25, alpha=0.3)
    ax.bar(X + Xi["shift"], max_.values, color=Xi["c"], width=0.25, alpha=0.3)

ax.bar([0], [0], color="g", alpha=1., label="min")
ax.bar([0], [0], color="g", alpha=0.6, label="mean")
ax.bar([0], [0], color="g", alpha=0.3, label="max")
ax.legend()
ax.set_ylabel("Voting score")


ax.grid()
ax.set_xticks(X)
ax.set_xticklabels(tuple(mean_.index), rotation='vertical')
ax.set_title("Voting scores vs. domain")

fig.tight_layout()

fig.savefig(os.path.join(outputdir, "scores_vs_domain.png"), dpi=output_dpi)

Plot voting results vs. benefit

In [22]:
X = np.arange(df_mean.shape[0])
fig = plt.figure(figsize=figsize)
ax = plt.gca()

ax.set_ylim(0, 6.5)

filterterms = {"_quality" : {"shift" : -0.25, "c" : "g", "label" : "Quality"},
               "_efficiency" : {"shift" : 0, "c" : "b", "label" : "Efficiency"},
               "_scalability" : {"shift" : 0.25, "c" : "k", "label" : "Scalability"}}
               
for filterterm, Xi in filterterms.items():

    mean_ = df_mean[df_mean.columns[df_mean.columns.str.contains(filterterm)]].mean(axis=1)
    min_ = df_min[df_min.columns[df_min.columns.str.contains(filterterm)]].mean(axis=1)
    max_ = df_max[df_max.columns[df_max.columns.str.contains(filterterm)]].mean(axis=1)

    ax.bar(X + Xi["shift"], min_.values, color=Xi["c"], width=0.25, alpha=0.7, label=Xi["label"])
    ax.bar(X + Xi["shift"], mean_.values, color=Xi["c"], width=0.25, alpha=0.3)
    ax.bar(X + Xi["shift"], max_.values, color=Xi["c"], width=0.25, alpha=0.3)

ax.bar([0], [0], color="g", alpha=1., label="min")
ax.bar([0], [0], color="g", alpha=0.6, label="mean")
ax.bar([0], [0], color="g", alpha=0.3, label="max")
ax.legend()
ax.set_ylabel("Voting score")


ax.grid()
ax.set_xticks(X)
ax.set_xticklabels(tuple(mean_.index), rotation='vertical')
ax.set_title("Voting scores vs. Benefits")

fig.tight_layout()

fig.savefig(os.path.join(outputdir, "scores_vs_benefits.png"), dpi=output_dpi)

Visualize the top-n user stories for individual performance metrics

In [23]:
# Top-n-user stories
n = 3

topn = {}
for metric in metrics:

    data = df_mean[metric]
    if "_effort" not in metric:
        # Higher scores are better; take at least n entries and keep all
        # user stories tied at the top score.
        ni = max(n, data[data == data.max()].shape[0])
        topn[metric] = data.sort_values(ascending=False).head(ni).to_dict()
    else:
        # For effort metrics lower scores are better, so rank ascending.
        ni = max(n, data[data == data.min()].shape[0])
        topn[metric] = data.sort_values(ascending=True).head(ni).to_dict()
Container.contains(topn)
Out[23]:
┐
├─ Product_quality ──────┐
│                        ├─ Measure data set similarity : 4.0
│                        ├─ Establish inter-annotator agreement : 4.0
│                        └─ Feature representation analysis : 3.5
│                        
├─ Product_efficiency ───┐
│                        ├─ Feature representation analysis : 4.5
│                        ├─ Dataset Management Database : 4.5
│                        └─ Measure data set similarity : 4.0
│                        
├─ Product_scalability ──┐
│                        ├─ Dataset Management Database : 4.0
│                        ├─ Analysis of model performance beyond F-score : 4.0
│                        └─ Feature representation analysis : 3.0
│                        
├─ Production_quality ───┐
│                        ├─ Establish inter-annotator agreement : 5.0
│                        ├─ Enhance Annotation guidelines : 4.0
│                        └─ Dataset management database : 3.6666666666666...
│                        
├─ Production_efficiency ┐
│                        ├─ Dataset Management Database : 4.5
│                        ├─ Analysis of model performance beyond F-score : 4.5
│                        └─ Dataset management database : 4.3333333333333...
│                        
├─ Work_effort ──────────┐
│                        ├─ Enhance Annotation guidelines : 1.5
│                        ├─ Analysis of model performance beyond F-score  : 2.5
│                        └─ Establish inter-annotator agreement : 3.0
│                        
├─ Technical_effort ─────┐
│                        ├─ Enhance Annotation guidelines : 1.5
│                        ├─ Establish inter-annotator agreement : 1.5
│                        └─ Analysis of model performance beyond F-score  : 1.75
│                        
└─ Financial_effort ─────┐
                         ├─ Analysis of model performance beyond F-score : 0.0
                         ├─ Establish inter-annotator agreement : 0.0
                         └─ Launch multi-user Scenarios : 0.0
                         