compiled stats plots

2023-06-12 15:03:32 +00:00 · 2023-06-12 15:03:32 +00:00 · 521854ef44
--- a/user-study/scripts/.keystroke_analysis.png
+++ b/user-study/scripts/.keystroke_analysis.png
--- a/user-study/scripts/analysis_stats.json
+++ b/user-study/scripts/analysis_stats.json
@ -1,16 +0,0 @@
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
-{"PE": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
-{"SBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.9084078711985689]}
-{"DBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.11167697344488905]}
-{"NWBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.05794667226537896]}
-{"NWD": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
--- a/user-study/scripts/analysis_visualize.ipynb
+++ b/user-study/scripts/analysis_visualize.ipynb
--- a/user-study/scripts/inter_annotator_analysis.py
+++ b/user-study/scripts/inter_annotator_analysis.py
@ -1,11 +1,29 @@
 import json 
 from utils import get_interface_mapping
+from sklearn.metrics import cohen_kappa_score, f1_score 
 import numpy as np
 analysis_path = '/home/t-hdiddee/INMT-lite/user-study/data/validation_score.dsv'
 with open(analysis_path,'r') as file: 
    records = file.read().strip().split('\n')
 print(f'{len(records)} are the number of records being analysed.')

+def compute_cohens_cappa(paired_score_for_interface):
+    """Compute Cohen's kappa and weighted F1 between two raters.
+
+    Each element of ``paired_score_for_interface`` contributes its first
+    entry to rater 1 and its second entry to rater 2. Elements that do not
+    provide both entries are printed and skipped as a whole, so the two
+    rater lists always stay the same length.
+
+    Returns a ``(kappa, f1)`` tuple produced by ``cohen_kappa_score`` and
+    ``f1_score(..., average="weighted")``.
+    """
+    rater1, rater2 = [], []
+    for ele in paired_score_for_interface:
+        try:
+            # Read both scores before appending either one: appending
+            # ele[0] and then failing on ele[1] would leave the two lists
+            # misaligned and break the metric calls below.
+            first, second = ele[0], ele[1]
+        except (IndexError, KeyError, TypeError):
+            print(ele)
+            continue
+        rater1.append(first)
+        rater2.append(second)
+    k = cohen_kappa_score(rater1, rater2)
+    f1 = f1_score(rater1, rater2, average="weighted")
+    return k, f1
+
+def print_and_dump(instance):
+    """Print ``instance`` and append it, plus a newline, to the results file.
+
+    ``instance`` is concatenated with ``'\\n'``, so it has to be a string.
+    """
+    print(instance)
+    # Opened in append mode: every call (and every re-run of the script)
+    # adds another line to the existing file instead of replacing it.
+    with open('./results/interannotator_analysis.txt', 'a') as file:
+        file.write(instance + '\n')
    
 interfacewise_clusters = {} # interface: scores of all sentences (multiple) 
 sentencewise_clusters = {} # sentence - scores of all formats
@ -25,73 +43,120 @@ for record in records:
    except: 
        print(record)
     
-# Generating sentence wise - interface wise mapping to compute the average IAA per interface
-
-DBOW, SBOW, NWD, NWBOW, PE, B = [],[],[],[],[],[]
-for sid in sentencewise_clusters:
-    # print(sentencewise_clusters[sid])
-    dbow, sbow, nwd, nwbow, pe, b = [],[],[],[],[],[]
-    for mappings in sentencewise_clusters[sid]:
-        if mappings[0] == 'NWD':
-            nwd.append(mappings[1])
-        if mappings[0] == 'NWBOW':
-            nwbow.append(mappings[1])
-        if mappings[0] == 'DBOW':
-            dbow.append(mappings[1])
-        if mappings[0] == 'SBOW':
-            sbow.append(mappings[1])
-        if mappings[0] == 'B':
-            b.append(mappings[1])
-        if mappings[0] == 'PE':
-            pe.append(mappings[1])
-            
-    # print(dbow, nwbow, b, pe, sbow, nwd)
-        
-    # dbow.sort(reverse = True)
-    # nwbow.sort(reverse = True)
-    # sbow.sort(reverse = True)
-    # b.sort(reverse = True)
-    # pe.sort(reverse = True)
-    # nwd.sort(reverse = True)
-    
-    dbow.sort()
-    nwbow.sort()
-    sbow.sort()
-    b.sort()
-    pe.sort()
-    nwd.sort()
-    # print(dbow[:3], nwbow[:3], b[:3], pe[:3], sbow[:3], nwd[:3])
-    
-    DBOW.append(dbow[:3])
-    NWBOW.append(nwbow[:3])
-    SBOW.append(sbow[:3])
-    B.append(b[:3])
-    PE.append(pe[:3])
-    NWD.append(nwd[:3])
-
-
-# print(len(DBOW), len(NWBOW), len(SBOW), len(PE), len(B), len(NWD))
-# print(B)

 for interface in interfacewise_clusters:
    try:
-        print(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
+        print_and_dump(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
    except: 
        print(interface)
        
+# NORMALIZE THE SCORES - Bucket raw scores into a fixed set of values to stabilize the range over which the inter-annotator agreement is calculated.
+
+def normalize_score_per_instruction(raw_score):
+    """Map a raw score onto one of six fixed values: 10, 25, 35, 60, 80 or 90.
+
+    The thresholds are exclusive upper bounds checked in ascending order:
+    below 10 -> 10, below 29 -> 25, below 50 -> 35, below 69 -> 60,
+    below 90 -> 80, and anything else (90 and above) -> 90.
+    """
+    if raw_score < 10: 
+        return 10 
+    elif raw_score < 29:
+        return 25
+    elif raw_score < 50: 
+        return 35
+    elif raw_score < 69: 
+        return 60 
+    elif raw_score < 90: 
+        return 80
+    # Fallthrough: raw_score is 90 or higher.
+    return 90
+
+# Generating sentence wise - interface wise mapping to compute the average IAA per interface
+
+DBOW3, SBOW3, NWD3, NWBOW3, PE3, B3 = [],[],[],[],[],[]
+DBOW2, SBOW2, NWD2, NWBOW2, PE2, B2 = [],[],[],[],[],[]
+DBOW, SBOW, NWD, NWBOW, PE, B = [],[],[],[],[],[]
+print(len(sentencewise_clusters))
+negated = 0
+for sid in sentencewise_clusters:
+    if len(sentencewise_clusters[sid]) == 21 or len(sentencewise_clusters[sid]) == 27 or len(sentencewise_clusters[sid]) == 24:
+        dbow, sbow, nwd, nwbow, pe, b = [],[],[],[],[],[]
+        for mappings in sentencewise_clusters[sid]:
+            if mappings[0] == 'NWD':
+                nwd.append(normalize_score_per_instruction(mappings[1]))
+            if mappings[0] == 'NWBOW':
+                nwbow.append(normalize_score_per_instruction(mappings[1]))
+            if mappings[0] == 'DBOW':
+                dbow.append(normalize_score_per_instruction(mappings[1]))
+            if mappings[0] == 'SBOW':
+                sbow.append(normalize_score_per_instruction(mappings[1]))
+            if mappings[0] == 'B':
+                b.append(normalize_score_per_instruction(mappings[1]))
+            if mappings[0] == 'PE':
+                pe.append(normalize_score_per_instruction(mappings[1]))
+            
+
+        dbow.sort(reverse = True)
+        nwbow.sort(reverse = True)
+        sbow.sort(reverse = True)
+        b.sort(reverse = True)
+        pe.sort(reverse = True)
+        nwd.sort(reverse = True)
+
+        DBOW.append(dbow[:2])
+        NWBOW.append(nwbow[:2])
+        SBOW.append(sbow[:2])
+        B.append(b[:2])
+        PE.append(pe[:2])
+        NWD.append(nwd[:2])
+
+
+        DBOW2.append(dbow[1:3])
+        NWBOW2.append(nwbow[1:3])
+        SBOW2.append(sbow[1:3])
+        B2.append(b[1:3])
+        PE2.append(pe[1:3])
+        NWD2.append(nwd[1:3])
+
+        DBOW3.append(dbow[::2])
+        NWBOW3.append(nwbow[::2])
+        SBOW3.append(sbow[::2])
+        B3.append(b[::2])
+        PE3.append(pe[::2])
+        NWD3.append(nwd[::2])
+    else: 
+        negated += 1    
+print(f'{negated} are negated samples.')
+
+# Compute Pair-Wise Cohen's Kappa 
+interface_score_pairs = [B, B2, B3, PE, PE2, PE3, SBOW, SBOW2, SBOW3, DBOW, DBOW2, DBOW3, NWBOW, NWBOW2, NWBOW3, NWD, NWD2, NWD3]
+interface_identifiers = ['B','B','B','PE','PE','PE','SBOW','SBOW','SBOW','DBOW','DBOW','DBOW','NWBOW','NWBOW','NWBOW','NWD','NWD','NWD']
+for idx, interface in enumerate(interface_score_pairs):
+    iaa, f1 = compute_cohens_cappa(interface)
+    print_and_dump(f'For interface {interface_identifiers[idx]} the pair wise inter-annotator agreement is {iaa} and F1-Score is {f1}.')
+    idx +=1 
+    
+
 B_STD, PE_STD, SBOW_STD, DBOW_STD, NWD_STD, NWBOW_STD = [],[],[],[],[],[]
 for sentence_stats in zip(B, PE, SBOW, DBOW, NWD, NWBOW):
-    B_STD.append(np.std(sentence_stats[0]))
-    PE_STD.append(np.std(sentence_stats[1]))
-    SBOW_STD.append(np.std(sentence_stats[2]))
-    DBOW_STD.append(np.std(sentence_stats[3]))
-    NWD_STD.append(np.std(sentence_stats[4]))
-    NWBOW_STD.append(np.std(sentence_stats[5]))
+    B_STD.append(np.nanstd(sentence_stats[0]))
+    PE_STD.append(np.nanstd(sentence_stats[1]))
+    SBOW_STD.append(np.nanstd(sentence_stats[2]))
+    DBOW_STD.append(np.nanstd(sentence_stats[3]))
+    NWD_STD.append(np.nanstd(sentence_stats[4]))
+    NWBOW_STD.append(np.nanstd(sentence_stats[5]))
+    
+interface_std = [B_STD, PE_STD, SBOW_STD, DBOW_STD, NWD_STD, NWBOW_STD]
+interface_identifiers = ['B','PE','SBOW','DBOW','NWD','NWBOW']
+idx = 0 
+for interface in interface_std: 
+    obj = {interface_identifiers[idx]: ('Avg STD',np.nanmean(interface))}
+    idx += 1
+    with open('./results/analysis_stats.json', 'a') as f:
+        f.write(json.dumps(obj, ensure_ascii=False) + '\n')
+        
+
+for interface in interfacewise_clusters:
+    try:
+        print_and_dump(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
+        obj = {interface: ('Avg SQ',np.average(interfacewise_clusters[interface]))}
+        with open('./results/analysis_stats.json', 'a') as f:
+            f.write(json.dumps(obj, ensure_ascii=False) + '\n')
+    except: 
+        print(interface)

-print(f'Average standard deviation in interface quality assesement of B is {np.nanmean(B_STD)}')
-print(f'Average standard deviation in interface quality assesement of PE is {np.nanmean(PE_STD)}')
-print(f'Average standard deviation in interface quality assesement of SBOW is {np.nanmean(SBOW_STD)}')
-print(f'Average standard deviation in interface quality assesement of DBOW is {np.nanmean(DBOW_STD)}')
-print(f'Average standard deviation in interface quality assesement of NWD is {np.nanmean(NWD_STD)}')
-print(f'Average standard deviation in interface quality assesement of NWBOW is {np.nanmean(NWBOW_STD)}')

--- a/user-study/scripts/keystroke_stats.py
+++ b/user-study/scripts/keystroke_stats.py
@ -17,9 +17,10 @@ for record in records:
        interface = get_interface_mapping(i)
        if interface is None: 
            continue
+        interface_cluster[interface].append((t,log))
    except: 
        print(s)
-    interface_cluster[interface].append((t,log))
+    

 for key in interface_cluster.keys():  
    print(f'Computing Keystroke stats for {key} which has {len(interface_cluster[key])} records.')
@ -54,6 +55,9 @@ for key in interface_cluster.keys():
    print(f'For interface {key}: the average number of backspaces is {nobp/len(time_taken)}')
    print(f'For interface {key}: the average number of keystrokes is {tok/len(time_taken)}')

+    with open('./results/analysis_stats.json', 'a') as f:
+        obj = {key: [np.average(time_taken), nobp/len(time_taken), tok/len(time_taken)]}
+        f.write(json.dumps(obj, ensure_ascii=False) + '\n')

 for key in interface_cluster.keys():  
    if key not in ['B','PE']:
@ -87,6 +91,9 @@ for key in interface_cluster.keys():
                        for sample in samples:
                            record = json.loads(sample)
                            total_suggestions += len(record['BOW'])    
+                if key == 'NWD':     ## Specific computation for NWD which does not have the total number of suggestions shown
+                    total_suggestions = 648 
+                    tapped_suggestions = 28
                    
        print(f'{total_suggestions} are total suggestions.')
        # print(f'{tidx} are tapped indices from earlier method.')
@ -96,10 +103,9 @@ for key in interface_cluster.keys():
        print('*******************************************************************************************************************')
    else: 
        print('Not Applicable for this interface.')            
-
-    # with open('analysis_stats.json', 'a') as f:
-    #     obj = {key: [np.average(time_taken), nobp/len(time_taken), tok/len(time_taken), (tapped_suggestions/total_suggestions)]}
-    #     f.write(json.dumps(obj, ensure_ascii=False) + '\n')
+    with open('./results/analysis_stats.json', 'a') as f:
+        obj = {key: [(tapped_suggestions/total_suggestions)]}
+        f.write(json.dumps(obj, ensure_ascii=False) + '\n')



--- a/user-study/scripts/results/analysis_stats.json
+++ b/user-study/scripts/results/analysis_stats.json
@ -1,16 +1,24 @@
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
-{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
-{"B": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
-{"PE": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
-{"SBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.9084078711985689]}
-{"DBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.11167697344488905]}
-{"NWBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.05794667226537896]}
-{"NWD": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
+{"B": ["Avg STD", 2.2966507177033493]}
+{"PE": ["Avg STD", 2.3444976076555024]}
+{"SBOW": ["Avg STD", 3.5526315789473686]}
+{"DBOW": ["Avg STD", 6.471291866028708]}
+{"NWD": ["Avg STD", 5.490430622009569]}
+{"NWBOW": ["Avg STD", 5.956937799043062]}
+{"B": ["Avg SQ", 80.25507246376812]}
+{"PE": ["Avg SQ", 81.18192918192918]}
+{"SBOW": ["Avg SQ", 77.46735751295337]}
+{"DBOW": ["Avg SQ", 68.50671140939598]}
+{"NWD": ["Avg SQ", 61.758893280632414]}
+{"NWBOW": ["Avg SQ", 73.99876084262701]}
+{"B": [4.738776594650206, 21.412037037037038, 94.14660493827161]}
+{"PE": [1.101664531893004, 4.932098765432099, 13.358024691358025]}
+{"SBOW": [1.7898406378600824, 6.0092592592592595, 33.99074074074074]}
+{"DBOW": [3.9208554353426073, 17.216383307573416, 86.98145285935085]}
+{"NWBOW": [2.1982777777777778, 17.395061728395063, 99.62345679012346]}
+{"NWD": [1.7862921840958605, 16.029411764705884, 92.81372549019608]}
+{"B": [0.0]}
+{"PE": [0.0]}
+{"SBOW": [0.9084078711985689]}
+{"DBOW": [0.11167697344488905]}
+{"NWBOW": [0.05794667226537896]}
+{"NWD": [0.043209876543209874]}
--- a/user-study/scripts/results/interannotator_analysis.txt
+++ b/user-study/scripts/results/interannotator_analysis.txt
@ -1,30 +1,60 @@
-For interface B the pair wise inter-annotator agreement is 0.19919168591224012 and F1-Score is 0.377275902302409.
-For interface B the pair wise inter-annotator agreement is 0.0796677139864801 and F1-Score is 0.15677195365862867.
-For interface B the pair wise inter-annotator agreement is 0.04538534039005304 and F1-Score is 0.15751365541770757.
-For interface PE the pair wise inter-annotator agreement is 0.16880247459125064 and F1-Score is 0.33806880121143407.
-For interface PE the pair wise inter-annotator agreement is 0.11038451477896927 and F1-Score is 0.18849304107644468.
-For interface PE the pair wise inter-annotator agreement is 0.04123810903471925 and F1-Score is 0.1506961506961507.
-For interface SBOW the pair wise inter-annotator agreement is 0.18787532947474272 and F1-Score is 0.39927470190628084.
-For interface SBOW the pair wise inter-annotator agreement is 0.11557128170798758 and F1-Score is 0.19791690538217319.
-For interface SBOW the pair wise inter-annotator agreement is 0.046546802956746736 and F1-Score is 0.17462260301917584.
-For interface DBOW the pair wise inter-annotator agreement is 0.14020163031057153 and F1-Score is 0.30006961849067115.
-For interface DBOW the pair wise inter-annotator agreement is 0.13148271276595735 and F1-Score is 0.18611071336263765.
-For interface DBOW the pair wise inter-annotator agreement is 0.06782841823056307 and F1-Score is 0.19398264536533938.
-For interface NWBOW the pair wise inter-annotator agreement is 0.13974810834791607 and F1-Score is 0.2649248952664231.
-For interface NWBOW the pair wise inter-annotator agreement is 0.06245181187355431 and F1-Score is 0.11039762260414314.
-For interface NWBOW the pair wise inter-annotator agreement is 0.030584855905066877 and F1-Score is 0.09839111334956212.
-For interface NWD the pair wise inter-annotator agreement is 0.11998953303042559 and F1-Score is 0.20470083994804888.
-For interface NWD the pair wise inter-annotator agreement is 0.07811333084391336 and F1-Score is 0.10088277595289638.
-For interface NWD the pair wise inter-annotator agreement is 0.024640657084188833 and F1-Score is 0.07505126452494874.
 Average sentence quality for NWD is 61.758893280632414
 Average sentence quality for NWBOW is 73.99876084262701
 Average sentence quality for DBOW is 68.50671140939598
 Average sentence quality for SBOW is 77.46735751295337
 Average sentence quality for B is 80.25507246376812
 Average sentence quality for PE is 81.18192918192918
-Average standard deviation in interface quality assesement of B is 3.3995215311004783
-Average standard deviation in interface quality assesement of PE is 3.4976076555023923
-Average standard deviation in interface quality assesement of SBOW is 4.7272727272727275
-Average standard deviation in interface quality assesement of DBOW is 7.758373205741627
-Average standard deviation in interface quality assesement of NWD is 7.322966507177034
-Average standard deviation in interface quality assesement of NWBOW is 7.6746411483253585
+For interface B the pair wise inter-annotator agreement is 0.4220378642841949 and F1-Score is 0.7276149962717127.
+For interface B the pair wise inter-annotator agreement is 0.32416574990832425 and F1-Score is 0.5826744106501361.
+For interface B the pair wise inter-annotator agreement is 0.06654756587762412 and F1-Score is 0.42163163755642735.
+For interface PE the pair wise inter-annotator agreement is 0.42188919164396 and F1-Score is 0.7254070860713746.
+For interface PE the pair wise inter-annotator agreement is 0.3946624370733053 and F1-Score is 0.6482301199441644.
+For interface PE the pair wise inter-annotator agreement is 0.1340945836701698 and F1-Score is 0.49798619102416564.
+For interface SBOW the pair wise inter-annotator agreement is 0.4310698437558266 and F1-Score is 0.7008819771197202.
+For interface SBOW the pair wise inter-annotator agreement is 0.4355165428764348 and F1-Score is 0.6195213342627709.
+For interface SBOW the pair wise inter-annotator agreement is 0.15774323546344282 and F1-Score is 0.4951791403591957.
+For interface DBOW the pair wise inter-annotator agreement is 0.3258064516129031 and F1-Score is 0.5965457036896417.
+For interface DBOW the pair wise inter-annotator agreement is 0.38066209465752265 and F1-Score is 0.5153477816253279.
+For interface DBOW the pair wise inter-annotator agreement is 0.12202682736043091 and F1-Score is 0.40557292805437634.
+For interface NWBOW the pair wise inter-annotator agreement is 0.34734731444349487 and F1-Score is 0.6099810420550681.
+For interface NWBOW the pair wise inter-annotator agreement is 0.31869720505697596 and F1-Score is 0.47814694390163004.
+For interface NWBOW the pair wise inter-annotator agreement is 0.0747577681256264 and F1-Score is 0.33507027828078384.
+For interface NWD the pair wise inter-annotator agreement is 0.42106951247958946 and F1-Score is 0.5741516815102509.
+For interface NWD the pair wise inter-annotator agreement is 0.39270833333333344 and F1-Score is 0.5143480948337901.
+For interface NWD the pair wise inter-annotator agreement is 0.16710646776886207 and F1-Score is 0.32553382275151876.
+Average sentence quality for NWD is 61.758893280632414
+Average sentence quality for NWBOW is 73.99876084262701
+Average sentence quality for DBOW is 68.50671140939598
+Average sentence quality for SBOW is 77.46735751295337
+Average sentence quality for B is 80.25507246376812
+Average sentence quality for PE is 81.18192918192918
+Average sentence quality for NWD is 61.758893280632414
+Average sentence quality for NWBOW is 73.99876084262701
+Average sentence quality for DBOW is 68.50671140939598
+Average sentence quality for SBOW is 77.46735751295337
+Average sentence quality for B is 80.25507246376812
+Average sentence quality for PE is 81.18192918192918
+For interface B the pair wise inter-annotator agreement is 0.4220378642841949 and F1-Score is 0.7276149962717127.
+For interface B the pair wise inter-annotator agreement is 0.32416574990832425 and F1-Score is 0.5826744106501361.
+For interface B the pair wise inter-annotator agreement is 0.06654756587762412 and F1-Score is 0.42163163755642735.
+For interface PE the pair wise inter-annotator agreement is 0.42188919164396 and F1-Score is 0.7254070860713746.
+For interface PE the pair wise inter-annotator agreement is 0.3946624370733053 and F1-Score is 0.6482301199441644.
+For interface PE the pair wise inter-annotator agreement is 0.1340945836701698 and F1-Score is 0.49798619102416564.
+For interface SBOW the pair wise inter-annotator agreement is 0.4310698437558266 and F1-Score is 0.7008819771197202.
+For interface SBOW the pair wise inter-annotator agreement is 0.4355165428764348 and F1-Score is 0.6195213342627709.
+For interface SBOW the pair wise inter-annotator agreement is 0.15774323546344282 and F1-Score is 0.4951791403591957.
+For interface DBOW the pair wise inter-annotator agreement is 0.3258064516129031 and F1-Score is 0.5965457036896417.
+For interface DBOW the pair wise inter-annotator agreement is 0.38066209465752265 and F1-Score is 0.5153477816253279.
+For interface DBOW the pair wise inter-annotator agreement is 0.12202682736043091 and F1-Score is 0.40557292805437634.
+For interface NWBOW the pair wise inter-annotator agreement is 0.34734731444349487 and F1-Score is 0.6099810420550681.
+For interface NWBOW the pair wise inter-annotator agreement is 0.31869720505697596 and F1-Score is 0.47814694390163004.
+For interface NWBOW the pair wise inter-annotator agreement is 0.0747577681256264 and F1-Score is 0.33507027828078384.
+For interface NWD the pair wise inter-annotator agreement is 0.42106951247958946 and F1-Score is 0.5741516815102509.
+For interface NWD the pair wise inter-annotator agreement is 0.39270833333333344 and F1-Score is 0.5143480948337901.
+For interface NWD the pair wise inter-annotator agreement is 0.16710646776886207 and F1-Score is 0.32553382275151876.
+Average sentence quality for NWD is 61.758893280632414
+Average sentence quality for NWBOW is 73.99876084262701
+Average sentence quality for DBOW is 68.50671140939598
+Average sentence quality for SBOW is 77.46735751295337
+Average sentence quality for B is 80.25507246376812
+Average sentence quality for PE is 81.18192918192918
--- a/user-study/scripts/results/plots/keystroke_analysis.png
+++ b/user-study/scripts/results/plots/keystroke_analysis.png
--- a/user-study/scripts/visualize.ipynb
+++ b/user-study/scripts/visualize.ipynb