This commit is contained in:
HarshitaDiddee 2023-06-12 15:03:32 +00:00
Родитель 0c00d7c2af
Коммит 521854ef44
9 изменённых файлов: 779 добавлений и 163 удалений

Двоичные данные
user-study/scripts/.keystroke_analysis.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 15 KiB

Просмотреть файл

@ -1,16 +0,0 @@
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
{"PE": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
{"SBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.9084078711985689]}
{"DBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.11167697344488905]}
{"NWBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.05794667226537896]}
{"NWD": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -1,11 +1,29 @@
import json
from utils import get_interface_mapping
from sklearn.metrics import cohen_kappa_score, f1_score
import numpy as np
analysis_path = '/home/t-hdiddee/INMT-lite/user-study/data/validation_score.dsv'
with open(analysis_path,'r') as file:
records = file.read().strip().split('\n')
print(f'{len(records)} are the number of records being analysed.')
def compute_cohens_cappa(paired_score_for_interface):
    """Compute agreement statistics between two raters for one interface.

    Each element of ``paired_score_for_interface`` is expected to hold the
    first rater's score at index 0 and the second rater's score at index 1.
    Elements that cannot supply both scores are printed and left out of the
    computation.

    Returns:
        A ``(kappa, f1)`` tuple: the result of ``cohen_kappa_score`` and the
        weighted ``f1_score``, both called with the first rater's scores as
        the first argument and the second rater's scores as the second.
    """
    rater1, rater2 = [], []
    for ele in paired_score_for_interface:
        try:
            # Read both scores before appending anything, so a short pair can
            # never leave the two rater lists with different lengths.
            first, second = ele[0], ele[1]
        except (IndexError, KeyError, TypeError):
            # Report the malformed pair and skip it entirely.
            print(ele)
            continue
        rater1.append(first)
        rater2.append(second)
    k = cohen_kappa_score(rater1, rater2)
    f1 = f1_score(rater1, rater2, average="weighted")
    return k, f1
def print_and_dump(instance):
    """Print ``instance`` and append it as one line to the results log.

    The line is appended to ``./results/interannotator_analysis.txt``,
    resolved against the current working directory. The ``results``
    directory is created when it is missing so a fresh checkout does not
    fail with ``FileNotFoundError``, and the file is written as UTF-8 so the
    output does not depend on the platform's default encoding.

    Args:
        instance: Text to report. It is concatenated with a newline, so it
            must be a ``str``.
    """
    import os  # local import: the block must not rely on a file-level import

    print(instance)
    os.makedirs('./results', exist_ok=True)
    with open('./results/interannotator_analysis.txt', 'a', encoding='utf-8') as file:
        file.write(instance + '\n')
interfacewise_clusters = {} # interface: scores of all sentences (multiple)
sentencewise_clusters = {} # sentence - scores of all formats
@ -25,73 +43,120 @@ for record in records:
except:
print(record)
# Generating sentence wise - interface wise mapping to compute the average IAA per interface
DBOW, SBOW, NWD, NWBOW, PE, B = [],[],[],[],[],[]
for sid in sentencewise_clusters:
# print(sentencewise_clusters[sid])
dbow, sbow, nwd, nwbow, pe, b = [],[],[],[],[],[]
for mappings in sentencewise_clusters[sid]:
if mappings[0] == 'NWD':
nwd.append(mappings[1])
if mappings[0] == 'NWBOW':
nwbow.append(mappings[1])
if mappings[0] == 'DBOW':
dbow.append(mappings[1])
if mappings[0] == 'SBOW':
sbow.append(mappings[1])
if mappings[0] == 'B':
b.append(mappings[1])
if mappings[0] == 'PE':
pe.append(mappings[1])
# print(dbow, nwbow, b, pe, sbow, nwd)
# dbow.sort(reverse = True)
# nwbow.sort(reverse = True)
# sbow.sort(reverse = True)
# b.sort(reverse = True)
# pe.sort(reverse = True)
# nwd.sort(reverse = True)
dbow.sort()
nwbow.sort()
sbow.sort()
b.sort()
pe.sort()
nwd.sort()
# print(dbow[:3], nwbow[:3], b[:3], pe[:3], sbow[:3], nwd[:3])
DBOW.append(dbow[:3])
NWBOW.append(nwbow[:3])
SBOW.append(sbow[:3])
B.append(b[:3])
PE.append(pe[:3])
NWD.append(nwd[:3])
# print(len(DBOW), len(NWBOW), len(SBOW), len(PE), len(B), len(NWD))
# print(B)
for interface in interfacewise_clusters:
try:
print(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
print_and_dump(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
except:
print(interface)
# Normalize the scores to stabilize the range across which the inter-annotator agreement is calculated.
def normalize_score_per_instruction(raw_score):
    """Map a raw score onto one of six fixed bucket values.

    Scores below 10 become 10, below 29 become 25, below 50 become 35,
    below 69 become 60 and below 90 become 80. Any score that is not below
    90 becomes 90.
    """
    # (exclusive upper bound, bucket value) pairs, checked in ascending order.
    buckets = ((10, 10), (29, 25), (50, 35), (69, 60), (90, 80))
    for upper_bound, bucket_value in buckets:
        if raw_score < upper_bound:
            return bucket_value
    return 90
# Generating sentence wise - interface wise mapping to compute the average IAA per interface
DBOW3, SBOW3, NWD3, NWBOW3, PE3, B3 = [],[],[],[],[],[]
DBOW2, SBOW2, NWD2, NWBOW2, PE2, B2 = [],[],[],[],[],[]
DBOW, SBOW, NWD, NWBOW, PE, B = [],[],[],[],[],[]
print(len(sentencewise_clusters))
negated = 0
for sid in sentencewise_clusters:
if len(sentencewise_clusters[sid]) == 21 or len(sentencewise_clusters[sid]) == 27 or len(sentencewise_clusters[sid]) == 24:
dbow, sbow, nwd, nwbow, pe, b = [],[],[],[],[],[]
for mappings in sentencewise_clusters[sid]:
if mappings[0] == 'NWD':
nwd.append(normalize_score_per_instruction(mappings[1]))
if mappings[0] == 'NWBOW':
nwbow.append(normalize_score_per_instruction(mappings[1]))
if mappings[0] == 'DBOW':
dbow.append(normalize_score_per_instruction(mappings[1]))
if mappings[0] == 'SBOW':
sbow.append(normalize_score_per_instruction(mappings[1]))
if mappings[0] == 'B':
b.append(normalize_score_per_instruction(mappings[1]))
if mappings[0] == 'PE':
pe.append(normalize_score_per_instruction(mappings[1]))
dbow.sort(reverse = True)
nwbow.sort(reverse = True)
sbow.sort(reverse = True)
b.sort(reverse = True)
pe.sort(reverse = True)
nwd.sort(reverse = True)
DBOW.append(dbow[:2])
NWBOW.append(nwbow[:2])
SBOW.append(sbow[:2])
B.append(b[:2])
PE.append(pe[:2])
NWD.append(nwd[:2])
DBOW2.append(dbow[1:3])
NWBOW2.append(nwbow[1:3])
SBOW2.append(sbow[1:3])
B2.append(b[1:3])
PE2.append(pe[1:3])
NWD2.append(nwd[1:3])
DBOW3.append(dbow[::2])
NWBOW3.append(nwbow[::2])
SBOW3.append(sbow[::2])
B3.append(b[::2])
PE3.append(pe[::2])
NWD3.append(nwd[::2])
else:
negated += 1
print(f'{negated} are negated samples.')
# Compute pair-wise Cohen's kappa and weighted F1 for every list of score pairs.
# Each interface contributes three lists (no suffix, suffix 2, suffix 3), so
# its identifier appears three times, in the same order as the lists.
interface_score_pairs = [B, B2, B3, PE, PE2, PE3, SBOW, SBOW2, SBOW3, DBOW, DBOW2, DBOW3, NWBOW, NWBOW2, NWBOW3, NWD, NWD2, NWD3]
interface_identifiers = ['B','B','B','PE','PE','PE','SBOW','SBOW','SBOW','DBOW','DBOW','DBOW','NWBOW','NWBOW','NWBOW','NWD','NWD','NWD']
# Walk the two parallel lists together instead of indexing by hand; the old
# manual `idx += 1` was dead code because `enumerate` reassigned idx each turn.
for identifier, score_pairs in zip(interface_identifiers, interface_score_pairs):
    iaa, f1 = compute_cohens_cappa(score_pairs)
    print_and_dump(f'For interface {identifier} the pair wise inter-annotator agreement is {iaa} and F1-Score is {f1}.')
B_STD, PE_STD, SBOW_STD, DBOW_STD, NWD_STD, NWBOW_STD = [],[],[],[],[],[]
for sentence_stats in zip(B, PE, SBOW, DBOW, NWD, NWBOW):
B_STD.append(np.std(sentence_stats[0]))
PE_STD.append(np.std(sentence_stats[1]))
SBOW_STD.append(np.std(sentence_stats[2]))
DBOW_STD.append(np.std(sentence_stats[3]))
NWD_STD.append(np.std(sentence_stats[4]))
NWBOW_STD.append(np.std(sentence_stats[5]))
B_STD.append(np.nanstd(sentence_stats[0]))
PE_STD.append(np.nanstd(sentence_stats[1]))
SBOW_STD.append(np.nanstd(sentence_stats[2]))
DBOW_STD.append(np.nanstd(sentence_stats[3]))
NWD_STD.append(np.nanstd(sentence_stats[4]))
NWBOW_STD.append(np.nanstd(sentence_stats[5]))
interface_std = [B_STD, PE_STD, SBOW_STD, DBOW_STD, NWD_STD, NWBOW_STD]
interface_identifiers = ['B','PE','SBOW','DBOW','NWD','NWBOW']
idx = 0
for interface in interface_std:
obj = {interface_identifiers[idx]: ('Avg STD',np.nanmean(interface))}
idx += 1
with open('./results/analysis_stats.json', 'a') as f:
f.write(json.dumps(obj, ensure_ascii=False) + '\n')
for interface in interfacewise_clusters:
try:
print_and_dump(f'Average sentence quality for {interface} is {np.average(interfacewise_clusters[interface])}')
obj = {interface: ('Avg SQ',np.average(interfacewise_clusters[interface]))}
with open('./results/analysis_stats.json', 'a') as f:
f.write(json.dumps(obj, ensure_ascii=False) + '\n')
except:
print(interface)
print(f'Average standard deviation in interface quality assesement of B is {np.nanmean(B_STD)}')
print(f'Average standard deviation in interface quality assesement of PE is {np.nanmean(PE_STD)}')
print(f'Average standard deviation in interface quality assesement of SBOW is {np.nanmean(SBOW_STD)}')
print(f'Average standard deviation in interface quality assesement of DBOW is {np.nanmean(DBOW_STD)}')
print(f'Average standard deviation in interface quality assesement of NWD is {np.nanmean(NWD_STD)}')
print(f'Average standard deviation in interface quality assesement of NWBOW is {np.nanmean(NWBOW_STD)}')

Просмотреть файл

@ -17,9 +17,10 @@ for record in records:
interface = get_interface_mapping(i)
if interface is None:
continue
interface_cluster[interface].append((t,log))
except:
print(s)
interface_cluster[interface].append((t,log))
for key in interface_cluster.keys():
print(f'Computing Keystroke stats for {key} which has {len(interface_cluster[key])} records.')
@ -54,6 +55,9 @@ for key in interface_cluster.keys():
print(f'For interface {key}: the average number of backspaces is {nobp/len(time_taken)}')
print(f'For interface {key}: the average number of keystrokes is {tok/len(time_taken)}')
with open('./results/analysis_stats.json', 'a') as f:
obj = {key: [np.average(time_taken), nobp/len(time_taken), tok/len(time_taken)]}
f.write(json.dumps(obj, ensure_ascii=False) + '\n')
for key in interface_cluster.keys():
if key not in ['B','PE']:
@ -87,6 +91,9 @@ for key in interface_cluster.keys():
for sample in samples:
record = json.loads(sample)
total_suggestions += len(record['BOW'])
if key == 'NWD': ## Specific computation for SBOW which does not have the total number of suggestions shown
total_suggestions = 648
tapped_suggestions = 28
print(f'{total_suggestions} are total suggestions.')
# print(f'{tidx} are tapped indices from earlier method.')
@ -96,10 +103,9 @@ for key in interface_cluster.keys():
print('*******************************************************************************************************************')
else:
print('Not Applicable for this interface.')
# with open('analysis_stats.json', 'a') as f:
# obj = {key: [np.average(time_taken), nobp/len(time_taken), tok/len(time_taken), (tapped_suggestions/total_suggestions)]}
# f.write(json.dumps(obj, ensure_ascii=False) + '\n')
with open('./results/analysis_stats.json', 'a') as f:
obj = {key: [(tapped_suggestions/total_suggestions)]}
f.write(json.dumps(obj, ensure_ascii=False) + '\n')

Просмотреть файл

@ -1,16 +1,24 @@
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [46.879629629629626, 21.412037037037038, 0.08333333333333333, 0.0]}
{"PE": [10.66820987654321, 4.932098765432099, 0.007716049382716049, 0.0]}
{"B": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
{"PE": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
{"SBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.9084078711985689]}
{"DBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.11167697344488905]}
{"NWBOW": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.05794667226537896]}
{"NWD": [1.786899809989142, 15.986970684039088, 92.68078175895765, 0.0]}
{"B": ["Avg STD", 2.2966507177033493]}
{"PE": ["Avg STD", 2.3444976076555024]}
{"SBOW": ["Avg STD", 3.5526315789473686]}
{"DBOW": ["Avg STD", 6.471291866028708]}
{"NWD": ["Avg STD", 5.490430622009569]}
{"NWBOW": ["Avg STD", 5.956937799043062]}
{"B": ["Avg SQ", 80.25507246376812]}
{"PE": ["Avg SQ", 81.18192918192918]}
{"SBOW": ["Avg SQ", 77.46735751295337]}
{"DBOW": ["Avg SQ", 68.50671140939598]}
{"NWD": ["Avg SQ", 61.758893280632414]}
{"NWBOW": ["Avg SQ", 73.99876084262701]}
{"B": [4.738776594650206, 21.412037037037038, 94.14660493827161]}
{"PE": [1.101664531893004, 4.932098765432099, 13.358024691358025]}
{"SBOW": [1.7898406378600824, 6.0092592592592595, 33.99074074074074]}
{"DBOW": [3.9208554353426073, 17.216383307573416, 86.98145285935085]}
{"NWBOW": [2.1982777777777778, 17.395061728395063, 99.62345679012346]}
{"NWD": [1.7862921840958605, 16.029411764705884, 92.81372549019608]}
{"B": [0.0]}
{"PE": [0.0]}
{"SBOW": [0.9084078711985689]}
{"DBOW": [0.11167697344488905]}
{"NWBOW": [0.05794667226537896]}
{"NWD": [0.043209876543209874]}

Просмотреть файл

@ -1,30 +1,60 @@
For interface B the pair wise inter-annotator agreement is 0.19919168591224012 and F1-Score is 0.377275902302409.
For interface B the pair wise inter-annotator agreement is 0.0796677139864801 and F1-Score is 0.15677195365862867.
For interface B the pair wise inter-annotator agreement is 0.04538534039005304 and F1-Score is 0.15751365541770757.
For interface PE the pair wise inter-annotator agreement is 0.16880247459125064 and F1-Score is 0.33806880121143407.
For interface PE the pair wise inter-annotator agreement is 0.11038451477896927 and F1-Score is 0.18849304107644468.
For interface PE the pair wise inter-annotator agreement is 0.04123810903471925 and F1-Score is 0.1506961506961507.
For interface SBOW the pair wise inter-annotator agreement is 0.18787532947474272 and F1-Score is 0.39927470190628084.
For interface SBOW the pair wise inter-annotator agreement is 0.11557128170798758 and F1-Score is 0.19791690538217319.
For interface SBOW the pair wise inter-annotator agreement is 0.046546802956746736 and F1-Score is 0.17462260301917584.
For interface DBOW the pair wise inter-annotator agreement is 0.14020163031057153 and F1-Score is 0.30006961849067115.
For interface DBOW the pair wise inter-annotator agreement is 0.13148271276595735 and F1-Score is 0.18611071336263765.
For interface DBOW the pair wise inter-annotator agreement is 0.06782841823056307 and F1-Score is 0.19398264536533938.
For interface NWBOW the pair wise inter-annotator agreement is 0.13974810834791607 and F1-Score is 0.2649248952664231.
For interface NWBOW the pair wise inter-annotator agreement is 0.06245181187355431 and F1-Score is 0.11039762260414314.
For interface NWBOW the pair wise inter-annotator agreement is 0.030584855905066877 and F1-Score is 0.09839111334956212.
For interface NWD the pair wise inter-annotator agreement is 0.11998953303042559 and F1-Score is 0.20470083994804888.
For interface NWD the pair wise inter-annotator agreement is 0.07811333084391336 and F1-Score is 0.10088277595289638.
For interface NWD the pair wise inter-annotator agreement is 0.024640657084188833 and F1-Score is 0.07505126452494874.
Average sentence quality for NWD is 61.758893280632414
Average sentence quality for NWBOW is 73.99876084262701
Average sentence quality for DBOW is 68.50671140939598
Average sentence quality for SBOW is 77.46735751295337
Average sentence quality for B is 80.25507246376812
Average sentence quality for PE is 81.18192918192918
Average standard deviation in interface quality assesement of B is 3.3995215311004783
Average standard deviation in interface quality assesement of PE is 3.4976076555023923
Average standard deviation in interface quality assesement of SBOW is 4.7272727272727275
Average standard deviation in interface quality assesement of DBOW is 7.758373205741627
Average standard deviation in interface quality assesement of NWD is 7.322966507177034
Average standard deviation in interface quality assesement of NWBOW is 7.6746411483253585
For interface B the pair wise inter-annotator agreement is 0.4220378642841949 and F1-Score is 0.7276149962717127.
For interface B the pair wise inter-annotator agreement is 0.32416574990832425 and F1-Score is 0.5826744106501361.
For interface B the pair wise inter-annotator agreement is 0.06654756587762412 and F1-Score is 0.42163163755642735.
For interface PE the pair wise inter-annotator agreement is 0.42188919164396 and F1-Score is 0.7254070860713746.
For interface PE the pair wise inter-annotator agreement is 0.3946624370733053 and F1-Score is 0.6482301199441644.
For interface PE the pair wise inter-annotator agreement is 0.1340945836701698 and F1-Score is 0.49798619102416564.
For interface SBOW the pair wise inter-annotator agreement is 0.4310698437558266 and F1-Score is 0.7008819771197202.
For interface SBOW the pair wise inter-annotator agreement is 0.4355165428764348 and F1-Score is 0.6195213342627709.
For interface SBOW the pair wise inter-annotator agreement is 0.15774323546344282 and F1-Score is 0.4951791403591957.
For interface DBOW the pair wise inter-annotator agreement is 0.3258064516129031 and F1-Score is 0.5965457036896417.
For interface DBOW the pair wise inter-annotator agreement is 0.38066209465752265 and F1-Score is 0.5153477816253279.
For interface DBOW the pair wise inter-annotator agreement is 0.12202682736043091 and F1-Score is 0.40557292805437634.
For interface NWBOW the pair wise inter-annotator agreement is 0.34734731444349487 and F1-Score is 0.6099810420550681.
For interface NWBOW the pair wise inter-annotator agreement is 0.31869720505697596 and F1-Score is 0.47814694390163004.
For interface NWBOW the pair wise inter-annotator agreement is 0.0747577681256264 and F1-Score is 0.33507027828078384.
For interface NWD the pair wise inter-annotator agreement is 0.42106951247958946 and F1-Score is 0.5741516815102509.
For interface NWD the pair wise inter-annotator agreement is 0.39270833333333344 and F1-Score is 0.5143480948337901.
For interface NWD the pair wise inter-annotator agreement is 0.16710646776886207 and F1-Score is 0.32553382275151876.
Average sentence quality for NWD is 61.758893280632414
Average sentence quality for NWBOW is 73.99876084262701
Average sentence quality for DBOW is 68.50671140939598
Average sentence quality for SBOW is 77.46735751295337
Average sentence quality for B is 80.25507246376812
Average sentence quality for PE is 81.18192918192918
Average sentence quality for NWD is 61.758893280632414
Average sentence quality for NWBOW is 73.99876084262701
Average sentence quality for DBOW is 68.50671140939598
Average sentence quality for SBOW is 77.46735751295337
Average sentence quality for B is 80.25507246376812
Average sentence quality for PE is 81.18192918192918
For interface B the pair wise inter-annotator agreement is 0.4220378642841949 and F1-Score is 0.7276149962717127.
For interface B the pair wise inter-annotator agreement is 0.32416574990832425 and F1-Score is 0.5826744106501361.
For interface B the pair wise inter-annotator agreement is 0.06654756587762412 and F1-Score is 0.42163163755642735.
For interface PE the pair wise inter-annotator agreement is 0.42188919164396 and F1-Score is 0.7254070860713746.
For interface PE the pair wise inter-annotator agreement is 0.3946624370733053 and F1-Score is 0.6482301199441644.
For interface PE the pair wise inter-annotator agreement is 0.1340945836701698 and F1-Score is 0.49798619102416564.
For interface SBOW the pair wise inter-annotator agreement is 0.4310698437558266 and F1-Score is 0.7008819771197202.
For interface SBOW the pair wise inter-annotator agreement is 0.4355165428764348 and F1-Score is 0.6195213342627709.
For interface SBOW the pair wise inter-annotator agreement is 0.15774323546344282 and F1-Score is 0.4951791403591957.
For interface DBOW the pair wise inter-annotator agreement is 0.3258064516129031 and F1-Score is 0.5965457036896417.
For interface DBOW the pair wise inter-annotator agreement is 0.38066209465752265 and F1-Score is 0.5153477816253279.
For interface DBOW the pair wise inter-annotator agreement is 0.12202682736043091 and F1-Score is 0.40557292805437634.
For interface NWBOW the pair wise inter-annotator agreement is 0.34734731444349487 and F1-Score is 0.6099810420550681.
For interface NWBOW the pair wise inter-annotator agreement is 0.31869720505697596 and F1-Score is 0.47814694390163004.
For interface NWBOW the pair wise inter-annotator agreement is 0.0747577681256264 and F1-Score is 0.33507027828078384.
For interface NWD the pair wise inter-annotator agreement is 0.42106951247958946 and F1-Score is 0.5741516815102509.
For interface NWD the pair wise inter-annotator agreement is 0.39270833333333344 and F1-Score is 0.5143480948337901.
For interface NWD the pair wise inter-annotator agreement is 0.16710646776886207 and F1-Score is 0.32553382275151876.
Average sentence quality for NWD is 61.758893280632414
Average sentence quality for NWBOW is 73.99876084262701
Average sentence quality for DBOW is 68.50671140939598
Average sentence quality for SBOW is 77.46735751295337
Average sentence quality for B is 80.25507246376812
Average sentence quality for PE is 81.18192918192918

Двоичные данные
user-study/scripts/results/plots/keystroke_analysis.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 50 KiB

Различия файлов скрыты, потому что одна или несколько строк слишком длинны