From 72ffc852f0d06812f43b12b0d133e76cc164121c Mon Sep 17 00:00:00 2001 From: pnispel Date: Tue, 20 May 2014 14:22:56 -0700 Subject: [PATCH 01/13] still need to workout caching, but basic api endpoint is complete --- treeherder/webapp/api/logslice.py | 76 +++++++++++++++++++++++++++++++ treeherder/webapp/api/urls.py | 11 ++++- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 treeherder/webapp/api/logslice.py diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py new file mode 100644 index 000000000..daaee0060 --- /dev/null +++ b/treeherder/webapp/api/logslice.py @@ -0,0 +1,76 @@ +from rest_framework import viewsets +from rest_framework.response import Response +from rest_framework.decorators import action, link +from rest_framework.reverse import reverse + +from django.core.cache import cache + +from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs, + oauth_required) + +import urllib2 +import gzip +import io +import logging + +class LogSliceView(viewsets.ViewSet): + """ + This view serves slices of the log + """ + + def get_log_handle(self, url): + """Hook to get a handle to the log with this url""" + return urllib2.urlopen(url) + + @with_jobs + def list(self, request, project, jm): + """ + GET method implementation for log slicer + + Receives a line range and job_id and returns those lines + """ + job_id = request.QUERY_PARAMS.get("job_id") + + filter = UrlQueryFilter({"job_id": job_id, "name": "Structured Log"}) + + objs = jm.get_job_artifact_list(0, 1, filter.conditions) + + handle = None + gz_file = None + + start_line = int(request.QUERY_PARAMS.get("start_line")) + end_line = int(request.QUERY_PARAMS.get("end_line")) + + if objs: + job = objs[0] + + try: + handle = self.get_log_handle( job.get("blob").get("logurl") ) + gz_file = gzip.GzipFile(fileobj=io.BytesIO(handle.read())) + + lines = [] + + for i, line in enumerate(gz_file): + lines.append({"text": line, "index": i}) + + return Response( 
lines[start_line:end_line] ) + + except Exception as e: + import traceback + print traceback.format_exc() + logging.error(e) + return Response("there was an error opening the log file", 404) + + finally: + if handle: + handle.close() + if gz_file: + gz_file.close() + + else: + return Response("job_artifact {0} not found".format(job_id), 404) + + # if obj: + # return Response(obj) + # else: + # return Response("No job with id: {0}".format(pk), 404) \ No newline at end of file diff --git a/treeherder/webapp/api/urls.py b/treeherder/webapp/api/urls.py index b66f1c322..959c09547 100644 --- a/treeherder/webapp/api/urls.py +++ b/treeherder/webapp/api/urls.py @@ -1,22 +1,25 @@ from django.conf.urls import patterns, include, url from treeherder.webapp.api import (refdata, objectstore, jobs, resultset, - artifact, note, revision, bug) + artifact, note, revision, bug, logslice) from rest_framework import routers # router for views that are bound to a project # i.e. all those views that don't involve reference data project_bound_router = routers.SimpleRouter() + project_bound_router.register( r'objectstore', objectstore.ObjectstoreViewSet, base_name='objectstore', ) + project_bound_router.register( r'jobs', jobs.JobsViewSet, base_name='jobs', ) + project_bound_router.register( r'resultset', resultset.ResultSetViewSet, @@ -47,6 +50,12 @@ project_bound_router.register( base_name='bug-job-map', ) +project_bound_router.register( + r'logslice', + logslice.LogSliceView, + base_name='logslice', +) + # this is the default router for plain restful endpoints # refdata endpoints: From 254fb44f3bdbe6feee1e5474f9fa432c5c4709eb Mon Sep 17 00:00:00 2001 From: pnispel Date: Tue, 20 May 2014 14:22:56 -0700 Subject: [PATCH 02/13] still need to workout caching, but basic api endpoint is complete --- treeherder/webapp/api/logslice.py | 76 +++++++++++++++++++++++++++++++ treeherder/webapp/api/urls.py | 11 ++++- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 
treeherder/webapp/api/logslice.py diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py new file mode 100644 index 000000000..daaee0060 --- /dev/null +++ b/treeherder/webapp/api/logslice.py @@ -0,0 +1,76 @@ +from rest_framework import viewsets +from rest_framework.response import Response +from rest_framework.decorators import action, link +from rest_framework.reverse import reverse + +from django.core.cache import cache + +from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs, + oauth_required) + +import urllib2 +import gzip +import io +import logging + +class LogSliceView(viewsets.ViewSet): + """ + This view serves slices of the log + """ + + def get_log_handle(self, url): + """Hook to get a handle to the log with this url""" + return urllib2.urlopen(url) + + @with_jobs + def list(self, request, project, jm): + """ + GET method implementation for log slicer + + Receives a line range and job_id and returns those lines + """ + job_id = request.QUERY_PARAMS.get("job_id") + + filter = UrlQueryFilter({"job_id": job_id, "name": "Structured Log"}) + + objs = jm.get_job_artifact_list(0, 1, filter.conditions) + + handle = None + gz_file = None + + start_line = int(request.QUERY_PARAMS.get("start_line")) + end_line = int(request.QUERY_PARAMS.get("end_line")) + + if objs: + job = objs[0] + + try: + handle = self.get_log_handle( job.get("blob").get("logurl") ) + gz_file = gzip.GzipFile(fileobj=io.BytesIO(handle.read())) + + lines = [] + + for i, line in enumerate(gz_file): + lines.append({"text": line, "index": i}) + + return Response( lines[start_line:end_line] ) + + except Exception as e: + import traceback + print traceback.format_exc() + logging.error(e) + return Response("there was an error opening the log file", 404) + + finally: + if handle: + handle.close() + if gz_file: + gz_file.close() + + else: + return Response("job_artifact {0} not found".format(job_id), 404) + + # if obj: + # return Response(obj) + # else: + # return 
Response("No job with id: {0}".format(pk), 404) \ No newline at end of file diff --git a/treeherder/webapp/api/urls.py b/treeherder/webapp/api/urls.py index b66f1c322..959c09547 100644 --- a/treeherder/webapp/api/urls.py +++ b/treeherder/webapp/api/urls.py @@ -1,22 +1,25 @@ from django.conf.urls import patterns, include, url from treeherder.webapp.api import (refdata, objectstore, jobs, resultset, - artifact, note, revision, bug) + artifact, note, revision, bug, logslice) from rest_framework import routers # router for views that are bound to a project # i.e. all those views that don't involve reference data project_bound_router = routers.SimpleRouter() + project_bound_router.register( r'objectstore', objectstore.ObjectstoreViewSet, base_name='objectstore', ) + project_bound_router.register( r'jobs', jobs.JobsViewSet, base_name='jobs', ) + project_bound_router.register( r'resultset', resultset.ResultSetViewSet, @@ -47,6 +50,12 @@ project_bound_router.register( base_name='bug-job-map', ) +project_bound_router.register( + r'logslice', + logslice.LogSliceView, + base_name='logslice', +) + # this is the default router for plain restful endpoints # refdata endpoints: From ace3d2bd4c9b69d88ab0eca09ba9e3c5ca8d67f7 Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 10:46:04 -0700 Subject: [PATCH 03/13] made the changes from Mauro's comments on github --- treeherder/webapp/api/logslice.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index daaee0060..24de6ce65 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -1,12 +1,10 @@ from rest_framework import viewsets from rest_framework.response import Response -from rest_framework.decorators import action, link -from rest_framework.reverse import reverse - from django.core.cache import cache -from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs, - 
oauth_required) +from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs) + +from treeherder.webapp.api.exceptions import ResourceNotFoundException import urllib2 import gzip @@ -51,15 +49,14 @@ class LogSliceView(viewsets.ViewSet): lines = [] for i, line in enumerate(gz_file): + if i < start_line or i >= end_line: continue lines.append({"text": line, "index": i}) - return Response( lines[start_line:end_line] ) + return Response( lines ) except Exception as e: - import traceback - print traceback.format_exc() logging.error(e) - return Response("there was an error opening the log file", 404) + raise ResourceNotFoundException finally: if handle: @@ -69,8 +66,3 @@ class LogSliceView(viewsets.ViewSet): else: return Response("job_artifact {0} not found".format(job_id), 404) - - # if obj: - # return Response(obj) - # else: - # return Response("No job with id: {0}".format(pk), 404) \ No newline at end of file From dbd00f14556f73519c50283cf79530867e1de199 Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 13:47:22 -0700 Subject: [PATCH 04/13] added filesystem cache and updated logslice to use it. 
--- treeherder/settings/base.py | 6 ++++++ treeherder/webapp/api/logslice.py | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/treeherder/settings/base.py b/treeherder/settings/base.py index 7056ba717..789ce5630 100644 --- a/treeherder/settings/base.py +++ b/treeherder/settings/base.py @@ -257,6 +257,12 @@ CACHES = { "TIMEOUT": 0, # bumping this is effectively equivalent to restarting memcached "VERSION": 1, + }, + "filesystem": { + "BACKEND": "django.core.cache.backends.filebased.FileBasedCache", + "LOCATION": "/var/tmp/django_cache", + + "VERSION": 1 } } diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 24de6ce65..21c97d129 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -1,6 +1,6 @@ from rest_framework import viewsets from rest_framework.response import Response -from django.core.cache import cache +from django.core import cache from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs) @@ -11,6 +11,8 @@ import gzip import io import logging +filesystem = cache.get_cache('filesystem') + class LogSliceView(viewsets.ViewSet): """ This view serves slices of the log @@ -43,8 +45,18 @@ class LogSliceView(viewsets.ViewSet): job = objs[0] try: - handle = self.get_log_handle( job.get("blob").get("logurl") ) - gz_file = gzip.GzipFile(fileobj=io.BytesIO(handle.read())) + + url = job.get("blob").get("logurl") + gz_file = filesystem.get(url) + + if not gz_file: + print('miss') + handle = self.get_log_handle(url) + gz_file = gzip.GzipFile(fileobj=io.BytesIO(handle.read())) + filesystem.set(url, gz_file.fileobj) + else: + print('hit') + gz_file = gzip.GzipFile(fileobj=gz_file) lines = [] From 46e8ede60892fa9269b399c7193d29b45cd1e078 Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 14:02:16 -0700 Subject: [PATCH 05/13] removed two print statements --- treeherder/webapp/api/logslice.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 21c97d129..79d0afe99 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -50,12 +50,10 @@ class LogSliceView(viewsets.ViewSet): gz_file = filesystem.get(url) if not gz_file: - print('miss') handle = self.get_log_handle(url) gz_file = gzip.GzipFile(fileobj=io.BytesIO(handle.read())) filesystem.set(url, gz_file.fileobj) else: - print('hit') gz_file = gzip.GzipFile(fileobj=gz_file) lines = [] From 1a7a9e5c64a71f8aade8a46a773b9350687002ff Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 14:24:37 -0700 Subject: [PATCH 06/13] changed directory for log cache from /var/tmp to webapp/log_cache --- .gitignore | 2 ++ treeherder/settings/base.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0c63262d8..df880acd0 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,5 @@ docs/_build/ *.pid *.c + +treeherder/webapp/log_cache \ No newline at end of file diff --git a/treeherder/settings/base.py b/treeherder/settings/base.py index 789ce5630..38f111ac6 100644 --- a/treeherder/settings/base.py +++ b/treeherder/settings/base.py @@ -260,7 +260,7 @@ CACHES = { }, "filesystem": { "BACKEND": "django.core.cache.backends.filebased.FileBasedCache", - "LOCATION": "/var/tmp/django_cache", + "LOCATION": path("webapp", "log_cache"), "VERSION": 1 } From 9d08d9bba4b91fd5cdab9a5c8f51670de2b85624 Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 14:29:25 -0700 Subject: [PATCH 07/13] changed exception message in logslice --- treeherder/webapp/api/logslice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 79d0afe99..da59d3e3b 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -66,7 +66,7 @@ class LogSliceView(viewsets.ViewSet): except Exception as e: logging.error(e) - raise 
ResourceNotFoundException + raise ResourceNotFoundException("There was an error fetching the log file.") finally: if handle: From 6a2a42a274ddcd8ba2c1af9ea6ab7eac1eb71cda Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 14:36:46 -0700 Subject: [PATCH 08/13] replaced returning a response with raising an exception --- treeherder/webapp/api/logslice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index da59d3e3b..11ef45126 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -75,4 +75,4 @@ class LogSliceView(viewsets.ViewSet): gz_file.close() else: - return Response("job_artifact {0} not found".format(job_id), 404) + raise ResourceNotFoundException("job_artifact {0} not found".format(job_id)) From 8617fa4cf0f3c9f384efdef0f375b17be47f0a4d Mon Sep 17 00:00:00 2001 From: pnispel Date: Thu, 22 May 2014 16:15:34 -0700 Subject: [PATCH 09/13] removed unnecessary filtering in logslice --- treeherder/webapp/api/logslice.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 11ef45126..93c20087f 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -2,7 +2,7 @@ from rest_framework import viewsets from rest_framework.response import Response from django.core import cache -from treeherder.webapp.api.utils import (UrlQueryFilter, with_jobs) +from treeherder.webapp.api.utils import (with_jobs) from treeherder.webapp.api.exceptions import ResourceNotFoundException @@ -31,9 +31,7 @@ class LogSliceView(viewsets.ViewSet): """ job_id = request.QUERY_PARAMS.get("job_id") - filter = UrlQueryFilter({"job_id": job_id, "name": "Structured Log"}) - - objs = jm.get_job_artifact_list(0, 1, filter.conditions) + log = jm.get_log_references(job_id) handle = None gz_file = None @@ -41,12 +39,9 @@ class 
LogSliceView(viewsets.ViewSet): start_line = int(request.QUERY_PARAMS.get("start_line")) end_line = int(request.QUERY_PARAMS.get("end_line")) - if objs: - job = objs[0] - + if log[0]: try: - - url = job.get("blob").get("logurl") + url = log[0].get("url") gz_file = filesystem.get(url) if not gz_file: @@ -62,7 +57,7 @@ class LogSliceView(viewsets.ViewSet): if i < start_line or i >= end_line: continue lines.append({"text": line, "index": i}) - return Response( lines ) + return Response(lines) except Exception as e: logging.error(e) From 076a5b6db96c5fcc7487eafe83368e12aa7c3e58 Mon Sep 17 00:00:00 2001 From: pnispel Date: Fri, 23 May 2014 09:04:20 -0700 Subject: [PATCH 10/13] made some changes to the cache --- treeherder/settings/base.py | 7 +++++-- treeherder/webapp/api/logslice.py | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/treeherder/settings/base.py b/treeherder/settings/base.py index 38f111ac6..85bab88c1 100644 --- a/treeherder/settings/base.py +++ b/treeherder/settings/base.py @@ -261,8 +261,11 @@ CACHES = { "filesystem": { "BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": path("webapp", "log_cache"), - - "VERSION": 1 + "TIMEOUT": 0, + "VERSION": 1, + 'OPTIONS': { + 'MAX_ENTRIES': 1000 + } } } diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 93c20087f..d587d1ed4 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -10,6 +10,7 @@ import urllib2 import gzip import io import logging +import math filesystem = cache.get_cache('filesystem') @@ -36,8 +37,14 @@ class LogSliceView(viewsets.ViewSet): handle = None gz_file = None - start_line = int(request.QUERY_PARAMS.get("start_line")) - end_line = int(request.QUERY_PARAMS.get("end_line")) + try: + start_line = math.abs(int(request.QUERY_PARAMS.get("start_line", 0))) + end_line = math.abs(int(request.QUERY_PARAMS.get("end_line", 0))) + except Exception as e: + return 
Response("parameters could not be converted to integers", 400) + + if start_line >= end_line: + return Response("end_line must be larger than start_line", 400) if log[0]: try: @@ -54,14 +61,16 @@ class LogSliceView(viewsets.ViewSet): lines = [] for i, line in enumerate(gz_file): - if i < start_line or i >= end_line: continue + if i < start_line: continue + elif i >= end_line: break + lines.append({"text": line, "index": i}) return Response(lines) except Exception as e: logging.error(e) - raise ResourceNotFoundException("There was an error fetching the log file.") + raise ResourceNotFoundException("log file not found") finally: if handle: From d3f003c3a810da530747504278fe591dfc4713d3 Mon Sep 17 00:00:00 2001 From: pnispel Date: Fri, 23 May 2014 10:36:38 -0700 Subject: [PATCH 11/13] oops.. used abs instead of fabs from math library --- treeherder/webapp/api/logslice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index d587d1ed4..09accd633 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -38,8 +38,8 @@ class LogSliceView(viewsets.ViewSet): gz_file = None try: - start_line = math.abs(int(request.QUERY_PARAMS.get("start_line", 0))) - end_line = math.abs(int(request.QUERY_PARAMS.get("end_line", 0))) + start_line = math.fabs(int(request.QUERY_PARAMS.get("start_line", 0))) + end_line = math.fabs(int(request.QUERY_PARAMS.get("end_line", 0))) except Exception as e: return Response("parameters could not be converted to integers", 400) From 59922aaa26df30607be9208975aff82a51d08de3 Mon Sep 17 00:00:00 2001 From: pnispel Date: Fri, 23 May 2014 10:39:44 -0700 Subject: [PATCH 12/13] fabs to abs again. 
didn't need the float version --- treeherder/webapp/api/logslice.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 09accd633..34e640e95 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -10,7 +10,6 @@ import urllib2 import gzip import io import logging -import math filesystem = cache.get_cache('filesystem') @@ -38,8 +37,8 @@ class LogSliceView(viewsets.ViewSet): gz_file = None try: - start_line = math.fabs(int(request.QUERY_PARAMS.get("start_line", 0))) - end_line = math.fabs(int(request.QUERY_PARAMS.get("end_line", 0))) + start_line = abs(int(request.QUERY_PARAMS.get("start_line", 0))) + end_line = abs(int(request.QUERY_PARAMS.get("end_line", 0))) except Exception as e: return Response("parameters could not be converted to integers", 400) From ca67105abe0dd6777bff277bbe9b31b02349233d Mon Sep 17 00:00:00 2001 From: pnispel Date: Fri, 23 May 2014 11:32:05 -0700 Subject: [PATCH 13/13] replaced checking first index of array with checking the length in logslice --- treeherder/webapp/api/logslice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/treeherder/webapp/api/logslice.py b/treeherder/webapp/api/logslice.py index 34e640e95..84c9ad079 100644 --- a/treeherder/webapp/api/logslice.py +++ b/treeherder/webapp/api/logslice.py @@ -45,7 +45,7 @@ class LogSliceView(viewsets.ViewSet): if start_line >= end_line: return Response("end_line must be larger than start_line", 400) - if log[0]: + if len(log) > 0: try: url = log[0].get("url") gz_file = filesystem.get(url)