зеркало из https://github.com/mozilla/taar_loader.git
more minor patches
This commit is contained in:
Родитель
4f79867117
Коммит
0b644246c0
|
@ -12,6 +12,9 @@ from datetime import timedelta
|
|||
|
||||
import taar_loader
|
||||
|
||||
from pyspark import SparkConf
|
||||
from pyspark.sql import SparkSession
|
||||
|
||||
|
||||
def parse_args():
|
||||
def valid_date_type(arg_date_str):
|
||||
|
@ -41,4 +44,8 @@ def parse_args():
|
|||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
taar_loader.runme(args.run_date)
|
||||
APP_NAME = "HBaseAddonRecommenderView"
|
||||
conf = SparkConf().setAppName(APP_NAME)
|
||||
conf = conf.setMaster("local[*]")
|
||||
spark = SparkSession.builder.config(conf=conf).getOrCreate()
|
||||
taar_loader.main(spark, args.run_date)
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
from taar_dynamo import runme
|
||||
from filters import dynamo_reducer
|
||||
from .taar_dynamo import main
|
||||
from .filters import dynamo_reducer
|
||||
|
|
|
@ -8,8 +8,6 @@ This module replicates the scala script over at
|
|||
https://github.com/mozilla/telemetry-batch-view/blob/1c544f65ad2852703883fe31a9fba38c39e75698/src/main/scala/com/mozilla/telemetry/views/HBaseAddonRecommenderView.scala
|
||||
"""
|
||||
|
||||
from pyspark import SparkConf
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.sql.functions import desc, row_number
|
||||
from pyspark.sql import Window
|
||||
|
||||
|
@ -23,10 +21,7 @@ def etl(spark, run_date, dataFrameFunc):
|
|||
print("Processing %s" % currentDateString)
|
||||
|
||||
# Get the data for the desired date out of the dataframe
|
||||
tmp = dataFrameFunc(currentDateString)
|
||||
|
||||
# TODO: Remove downsampling later
|
||||
datasetForDate = tmp.sample(False, 0.00000001)
|
||||
datasetForDate = dataFrameFunc(currentDateString)
|
||||
|
||||
print("Dataset is sampled!")
|
||||
|
||||
|
@ -124,11 +119,3 @@ def reducer(tuple_a, tuple_b):
|
|||
tuple_data = tuple(tuple_data)
|
||||
|
||||
return tuple_data
|
||||
|
||||
|
||||
def runme(run_date):
|
||||
APP_NAME = "HBaseAddonRecommenderView"
|
||||
conf = SparkConf().setAppName(APP_NAME)
|
||||
conf = conf.setMaster("local[*]")
|
||||
sparkSession = SparkSession.builder.config(conf=conf).getOrCreate()
|
||||
return main(sparkSession, run_date)
|
||||
|
|
Загрузка…
Ссылка в новой задаче