diff --git a/.gitignore b/.gitignore index a8734b4..0df914b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ /pyLibrary/.svn *.pyc /results -/resources/aws/prices.json /examples/config/etl_supervisor.conf.alt /result /tests diff --git a/README.md b/README.md index 619d842..e2c8dc1 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The module assumes your workload is **long running** and has **many save-points**. In my case each machine is set up to pull small tasks off a queue and -execute them. These machines can be shutdown at any time; with the most +execute them. These machines can be shut down at any time, with the most recent task simply placed back on the queue for some other machine to run. ## Overview @@ -32,7 +32,7 @@ with the best `estimated_value`, are bid on first. * boto * requests * ecdsa (required by fabric, but not installed by pip) -* fabric +* fabric2 ## Installation @@ -45,7 +45,7 @@ For now, you must clone the repo There are three main branches * **dev** - development done here (unstable) -* **beta** - not used +* **manager-etl** - multithreaded management, not ready for ES node management (Oct 2018) * **manager** - used to manage the staging clusters * **master** - proven stable on **manager** for at least a few days @@ -162,7 +162,7 @@ Some caveats: ephemeral drives, but the EBS will be removed too. If you want the volume to be permanent, you must map the block device yourself. * ***block devices will not be formatted nor mounted***. The `path` is -provided only so the InstanceManger.setup() routine can perform the `mkfs` +provided only so the `InstanceManager.setup()` routine can perform the `mkfs` and `mount` commands. ### Writing an InstanceManager @@ -177,9 +177,10 @@ also up to you. The `examples` uses the size of the pending queue to determine, roughly, how much utility is required. * **`setup()`** - function is called to set up an instance. It is passed both a boto ec2 instance object, and the utility this instance is -expected to provide. +expected to provide. This runs in its own thread, and several setups may +run at the same time; ensure your code is thread-safe. * **`teardown()`** - When the machine is no longer required, this will be -called before SpotManager terminates the EC2 instance. This method is +called before SpotManager terminates the EC2 instance. This method is *not* called when AWS terminates the instance.
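To make the three-method contract above concrete, here is a minimal sketch of an `InstanceManager` subclass. It mirrors the `examples/es.py` that this diff removes further down, so the base-class import, the method signatures, and the `Log.note` templating follow this repo; the class name `QueueWorkerSpot` and its queue-length heuristic are illustrative assumptions, not project code.

```python
# MINIMAL SKETCH ONLY: QueueWorkerSpot AND work_queue ARE HYPOTHETICAL;
# BASE CLASS AND METHOD SIGNATURES MIRROR THE (REMOVED) examples/es.py
from mo_logs import Log
from spot.instance_manager import InstanceManager


class QueueWorkerSpot(InstanceManager):
    def __init__(self, work_queue, kwargs=None):
        InstanceManager.__init__(self, kwargs)
        self.settings = kwargs
        self.work_queue = work_queue  # HYPOTHETICAL PENDING-TASK QUEUE

    def required_utility(self, current_utility=None):
        # ROUGHLY ONE UTILITY POINT PER PENDING TASK
        return len(self.work_queue)

    def setup(self, instance, utility, please_stop):
        # instance IS THE boto EC2 OBJECT; utility IS THE CONFIG ENTRY FOR
        # THIS INSTANCE TYPE. RUNS IN ITS OWN THREAD, POSSIBLY ALONGSIDE
        # OTHER SETUPS, SO EVERYTHING DONE HERE MUST BE THREAD-SAFE
        Log.note("setup {{instance}}", instance=instance.id)

    def teardown(self, instance, please_stop):
        # CALLED ONLY WHEN SpotManager RETIRES THE INSTANCE,
        # NOT WHEN AWS TERMINATES IT
        Log.note("teardown {{instance}}", instance=instance.id)
```

Bidding itself needs no code here: `SpotManager` polls `required_utility()`, compares it to the running fleet, and bids on the instance types with the best `estimated_value` first.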
diff --git a/examples/config/es_config.yml b/examples/config/es_config.yml deleted file mode 100644 index 3732a3f..0000000 --- a/examples/config/es_config.yml +++ /dev/null @@ -1,46 +0,0 @@ -cluster.name: active-data -node.zone: spot -node.name: spot_{{id}} -node.master: false -node.data: true - -script.inline: on -script.indexed: on - - -cluster.routing.allocation.cluster_concurrent_rebalance: 1 -cluster.routing.allocation.node_concurrent_recoveries: 1 - -bootstrap.mlockall: true -path.data: {{data_paths}} -path.logs: /data1/logs -cloud: - aws: - region: us-west-2 - protocol: https - ec2: - protocol: https -discovery.type: ec2 -discovery.zen.ping.multicast.enabled: false -discovery.zen.minimum_master_nodes: 1 - -index.number_of_shards: 1 -index.number_of_replicas: 1 -index.cache.field.type: soft -index.translog.interval: 60s -index.translog.flush_threshold_size: 1gb - -indices.memory.index_buffer_size: 20% -indices.recovery.concurrent_streams: 1 -indices.recovery.max_bytes_per_sec: 1000mb -indices.store.throttle.type: none - -http.compression: true -http.cors.allow-origin: "/.*/" -http.cors.enabled: true -http.compression: true -http.max_content_length: 1000mb -http.timeout: 600 - -threadpool.bulk.queue_size: 3000 -threadpool.index.queue_size: 1000 diff --git a/examples/config/es_run.sh b/examples/config/es_run.sh deleted file mode 100644 index 80e4892..0000000 --- a/examples/config/es_run.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/sh - -ES_CLASSPATH=$ES_CLASSPATH:$ES_HOME/lib/elasticsearch-1.7.1.jar:$ES_HOME/lib/*:$ES_HOME/lib/sigar/* - -ES_MIN_MEM={{memory}}g -ES_MAX_MEM={{memory}}g - -# min and max heap sizes should be set to the same value to avoid -# stop-the-world GC pauses during resize, and so that we can lock the -# heap in memory on startup to prevent any of it from being swapped -# out. -JAVA_OPTS="$JAVA_OPTS -Xms${ES_MIN_MEM}" -JAVA_OPTS="$JAVA_OPTS -Xmx${ES_MAX_MEM}" - -# new generation -if [ "x$ES_HEAP_NEWSIZE" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -Xmn${ES_HEAP_NEWSIZE}" -fi - -# max direct memory -if [ "x$ES_DIRECT_SIZE" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -XX:MaxDirectMemorySize=${ES_DIRECT_SIZE}" -fi - -# reduce the per-thread stack size -JAVA_OPTS="$JAVA_OPTS -Xss256k" - -# set to headless, just in case -JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" - -# Force the JVM to use IPv4 stack -if [ "x$ES_USE_IPV4" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -Djava.net.preferIPv4Stack=true" -fi - -JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC" -JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC" - -JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=75" -JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly" - -# GC logging options -if [ "x$ES_USE_GC_LOGGING" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCTimeStamps" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintClassHistogram" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintTenuringDistribution" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCApplicationStoppedTime" - JAVA_OPTS="$JAVA_OPTS -Xloggc:/var/log/elasticsearch/gc.log" -fi - -# Causes the JVM to dump its heap on OutOfMemory. -JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError" -# The path to the heap dump location, note directory must exists and have enough -# space for a full heap dump. -JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=/data/heapdump/heapdump.hprof" - -# Disables explicit GC -JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" - -# Ensure UTF-8 encoding by default (e.g. 
filenames) -JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" - - diff --git a/examples/config/es_settings.json b/examples/config/es_settings.json deleted file mode 100644 index f323ddb..0000000 --- a/examples/config/es_settings.json +++ /dev/null @@ -1,195 +0,0 @@ -{ - "budget": 4.00, //MAXIMUM SPEND PER HOUR FOR ALL INSTANCES - "max_utility_price": 0.02, //MOST THAT WILL BE SPENT ON A SINGLE UTILITY POINT - "max_new_utility": 120, //MOST NEW UTILITY THAT WILL BE REQUESTED IN A SINGLE RUN - "max_requests_per_type": 2, //LIMIT THE NUMBER OF NET-NEW REQUESTS BY TYPE - "max_percent_per_type": 0.50, //ALL INSTANCE TYPES MAY NOT GO OVER THIS AS A PERCENT OF TOTAL INSTANCES (USED TO MITIGATE LOOSING ALL INSTANCES AT ONCE) - "uptime":{ - "history": "week", //HOW MUCH HISTORY TO USE - "duration": "day", //HOW LONG WE WOULD LIKE OUR MACHINE TO TO STAY UP - "bid_percentile": 0.95 //THE PROBABILITY WE ACHIEVE OUR UPTIME - }, - "price_file": "resources/aws/prices.json", - "run_interval": "10minute", //HOW LONG BEFORE NEXT RUN - "availability_zone": "us-west-2c", - "product":"Linux/UNIX (Amazon VPC)", - "aws": { - "$ref": "//~/private.json#aws_credentials" - }, - "more_drives":[ - {"path":"/data1", "size":1000, "volume_type":"standard"} - ], - "1_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"} - ], - "2_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"}, - {"path":"/data2", "device":"/dev/sdc"} - ], - "3_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"}, - {"path":"/data2", "device":"/dev/sdc"}, - {"path":"/data3", "device":"/dev/sdd"} - ], - "4_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"}, - {"path":"/data2", "device":"/dev/sdc"}, - {"path":"/data3", "device":"/dev/sdd"}, - {"path":"/data4", "device":"/dev/sde"} - ], - "6_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"}, - {"path":"/data2", "device":"/dev/sdc"}, - {"path":"/data3", "device":"/dev/sdd"}, - {"path":"/data4", "device":"/dev/sde"}, - {"path":"/data5", "device":"/dev/sdf"}, - {"path":"/data6", "device":"/dev/sdg"} - ], - "8_ephemeral_drives":[ - {"path":"/data1", "device":"/dev/sdb"}, - {"path":"/data2", "device":"/dev/sdc"}, - {"path":"/data3", "device":"/dev/sdd"}, - {"path":"/data4", "device":"/dev/sde"}, - {"path":"/data5", "device":"/dev/sdf"}, - {"path":"/data6", "device":"/dev/sdg"}, - {"path":"/data7", "device":"/dev/sdh"}, - {"path":"/data8", "device":"/dev/sdi"} - ], - "utility":[ - // ONE POINT PER 1 GIG OF MEMORY. 
OR 2 PER 100 GIG OF DRIVESPACE, OR 60 POINTS, WHICHEVER IS LESS - // EBS IS WAY TO SLOW FOR ELASTICSEARCH -// {"instance_type": "c1.medium", "storage": 350, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 5, "num_drives": 1, "memory": 1.7, "cpu": 2, "utility": 1.7}, -// {"instance_type": "c1.xlarge", "storage": 1680, "drives": {"$ref": "#4_ephemeral_drives"}, "discount": 0, "ecu": 20, "num_drives": 4, "memory": 7, "cpu": 8, "utility": 7}, - {"instance_type": "c3.2xlarge", "storage": 160, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 28, "num_drives": 2, "memory": 15, "cpu": 8, "utility": 3.2}, - {"instance_type": "c3.4xlarge", "storage": 320, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 55, "num_drives": 2, "memory": 30, "cpu": 16, "utility": 6.4}, - {"instance_type": "c3.8xlarge", "storage": 640, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 108, "num_drives": 2, "memory": 60, "cpu": 32, "utility": 12.8}, - {"instance_type": "c3.large", "storage": 32, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 7, "num_drives": 2, "memory": 3.75, "cpu": 2, "utility": 0.64}, - {"instance_type": "c3.xlarge", "storage": 80, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 14, "num_drives": 2, "memory": 7.5, "cpu": 4, "utility": 1.6}, -// {"instance_type": "cc2.8xlarge", "storage": 3360, "drives": {"$ref": "#4_ephemeral_drives"}, "discount": 0, "ecu": 88, "num_drives": 4, "memory": 60.5, "cpu": 32, "utility": 60}, -// {"instance_type": "cg1.4xlarge", "storage": 1680, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 33.5, "num_drives": 2, "memory": 22.5, "cpu": 16, "utility": 22.5}, - {"instance_type": "cr1.8xlarge", "storage": 240, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 88, "num_drives": 2, "memory": 244, "cpu": 32, "utility": 4.8}, - {"instance_type": "d2.2xlarge", "storage": 12000, "drives": {"$ref": "#6_ephemeral_drives"}, "discount": 0, "ecu": 28, "num_drives": 6, "memory": 61, "cpu": 8, "utility": 60}, - {"instance_type": "d2.4xlarge", "storage": 24000, "drives": {"$ref": "#8_ephemeral_drives"}, "discount": 0, "ecu": 56, "num_drives": 12, "memory": 122, "cpu": 16, "utility": 60}, - {"instance_type": "d2.8xlarge", "storage": 48000, "drives": {"$ref": "#8_ephemeral_drives"}, "discount": 0, "ecu": 116, "num_drives": 24, "memory": 244, "cpu": 36, "utility": 60}, - {"instance_type": "d2.xlarge", "storage": 6000, "drives": {"$ref": "#3_ephemeral_drives"}, "discount": 0, "ecu": 14, "num_drives": 3, "memory": 30.5, "cpu": 4, "utility": 30.5}, - {"instance_type": "g2.2xlarge", "storage": 60, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 26, "num_drives": 1, "memory": 15, "cpu": 8, "utility": 1.2}, - {"instance_type": "g2.8xlarge", "storage": 240, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 104, "num_drives": 2, "memory": 60, "cpu": 32, "utility": 4.8}, - {"instance_type": "hi1.4xlarge", "storage": 2048, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 35, "num_drives": 2, "memory": 60.5, "cpu": 16, "utility": 40.96}, -// {"instance_type": "hs1.8xlarge", "storage": 48000, "drives": {"$ref": "#8_ephemeral_drives"}, "discount": 0, "ecu": 35, "num_drives": 24, "memory": 117, "cpu": 16, "utility": 60}, - {"instance_type": "i2.2xlarge", "storage": 1600, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 27, "num_drives": 2, "memory": 61, "cpu": 8, "utility": 32.0}, - {"instance_type": "i2.4xlarge", 
"storage": 3200, "drives": {"$ref": "#4_ephemeral_drives"}, "discount": 0, "ecu": 53, "num_drives": 4, "memory": 122, "cpu": 16, "utility": 60}, - {"instance_type": "i2.8xlarge", "storage": 6400, "drives": {"$ref": "#8_ephemeral_drives"}, "discount": 0, "ecu": 104, "num_drives": 8, "memory": 244, "cpu": 32, "utility": 60}, - {"instance_type": "i2.xlarge", "storage": 800, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 14, "num_drives": 1, "memory": 30.5, "cpu": 4, "utility": 16.0}, -// {"instance_type": "i3.16xlarge", "storage": 15200, "drives": {"$ref": "#8_ephemeral_drives"}, "discount": 0, "ecu": 200, "num_drives": 8, "memory": 488, "cpu": 64, "utility": 60}, -// {"instance_type": "i3.2xlarge", "storage": 1900, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 27, "num_drives": 1, "memory": 61, "cpu": 8, "utility": 38.0}, -// {"instance_type": "i3.4xlarge", "storage": 3800, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 53, "num_drives": 2, "memory": 122, "cpu": 16, "utility": 60}, -// {"instance_type": "i3.8xlarge", "storage": 7600, "drives": {"$ref": "#4_ephemeral_drives"}, "discount": 0, "ecu": 99, "num_drives": 4, "memory": 244, "cpu": 32, "utility": 60}, -// {"instance_type": "i3.large", "storage": 475, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 7, "num_drives": 1, "memory": 15.25, "cpu": 2, "utility": 9.5}, -// {"instance_type": "i3.xlarge", "storage": 950, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 13, "num_drives": 1, "memory": 30.5, "cpu": 4, "utility": 19.0}, -// {"instance_type": "m1.large", "storage": 840, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 4, "num_drives": 2, "memory": 7.5, "cpu": 2, "utility": 7.5}, -// {"instance_type": "m1.medium", "storage": 410, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 2, "num_drives": 1, "memory": 3.75, "cpu": 1, "utility": 3.75}, -// {"instance_type": "m1.small", "storage": 160, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 1, "num_drives": 1, "memory": 1.7, "cpu": 1, "utility": 1.7}, -// {"instance_type": "m1.xlarge", "storage": 1680, "drives": {"$ref": "#4_ephemeral_drives"}, "discount": 0, "ecu": 8, "num_drives": 4, "memory": 15, "cpu": 4, "utility": 15}, -// {"instance_type": "m2.2xlarge", "storage": 850, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 13, "num_drives": 1, "memory": 34.2, "cpu": 4, "utility": 17.0}, -// {"instance_type": "m2.4xlarge", "storage": 1680, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 26, "num_drives": 2, "memory": 68.4, "cpu": 8, "utility": 33.6}, -// {"instance_type": "m2.xlarge", "storage": 420, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 6.5, "num_drives": 1, "memory": 17.1, "cpu": 2, "utility": 8.4}, - {"instance_type": "m3.2xlarge", "storage": 160, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 26, "num_drives": 2, "memory": 30, "cpu": 8, "utility": 3.2}, - {"instance_type": "m3.large", "storage": 32, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 6.5, "num_drives": 1, "memory": 7.5, "cpu": 2, "utility": 0.64}, - {"instance_type": "m3.medium", "storage": 4, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 3, "num_drives": 1, "memory": 3.75, "cpu": 1, "utility": 0.08}, - {"instance_type": "m3.xlarge", "storage": 80, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 13, "num_drives": 2, "memory": 15, "cpu": 4, "utility": 1.6}, - 
{"instance_type": "r3.2xlarge", "storage": 160, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 26, "num_drives": 1, "memory": 61, "cpu": 8, "utility": 3.2}, - {"instance_type": "r3.4xlarge", "storage": 320, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 52, "num_drives": 1, "memory": 122, "cpu": 16, "utility": 6.4}, - {"instance_type": "r3.8xlarge", "storage": 640, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 104, "num_drives": 2, "memory": 244, "cpu": 32, "utility": 12.8}, - {"instance_type": "r3.large", "storage": 32, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 6.5, "num_drives": 1, "memory": 15.25, "cpu": 2, "utility": 0.64}, - {"instance_type": "r3.xlarge", "storage": 80, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 13, "num_drives": 1, "memory": 30.5, "cpu": 4, "utility": 1.6}, - {"instance_type": "x1.16xlarge", "storage": 1920, "drives": {"$ref": "#1_ephemeral_drives"}, "discount": 0, "ecu": 174.5, "num_drives": 1, "memory": 976, "cpu": 64, "utility": 38.4}, - {"instance_type": "x1.32xlarge", "storage": 3840, "drives": {"$ref": "#2_ephemeral_drives"}, "discount": 0, "ecu": 349, "num_drives": 2, "memory": 1952, "cpu": 128, "utility": 60} - ], - "ec2": { - "request": { - //SEE http://boto.readthedocs.org/en/latest/ref/ec2.html#boto.ec2.connection.EC2Connection.request_spot_instances - "price": 0.001, - "image_id": "ami-e7527ed7", - "count": 1, - "type": "one-time", - "valid_from": null, - "expiration": "hour", //SPECIAL, USED TO FILL valid_until - "valid_until": null, - "launch_group": null, - "availability_zone_group": null, - "key_name": "activedata", - "security_groups": null, - "user_data": null, - "addressing_type": null, - "instance_type": null, - "placement": null, - "kernel_id": null, - "ramdisk_id": null, - "monitoring_enabled": false, - "subnet_id": null, - "placement_group": "es", - "block_device_map": null, - "instance_profile_arn": null, - "instance_profile_name": "active-data", - "security_group_ids": null, - "ebs_optimized": false, - "network_interfaces": { - "subnet_id": "subnet-b7c137ee", - "groups": ["sg-bb542fde"], - "associate_public_ip_address": true - }, - "dry_run": false - }, - "instance": { - "name": "ActiveData ES Spot Instance" - } - }, - "instance":{ - "class":"examples.es.ESSpot", - "minimum_utility": 2000, - "connect": { - //USED IN Fabric's `env` GLOBAL CONFIG OBJECT - "user": "ec2-user", - "key_filename": "~/.ssh/activedata.pem", - "disable_known_hosts": true, - "host_string": "", - "port": 22, - "password": "", - "banner_timeout": 30 - }, - "new_volume":{ - "size":1000, - "volume_type":"magnetic", - "zone": "us-west-2c", - "snapshot": null, - "iops":null, - "encrypted":false - } - }, - "debug": { - "trace": true, - "cprofile": { - "enabled": false, - "filename": "results/examples_spot_profile.tab" - }, - "log": [ - { - "class": "logging.handlers.RotatingFileHandler", - "filename": "examples/logs/examples_es.log", - "maxBytes": 10000000, - "backupCount": 10, - "encoding": "utf8" - }, - { - "log_type": "ses", - "from_address": "klahnakoski@mozilla.com", - "to_address": "klahnakoski@mozilla.com", - "subject": "[ALERT][Manager] Problem in ES Spot", - "$ref": "file://~/private.json#aws_credentials" - }, - { - "log_type": "console" - } - ] - } -} diff --git a/examples/config/es_supervisor.conf b/examples/config/es_supervisor.conf deleted file mode 100644 index 03e9247..0000000 --- a/examples/config/es_supervisor.conf +++ /dev/null @@ -1,44 +0,0 @@ -# PUT THIS FILE 
IN /etc/supervisord.conf - -[supervisord] -logfile=/data1/logs/supervisord.log -logfile_maxbytes=50MB -logfile_backups=10 -minfds=100000 - -[unix_http_server] -file=/etc/supervisor.sock ; (the path to the socket file) - -[rpcinterface:supervisor] -supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface - -[supervisorctl] -serverurl=unix:///etc/supervisor.sock - -[program:es] -command=/usr/local/elasticsearch/bin/elasticsearch -directory=/usr/local/elasticsearch -autostart=true -autorestart=true -startretries=10 -stopsignal=INT -stopwaitsecs=600 -stderr_logfile=/data1/logs/es.error.log -stdout_logfile=/data1/logs/es.log -stdout_logfile_maxbytes=10MB -stdout_logfile_backups=10 -user=root -environment=JAVA_HOME=/usr/java/default - -[program:push_to_es] -command=python27 activedata_etl/push_to_es.py --settings=resources/settings/staging/push_to_es.json -directory=/home/ec2-user/ActiveData-ETL -autostart=true -autorestart=true -stopsignal=INT -stopwaitsecs=30 -stderr_logfile=/data1/logs/push_to_es.error.log -stdout_logfile=/data1/logs/push_to_es.log -user=ec2-user -environment=PYTHONPATH=.;HOME=/home/ec2-user - diff --git a/examples/es.py b/examples/es.py deleted file mode 100644 index c1b58a2..0000000 --- a/examples/es.py +++ /dev/null @@ -1,235 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import division -from __future__ import unicode_literals - -from mo_fabric import Connection -from mo_files import File -from mo_future import text_type -from mo_kwargs import override -from mo_logs import Log -from mo_logs.strings import expand_template -from mo_math import Math -from mo_math.randoms import Random -from spot.instance_manager import InstanceManager - -JRE = "jre-8u131-linux-x64.rpm" -LOCAL_JRE = "resources/" + JRE - - -class ESSpot(InstanceManager): - """ - THIS CLASS MUST HAVE AN IMPLEMENTATION FOR the SpotManager TO USE - """ - @override - def __init__(self, minimum_utility, kwargs=None): - InstanceManager.__init__(self, kwargs) - self.settings = kwargs - self.minimum_utility = minimum_utility - - def required_utility(self, current_utility=None): - return self.minimum_utility - - def setup( - self, - instance, # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP - utility, # THE utility OBJECT FOUND IN CONFIG - please_stop - ): - with Connection(host=instance.ip_address, kwargs=self.settings.connect) as conn: - gigabytes = Math.floor(utility.memory) - Log.note("setup {{instance}}", instance=instance.id) - - self._install_indexer(instance, conn) - self._install_es(gigabytes, instance, conn) - self._install_supervisor(instance, conn) - self._start_supervisor(conn) - - def teardown( - self, - instance, # THE boto INSTANCE OBJECT FOR THE MACHINE TO TEARDOWN - please_stop - ): - with Connection(host=instance.ip_address, kwargs=self.settings.connect) as conn: - Log.note("teardown {{instance}}", instance=instance.id) - - # ASK NICELY TO STOP Elasticsearch PROCESS - conn.sudo("supervisorctl stop es", warn=True) - - # ASK NICELY TO STOP "supervisord" PROCESS - conn.sudo("ps -ef | grep supervisord | grep -v grep | awk '{print $2}' | xargs kill -SIGINT", warn=True) - - # WAIT FOR SUPERVISOR SHUTDOWN - pid = True - while pid: - pid = conn.sudo("ps -ef | grep supervisord | grep -v grep | awk '{print $2}'") - - def 
_install_es(self, gigabytes, instance, conn): - volumes = instance.markup.drives - - if not conn.exists("/usr/local/elasticsearch"): - with conn.cd("/home/ec2-user/"): - conn.run("mkdir -p temp") - - if not File(LOCAL_JRE).exists: - Log.error("Expecting {{file}} on manager to spread to ES instances", file=LOCAL_JRE) - with conn.cd("/home/ec2-user/temp"): - conn.run('rm -f '+JRE) - conn.put("resources/"+JRE, JRE) - conn.sudo("rpm -i "+JRE) - conn.sudo("alternatives --install /usr/bin/java java /usr/java/default/bin/java 20000") - conn.run("export JAVA_HOME=/usr/java/default") - - with conn.cd("/home/ec2-user/"): - conn.run('wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.7.1.tar.gz') - conn.run('tar zxfv elasticsearch-1.7.1.tar.gz') - conn.sudo('mkdir /usr/local/elasticsearch') - conn.sudo('cp -R elasticsearch-1.7.1/* /usr/local/elasticsearch/') - - with conn.cd('/usr/local/elasticsearch/'): - # BE SURE TO MATCH THE PLUGLIN WITH ES VERSION - # https://github.com/elasticsearch/elasticsearch-cloud-aws - conn.sudo('bin/plugin -install elasticsearch/elasticsearch-cloud-aws/2.7.1') - - # REMOVE THESE FILES, WE WILL REPLACE THEM WITH THE CORRECT VERSIONS AT THE END - conn.sudo("rm -f /usr/local/elasticsearch/config/elasticsearch.yml") - conn.sudo("rm -f /usr/local/elasticsearch/bin/elasticsearch.in.sh") - - # MOUNT AND FORMAT THE EBS VOLUMES (list with `lsblk`) - for i, k in enumerate(volumes): - if not conn.exists(k.path): - conn.sudo('sudo umount '+k.device, warn=True) - - conn.sudo('yes | sudo mkfs -t ext4 '+k.device) - conn.sudo('mkdir '+k.path) - conn.sudo('sudo mount '+k.device+' '+k.path) - - # ADD TO /etc/fstab SO AROUND AFTER REBOOT - conn.sudo("sed -i '$ a\\"+k.device+" "+k.path+" ext4 defaults,nofail 0 2' /etc/fstab") - - # TEST IT IS WORKING - conn.sudo('mount -a') - - # INCREASE THE FILE HANDLE LIMITS - with conn.cd("/home/ec2-user/"): - File("./results/temp/sysctl.conf").delete() - conn.get("/etc/sysctl.conf", "./results/temp/sysctl.conf", use_sudo=True) - lines = File("./results/temp/sysctl.conf").read() - if lines.find("fs.file-max = 100000") == -1: - lines += "\nfs.file-max = 100000" - lines = lines.replace("net.bridge.bridge-nf-call-ip6tables = 0", "") - lines = lines.replace("net.bridge.bridge-nf-call-iptables = 0", "") - lines = lines.replace("net.bridge.bridge-nf-call-arptables = 0", "") - File("./results/temp/sysctl.conf").write(lines) - conn.put("./results/temp/sysctl.conf", "/etc/sysctl.conf", use_sudo=True) - - conn.sudo("sysctl -p") - - # INCREASE FILE HANDLE PERMISSIONS - conn.sudo("sed -i '$ a\\root soft nofile 100000' /etc/security/limits.conf") - conn.sudo("sed -i '$ a\\root hard nofile 100000' /etc/security/limits.conf") - conn.sudo("sed -i '$ a\\root memlock unlimited' /etc/security/limits.conf") - - conn.sudo("sed -i '$ a\\ec2-user soft nofile 100000' /etc/security/limits.conf") - conn.sudo("sed -i '$ a\\ec2-user hard nofile 100000' /etc/security/limits.conf") - conn.sudo("sed -i '$ a\\ec2-user memlock unlimited' /etc/security/limits.conf") - - # EFFECTIVE LOGIN TO LOAD CHANGES TO FILE HANDLES - # conn.sudo("sudo -i -u ec2-user") - - if not conn.exists("/data1/logs"): - conn.sudo('mkdir /data1/logs') - conn.sudo('mkdir /data1/heapdump') - - # INCREASE NUMBER OF FILE HANDLES - # conn.sudo("sysctl -w fs.file-max=64000") - # COPY CONFIG FILE TO ES DIR - if not conn.exists("/usr/local/elasticsearch/config/elasticsearch.yml"): - yml = File("./examples/config/es_config.yml").read().replace("\r", "") - yml = expand_template(yml, { - 
"id": Random.hex(length=8), - "data_paths": ",".join("/data"+text_type(i+1) for i, _ in enumerate(volumes)) - }) - File("./results/temp/elasticsearch.yml").write(yml) - conn.put("./results/temp/elasticsearch.yml", '/usr/local/elasticsearch/config/elasticsearch.yml', use_sudo=True) - - # FOR SOME REASON THE export COMMAND DOES NOT SEEM TO WORK - # THIS SCRIPT SETS THE ES_MIN_MEM/ES_MAX_MEM EXPLICITLY - if not conn.exists("/usr/local/elasticsearch/bin/elasticsearch.in.sh"): - sh = File("./examples/config/es_run.sh").read().replace("\r", "") - sh = expand_template(sh, {"memory": text_type(int(gigabytes/2))}) - File("./results/temp/elasticsearch.in.sh").write(sh) - with conn.cd("/home/ec2-user"): - conn.put("./results/temp/elasticsearch.in.sh", './temp/elasticsearch.in.sh', use_sudo=True) - conn.sudo("cp -f ./temp/elasticsearch.in.sh /usr/local/elasticsearch/bin/elasticsearch.in.sh") - - def _install_indexer(self, instance, conn): - Log.note("Install indexer at {{instance_id}} ({{address}})", instance_id=instance.id, address=instance.ip_address) - self._install_python(instance, conn) - - if not conn.exists("/home/ec2-user/ActiveData-ETL/"): - with conn.cd("/home/ec2-user"): - conn.sudo("yum -y install git") - conn.run("git clone https://github.com/klahnakoski/ActiveData-ETL.git") - - with conn.cd("/home/ec2-user/ActiveData-ETL/"): - conn.run("git checkout push-to-es") - conn.sudo("yum -y install gcc") # REQUIRED FOR psutil - conn.sudo("pip install -r requirements.txt") - - conn.put("~/private_active_data_etl.json", "/home/ec2-user/private.json") - - def _install_python(self, instance, conn): - Log.note("Install Python at {{instance_id}} ({{address}})", instance_id=instance.id, address=instance.ip_address) - if conn.exists("/usr/bin/pip"): - pip_version = conn.sudo("pip --version", warn=True) - else: - pip_version = "" - - if not pip_version.startswith("pip 9."): - conn.sudo("yum -y install python27") - conn.sudo("easy_install pip") - conn.sudo("rm -f /usr/bin/pip", warn=True) - conn.sudo("ln -s /usr/local/bin/pip /usr/bin/pip") - conn.sudo("pip install --upgrade pip") - - def _install_supervisor(self, instance, conn): - Log.note("Install Supervisor-plus-Cron at {{instance_id}} ({{address}})", instance_id=instance.id, address=instance.ip_address) - # REQUIRED FOR Python SSH - self._install_lib("libffi-devel", conn) - self._install_lib("openssl-devel", conn) - self._install_lib('"Development tools"', install="groupinstall", conn=conn) - - self._install_python(instance, conn) - conn.sudo("pip install pyopenssl") - conn.sudo("pip install ndg-httpsclient") - conn.sudo("pip install pyasn1") - conn.sudo("pip install fabric==1.10.2") - conn.sudo("pip install requests") - - conn.sudo("pip install supervisor-plus-cron") - - def _install_lib(self, lib_name, install="install", conn=None): - """ - :param lib_name: - :param install: use 'groupinstall' if you wish - :return: - """ - result = conn.sudo("yum "+install+" -y "+lib_name, warn=True) - if result.return_code != 0 and "already installed and latest version" not in result: - Log.error("problem with install of {{lib}}", lib=lib_name) - - def _start_supervisor(self, conn): - conn.put("./examples/config/es_supervisor.conf", "/etc/supervisord.conf", use_sudo=True) - - # START DAEMON (OR THROW ERROR IF RUNNING ALREADY) - conn.sudo("supervisord -c /etc/supervisord.conf", warn=True) - - conn.sudo("supervisorctl reread") - conn.sudo("supervisorctl update") diff --git a/examples/scripts/install.sh b/examples/scripts/install.sh deleted file mode 100644 index 
e69de29..0000000 diff --git a/examples/scripts/run_es.bat b/examples/scripts/run_es.bat deleted file mode 100644 index 01c1373..0000000 --- a/examples/scripts/run_es.bat +++ /dev/null @@ -1,2 +0,0 @@ -SET PYTHONPATH=.;vendor -python spot\spot_manager.py --settings=./examples/config/es_settings.json diff --git a/examples/scripts/run_es.sh b/examples/scripts/run_es.sh deleted file mode 100755 index 2a311de..0000000 --- a/examples/scripts/run_es.sh +++ /dev/null @@ -1,3 +0,0 @@ -export PYTHONPATH=.:vendor -cd ~/SpotManager -python spot/spot_manager.py --settings=./examples/config/es_settings.json diff --git a/examples/scripts/run_es6.sh b/examples/scripts/run_es6.sh old mode 100644 new mode 100755 diff --git a/vendor/jx_base/__init__.py b/vendor/jx_base/__init__.py index 6c8f19f..bc7db29 100644 --- a/vendor/jx_base/__init__.py +++ b/vendor/jx_base/__init__.py @@ -5,21 +5,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from uuid import uuid4 -from mo_dots import wrap, coalesce, listwrap -from mo_future import text_type -from mo_json import value2json +from jx_base.expressions import jx_expression +from jx_python.expressions import Literal, Python +from mo_dots import coalesce, listwrap, wrap +from mo_dots.datas import register_data +from mo_dots.lists import last +from mo_future import is_text, text +from mo_json import value2json, true, false, null from mo_logs import Log from mo_logs.strings import expand_template, quote +ENABLE_CONSTRAINTS = True + + def generateGuid(): """Gets a random GUID. Note: python's UUID generation library is used here. 
@@ -32,7 +37,7 @@ def generateGuid(): print(a) print(uuid.UUID(a).hex) """ - return text_type(uuid4()) + return text(uuid4()) def _exec(code, name): @@ -46,7 +51,7 @@ def _exec(code, name): Log.error("Can not make class\n{{code}}", code=code, cause=e) -_ = listwrap +_ = listwrap, last, true, false, null def DataClass(name, columns, constraint=None): @@ -72,18 +77,26 @@ def DataClass(name, columns, constraint=None): :return: The class that has been created """ - from jx_python.expressions import jx_expression - - columns = wrap([{"name": c, "required": True, "nulls": False, "type": object} if isinstance(c, text_type) else c for c in columns]) + columns = wrap( + [ + {"name": c, "required": True, "nulls": False, "type": object} + if is_text(c) + else c + for c in columns + ] + ) slots = columns.name - required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name + required = wrap( + filter(lambda c: c.required and not c.nulls and not c.default, columns) + ).name nulls = wrap(filter(lambda c: c.nulls, columns)).name defaults = {c.name: coalesce(c.default, None) for c in columns} - types = {c.name: coalesce(c.type, object) for c in columns} + types = {c.name: coalesce(c.jx_type, object) for c in columns} code = expand_template( -""" + """ from __future__ import unicode_literals +from mo_future import is_text, is_binary from collections import Mapping meta = None @@ -95,10 +108,16 @@ class {{class_name}}(Mapping): def _constraint(row, rownum, rows): - try: - return {{constraint_expr}} - except Exception as e: - return False + code = {{constraint_expr|quote}} + if {{constraint_expr}}: + return + Log.error( + "constraint\\n{" + "{code}}\\nnot satisfied {" + "{expect}}\\n{" + "{value|indent}}", + code={{constraint_expr|quote}}, + expect={{constraint}}, + value=row + ) def __init__(self, **kwargs): if not kwargs: @@ -115,8 +134,7 @@ class {{class_name}}(Mapping): if illegal: Log.error("{"+"{names}} are not a valid properties", names=illegal) - if not self._constraint(0, [self]): - Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self) + self._constraint(0, [self]) def __getitem__(self, item): return getattr(self, item) @@ -128,9 +146,12 @@ class {{class_name}}(Mapping): def __setattr__(self, item, value): if item not in {{slots}}: Log.error("{"+"{item|quote}} not valid attribute", item=item) + + if value==None and item in {{required}}: + Log.error("Expecting property {"+"{item}}", item=item) + object.__setattr__(self, item, value) - if not self._constraint(0, [self]): - Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self) + self._constraint(0, [self]) def __getattr__(self, item): Log.error("{"+"{item|quote}} not valid attribute", item=item) @@ -170,63 +191,72 @@ class {{class_name}}(Mapping): "slots": "(" + (", ".join(quote(s) for s in slots)) + ")", "required": "{" + (", ".join(quote(s) for s in required)) + "}", "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}", - "defaults": jx_expression({"literal": defaults}).to_python(), + "defaults": Literal(defaults).to_python(), "len_slots": len(slots), "dict": "{" + (", ".join(quote(s) + ": self." 
+ s for s in slots)) + "}", - "assign": "; ".join("_set(output, "+quote(s)+", self."+s+")" for s in slots), - "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}", - "constraint_expr": jx_expression(constraint).to_python(), - "constraint": value2json(constraint) - } + "assign": "; ".join( + "_set(output, " + quote(s) + ", self." + s + ")" for s in slots + ), + "types": "{" + + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + + "}", + "constraint_expr": Python[jx_expression(not ENABLE_CONSTRAINTS or constraint)].to_python(), + "constraint": value2json(constraint), + }, ) - return _exec(code, name) + output = _exec(code, name) + register_data(output) + return output -class TableDesc(DataClass( +TableDesc = DataClass( "Table", - [ - "name", - "url", - "query_path", - "timestamp" - ], - constraint={"and": [ - {"eq": [{"last": "query_path"}, {"literal": "."}]} - ]} -)): - @property - def columns(self): - raise NotImplementedError() - # return singlton.get_columns(table_name=self.name) + ["name", "url", "query_path", {"name": "last_updated", "nulls": False}, "columns"], + constraint={"and": [{"eq": [{"last": "query_path"}, {"literal": "."}]}]}, +) Column = DataClass( "Column", [ - # "table", - "names", # MAP FROM TABLE NAME TO COLUMN NAME (ONE COLUMN CAN HAVE MULTIPLE NAMES) + "name", "es_column", "es_index", "es_type", - {"name": "jx_type", "nulls": True}, + "jx_type", {"name": "useSource", "default": False}, - {"name": "nested_path", "nulls": True}, # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS + "nested_path", # AN ARRAY OF PATHS (FROM DEEPEST TO SHALLOWEST) INDICATING THE JSON SUB-ARRAYS {"name": "count", "nulls": True}, {"name": "cardinality", "nulls": True}, - {"name": "multi", "nulls": True}, + {"name": "multi", "nulls": False}, {"name": "partitions", "nulls": True}, - {"name": "last_updated", "nulls": True} + "last_updated", ], - constraint={"and": [ - {"eq": [{"last": "nested_path"}, {"literal": "."}]} - ]} + constraint={ + "and": [ + {"not": {"find": {"es_column": "null"}}}, + {"not": {"eq": {"es_column": "string"}}}, + {"not": {"eq": {"es_type": "object", "jx_type": "exists"}}}, + {"eq": [{"last": "nested_path"}, {"literal": "."}]}, + { + "when": {"eq": [{"literal": ".~N~"}, {"right": {"es_column": 4}}]}, + "then": {"gt": {"multi": 1}}, + "else": True, + }, + { + "when": {"gte": [{"count": "nested_path"}, 2]}, + "then": {"ne": [{"first": {"right": {"nested_path": 2}}}, {"literal": "."}]}, # SECOND-LAST ELEMENT + "else": True + } + ] + }, ) - - from jx_base.container import Container from jx_base.namespace import Namespace from jx_base.facts import Facts from jx_base.snowflake import Snowflake from jx_base.table import Table from jx_base.schema import Schema + + diff --git a/vendor/jx_base/container.py b/vendor/jx_base/container.py index 05af5a9..f16e862 100644 --- a/vendor/jx_base/container.py +++ b/vendor/jx_base/container.py @@ -5,18 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping from copy import copy -from mo_dots import Data -from mo_dots import set_default, split_field, wrap, join_field -from mo_future import generator_types, text_type +from mo_dots import Data, is_data, is_many, join_field, set_default, split_field, wrap +from mo_future import is_text from mo_logs import Log type2container = Data() @@ -28,7 +24,6 @@ _Query = None def _delayed_imports(): - global type2container global _ListContainer global _Cube global _run @@ -47,9 +42,9 @@ def _delayed_imports(): class Container(object): """ - CONTAINERS HOLD MULTIPLE FACTS AND CAN HANDLE + CONTAINERS HOLD MULTIPLE INDICES AND CAN HANDLE GENERAL JSON QUERY EXPRESSIONS ON ITS CONTENTS - METADATA FOR A Container IS CALL A Namespace + METADATA FOR A Container IS CALLED A Namespace """ @@ -67,9 +62,9 @@ class Container(object): return frum elif isinstance(frum, _Query): return _run(frum) - elif isinstance(frum, (list, set) + generator_types): + elif is_many(frum): return _ListContainer(frum) - elif isinstance(frum, text_type): + elif is_text(frum): # USE DEFAULT STORAGE TO FIND Container if not config.default.settings: Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info") @@ -83,7 +78,7 @@ class Container(object): ) settings.type = None # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY return type2container["elasticsearch"](settings) - elif isinstance(frum, Mapping): + elif is_data(frum): frum = wrap(frum) if frum.type and type2container[frum.type]: return type2container[frum.type](frum.settings) @@ -119,10 +114,6 @@ class Container(object): def window(self, window): raise NotImplementedError() - def having(self, having): - _ = having - raise NotImplementedError() - def format(self, format): _ = format raise NotImplementedError() diff --git a/vendor/jx_base/dimensions.py b/vendor/jx_base/dimensions.py index 2bc59d4..4c57adb 100644 --- a/vendor/jx_base/dimensions.py +++ b/vendor/jx_base/dimensions.py @@ -5,19 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with self file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping +from __future__ import absolute_import, division, unicode_literals +from jx_base.domains import ALGEBRAIC, Domain, KNOWN +from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, join_field, listwrap, split_field, wrap import mo_dots as dot -from jx_base.domains import Domain, ALGEBRAIC, KNOWN -from mo_dots import Null, coalesce, join_field, split_field, Data -from mo_dots import wrap, listwrap -from mo_dots.lists import FlatList +from mo_future import transpose from mo_logs import Log from mo_math import SUM from mo_times.timer import Timer @@ -56,7 +51,7 @@ class Dimension(object): fields = coalesce(dim.field, dim.fields) if not fields: return # NO FIELDS TO SEARCH - elif isinstance(fields, Mapping): + elif is_data(fields): self.fields = wrap(fields) edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()]) else: @@ -88,7 +83,7 @@ class Dimension(object): temp = Data(partitions=[]) for i, count in enumerate(parts): a = dim.path(d.getEnd(d.partitions[i])) - if not isinstance(a, list): + if not is_list(a): Log.error("The path function on " + dim.name + " must return an ARRAY of parts") addParts( temp, @@ -98,7 +93,7 @@ class Dimension(object): ) self.value = coalesce(dim.value, "name") self.partitions = temp.partitions - elif isinstance(fields, Mapping): + elif is_data(fields): self.value = "name" # USE THE "name" ATTRIBUTE OF PARTS partitions = FlatList() @@ -135,7 +130,7 @@ class Dimension(object): array = parts.data.values()[0].cube # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END) def edges2value(*values): - if isinstance(fields, Mapping): + if is_data(fields): output = Data() for e, v in transpose(edges, values): output[e.name] = v @@ -192,7 +187,7 @@ class Dimension(object): def getDomain(self, **kwargs): # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS kwargs = wrap(kwargs) - kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if isinstance(self.fields, list) else None) + kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if is_list(self.fields) else None) if not self.partitions and self.edges: # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP diff --git a/vendor/jx_base/domains.py b/vendor/jx_base/domains.py index d5a699a..ae5935b 100644 --- a/vendor/jx_base/domains.py +++ b/vendor/jx_base/domains.py @@ -5,23 +5,17 @@ # License, v. 2.0. If a copy of the MPL was not distributed with self file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import itertools -from collections import Mapping from numbers import Number -from mo_future import text_type - from jx_base.expressions import jx_expression from mo_collections.unique_index import UniqueIndex -from mo_dots import coalesce, Data, set_default, Null, listwrap -from mo_dots import wrap -from mo_dots.lists import FlatList +from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, listwrap, set_default, unwrap, wrap +from mo_future import text from mo_logs import Log from mo_math import MAX, MIN from mo_times.dates import Date @@ -210,7 +204,12 @@ class SimpleSetDomain(Domain): DOMAIN IS A LIST OF OBJECTS, EACH WITH A value PROPERTY """ - __slots__ = ["NULL", "partitions", "map", "order"] + __slots__ = [ + "NULL", # THE value FOR NULL + "partitions", # LIST OF {name, value, dataIndex} dicts + "map", # MAP FROM value TO name + "order" # MAP FROM value TO dataIndex + ] def __init__(self, **desc): Domain.__init__(self, **desc) @@ -225,7 +224,7 @@ class SimpleSetDomain(Domain): if isinstance(self.key, set): Log.error("problem") - if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text_type, Number, tuple))): + if not desc.key and (len(desc.partitions)==0 or isinstance(desc.partitions[0], (text, Number, tuple))): # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS self.key = "value" self.map = {} @@ -236,7 +235,7 @@ class SimpleSetDomain(Domain): self.map[p] = part self.order[p] = i if isinstance(p, (int, float)): - text_part = text_type(float(p)) # ES CAN NOT HANDLE NUMERIC PARTS + text_part = text(float(p)) # ES CAN NOT HANDLE NUMERIC PARTS self.map[text_part] = part self.order[text_part] = i self.label = coalesce(self.label, "name") @@ -246,15 +245,18 @@ class SimpleSetDomain(Domain): if desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1: self.key = desc.key self.map = UniqueIndex(keys=desc.dimension.fields) - elif desc.partitions and isinstance(desc.key, (list, set)): + elif desc.partitions and is_container(desc.key): # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE self.key = desc.key self.map = UniqueIndex(keys=desc.key) - elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping): + elif desc.partitions and is_data(desc.partitions[0][desc.key]): + # LOOKS LIKE OBJECTS + # sorted = desc.partitions[desc.key] + self.key = desc.key self.map = UniqueIndex(keys=desc.key) - # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions) - # self.map = UniqueIndex(keys=self.key) + self.order = {p[self.key]: p.dataIndex for p in desc.partitions} + self.partitions = desc.partitions elif len(desc.partitions) == 0: # CREATE AN EMPTY DOMAIN self.key = "value" @@ -376,7 +378,7 @@ class SetDomain(Domain): if isinstance(self.key, set): Log.error("problem") - if isinstance(desc.partitions[0], (int, float, text_type)): + if isinstance(desc.partitions[0], (int, float, text)): # ASSMUE PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS self.key = "value" self.order[None] = len(desc.partitions) @@ -388,14 +390,14 @@ class SetDomain(Domain): elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1: self.key = desc.key self.map = UniqueIndex(keys=desc.dimension.fields) - 
elif desc.partitions and isinstance(desc.key, (list, set)): + elif desc.partitions and is_container(desc.key): # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE self.key = desc.key self.map = UniqueIndex(keys=desc.key) - elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping): + elif desc.partitions and is_data(desc.partitions[0][desc.key]): self.key = desc.key self.map = UniqueIndex(keys=desc.key) - # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions) + # self.key = UNION(*set(d[desc.key].keys()) for d in desc.partitions) # self.map = UniqueIndex(keys=self.key) elif desc.key == None: Log.error("Domains must have keys") @@ -663,7 +665,7 @@ class RangeDomain(Domain): if not self.key: Log.error("Must have a key value") - parts = list(listwrap(self.partitions)) + parts = listwrap(self.partitions) for i, p in enumerate(parts): self.min = MIN([self.min, p.min]) self.max = MAX([self.max, p.max]) @@ -675,10 +677,10 @@ # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE for p, q in itertools.product(parts, parts): - if p is not q and p.min <= q.min and q.min < p.max: + if p.min <= q.min and q.min < p.max and unwrap(p) is not unwrap(q): Log.error("partitions overlap!") - self.partitions = parts + self.partitions = wrap(parts) return elif any([self.min == None, self.max == None, self.interval == None]): Log.error("Can not handle missing parameter") diff --git a/vendor/jx_base/expressions.py b/vendor/jx_base/expressions.py deleted file mode 100644 index 39788e3..0000000 --- a/vendor/jx_base/expressions.py +++ /dev/null @@ -1,2878 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import operator -from collections import Mapping -from decimal import Decimal - -import mo_json -from jx_base.queries import is_variable_name, get_property_name -from mo_dots import coalesce, wrap, Null, split_field -from mo_future import text_type, utf8_json_encoder, get_function_name, zip_longest -from mo_json import scrub -from mo_json.typed_encoder import IS_NULL, OBJECT, BOOLEAN, python_type_to_json_type, NUMBER, INTEGER, STRING -from mo_logs import Log, Except -from mo_math import Math, MAX, MIN, UNION -from mo_times.dates import Date, unicode2Date - -ALLOW_SCRIPTING = False -EMPTY_DICT = {} - - -def extend(cls): - """ - DECORATOR TO ADD METHODS TO CLASSES - :param cls: THE CLASS TO ADD THE METHOD TO - :return: - """ - def extender(func): - setattr(cls, get_function_name(func), func) - return func - return extender - - -def simplified(func): - def mark_as_simple(self): - if self.simplified: - return self - - output = func(self) - output.simplified = True - return output - return mark_as_simple - - -def jx_expression(expr, schema=None): - # UPDATE THE VARIABLE WITH THIER KNOWN TYPES - output = _jx_expression(expr) - if not schema: - return output - for v in output.vars(): - leaves = schema.leaves(v.var) - if len(leaves) == 0: - v.data_type = IS_NULL - if len(leaves) == 1: - v.data_type = list(leaves)[0].jx_type - return output - - -def _jx_expression(expr): - """ - WRAP A JSON EXPRESSION WITH OBJECT REPRESENTATION - """ - if isinstance(expr, Expression): - Log.error("Expecting JSON, not expression") - - if expr is None: - return TRUE - elif expr in (True, False, None) or expr == None or isinstance(expr, (float, int, Decimal, Date)): - return Literal(None, expr) - elif isinstance(expr, text_type): - return Variable(expr) - elif isinstance(expr, (list, tuple)): - return TupleOp("tuple", list(map(jx_expression, expr))) # FORMALIZE - - expr = wrap(expr) - try: - items = expr.items() - except Exception as e: - Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e) - - for item in items: - op, term = item - class_ = operators.get(op) - if class_: - return class_.define(expr) - else: - if not items: - return NULL - raise Log.error("{{operator|quote}} is not a known operator", operator=op) - - -class Expression(object): - data_type = OBJECT - has_simple_form = False - - def __init__(self, op, terms): - self.simplified = False - if isinstance(terms, (list, tuple)): - if not all(isinstance(t, Expression) for t in terms): - Log.error("Expecting an expression") - elif isinstance(terms, Mapping): - if not all(isinstance(k, Variable) and isinstance(v, Literal) for k, v in terms.items()): - Log.error("Expecting an {: }") - elif terms == None: - pass - else: - if not isinstance(terms, Expression): - Log.error("Expecting an expression") - - @classmethod - def define(cls, expr): - """ - GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS - OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS - :param expr: Data representing a JSON Expression - :return: Python parse tree - """ - - try: - items = expr.items() - except Exception as e: - Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e) - - for item in items: - op, term = item - class_ = operators.get(op) - if class_: - clauses = {k: jx_expression(v) for k, v in expr.items() if k != op} - break - else: - if not items: - return 
NULL - raise Log.error("{{operator|quote}} is not a known operator", operator=op) - - if term == None: - return class_(op, [], **clauses) - elif isinstance(term, list): - terms = list(map(_jx_expression, term)) - return class_(op, terms, **clauses) - elif isinstance(term, Mapping): - items = term.items() - if class_.has_simple_form: - if len(items) == 1: - k, v = items[0] - return class_(op, [Variable(k), Literal(None, v)], **clauses) - else: - return class_(op, {k: Literal(None, v) for k, v in items}, **clauses) - else: - return class_(op, jx_expression(term), **clauses) - else: - if op in ["literal", "date", "offset"]: - return class_(op, term, **clauses) - else: - return class_(op, _jx_expression(term), **clauses) - - @property - def name(self): - return self.__class__.__name__ - - @property - def many(self): - """ - :return: True IF THE EXPRESSION RETURNS A MULTIVALUE (WHICH IS NOT A LIST OR A TUPLE) - """ - return False - - def __data__(self): - raise NotImplementedError - - def vars(self): - raise Log.error("{{type}} has no `vars` method", type=self.__class__.__name__) - - def map(self, map): - raise Log.error("{{type}} has no `map` method", type=self.__class__.__name__) - - def missing(self): - """ - THERE IS PLENTY OF OPPORTUNITY TO SIMPLIFY missing EXPRESSIONS - OVERRIDE THIS METHOD TO SIMPLIFY - :return: - """ - if self.type == BOOLEAN: - Log.error("programmer error") - return MissingOp("missing", self) - - def exists(self): - """ - THERE IS PLENTY OF OPPORTUNITY TO SIMPLIFY exists EXPRESSIONS - OVERRIDE THIS METHOD TO SIMPLIFY - :return: - """ - return NotOp("not", self.missing()) - - def is_true(self): - """ - :return: True, IF THIS EXPRESSION ALWAYS RETURNS BOOLEAN true - """ - return FALSE # GOOD DEFAULT ASSUMPTION - - def is_false(self): - """ - :return: True, IF THIS EXPRESSION ALWAYS RETURNS BOOLEAN false - """ - return FALSE # GOOD DEFAULT ASSUMPTION - - def partial_eval(self): - """ - ATTEMPT TO SIMPLIFY THE EXPRESSION: - PREFERABLY RETURNING A LITERAL, BUT MAYBE A SIMPLER EXPRESSION, OR self IF NOT POSSIBLE - """ - self.simplified = True - return self - - @property - def type(self): - return self.data_type - - def __eq__(self, other): - Log.note("this is slow on {{type}}", type=text_type(self.__class__.__name__)) - if other is None: - return False - return self.__data__() == other.__data__() - - -class Variable(Expression): - - def __init__(self, var): - """ - :param var: DOT DELIMITED PATH INTO A DOCUMENT - :param verify: True - VERIFY THIS IS A VALID NAME (use False for trusted code only) - """ - Expression.__init__(self, "", None) - self.var = get_property_name(var) - - def __call__(self, row, rownum=None, rows=None): - path = split_field(self.var) - for p in path: - row = row.get(p) - if row is None: - return None - if isinstance(row, list) and len(row) == 1: - return row[0] - return row - - def __data__(self): - return self.var - - @property - def many(self): - return True - - def vars(self): - return {self} - - def map(self, map_): - return Variable(coalesce(map_.get(self.var), self.var)) - - def __hash__(self): - return self.var.__hash__() - - def __eq__(self, other): - if isinstance(other, Variable): - return self.var == other.var - elif isinstance(other, text_type): - return self.var == other - return False - - def __unicode__(self): - return self.var - - def __str__(self): - return str(self.var) -IDENTITY = Variable(".") - - -class OffsetOp(Expression): - """ - OFFSET INDEX INTO A TUPLE - """ - - def __init__(self, op, var): - Expression.__init__(self, 
"offset", None) - if not Math.is_integer(var): - Log.error("Expecting an integer") - self.var = var - - def __call__(self, row, rownum=None, rows=None): - try: - return row[self.var] - except Exception: - return None - - def __data__(self): - return {"offset": self.var} - - def vars(self): - return {} - - def __hash__(self): - return self.var.__hash__() - - def __eq__(self, other): - return self.var == other - - def __unicode__(self): - return text_type(self.var) - - def __str__(self): - return str(self.var) - - -class RowsOp(Expression): - has_simple_form = True - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.var, self.offset = term - if isinstance(self.var, Variable): - if isinstance(self.var, Variable) and not any(self.var.var.startswith(p) for p in ["row.", "rows.", "rownum"]): # VARIABLES ARE INTERPRETED LITERALLY - self.var = Literal("literal", self.var.var) - else: - Log.error("can not handle") - else: - Log.error("can not handle") - - def __data__(self): - if isinstance(self.var, Literal) and isinstance(self.offset, Literal): - return {"rows": {self.var.json, self.offset.value}} - else: - return {"rows": [self.var.__data__(), self.offset.__data__()]} - - def vars(self): - return self.var.vars() | self.offset.vars() | {"rows", "rownum"} - - def map(self, map_): - return BinaryOp("rows", [self.var.map(map_), self.offset.map(map_)]) - - -class GetOp(Expression): - has_simple_form = True - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.var, self.offset = term - - def __data__(self): - if isinstance(self.var, Literal) and isinstance(self.offset, Literal): - return {"get": {self.var.json, self.offset.value}} - else: - return {"get": [self.var.__data__(), self.offset.__data__()]} - - def vars(self): - return self.var.vars() | self.offset.vars() - - def map(self, map_): - return BinaryOp("get", [self.var.map(map_), self.offset.map(map_)]) - - -class SelectOp(Expression): - has_simple_form = True - - def __init__(self, op, terms): - self.terms = terms - - @classmethod - def define(cls, expr): - term = expr.select - terms = [] - if not isinstance(term, list): - raise Log.error("Expecting a list") - for t in term: - if isinstance(t, text_type): - if not is_variable_name(t): - Log.error("expecting {{value}} a simple dot-delimited path name", value=t) - terms.append({"name": t, "value": jx_expression(t)}) - elif t.name == None: - if t.value == None: - Log.error("expecting select parameters to have name and value properties") - elif isinstance(t.value, text_type): - if not is_variable_name(t): - Log.error("expecting {{value}} a simple dot-delimited path name", value=t.value) - else: - terms.append({"name": t.value, "value": jx_expression(t.value)}) - else: - Log.error("expecting a name property") - else: - terms.append({"name": t.name, "value": jx_expression(t.value)}) - return SelectOp("select", terms) - - def __data__(self): - return {"select": [ - { - "name": t.name.__data__(), - "value": t.value.__data__() - } - for t in self.terms - ]} - - def vars(self): - return UNION(t.value for t in self.terms) - - def map(self, map_): - return SelectOp("select", [ - {"name": t.name, "value": t.value.map(map_)} - for t in self.terms - ]) - - -class ScriptOp(Expression): - """ - ONLY FOR WHEN YOU TRUST THE SCRIPT SOURCE - """ - - def __init__(self, op, script, data_type=OBJECT): - Expression.__init__(self, op, None) - if not isinstance(script, text_type): - Log.error("expecting text of a script") - self.simplified = True - self.script = script 
class ScriptOp(Expression): - """ - ONLY FOR WHEN YOU TRUST THE SCRIPT SOURCE - """ - - def __init__(self, op, script, data_type=OBJECT): - Expression.__init__(self, op, None) - if not isinstance(script, text_type): - Log.error("expecting text of a script") - self.simplified = True - self.script = script - self.data_type = data_type - - @classmethod - def define(cls, expr): - if ALLOW_SCRIPTING: - Log.warning("Scripting has been activated: This has known security holes!!\nscript = {{script|quote}}", script=expr.script.term) - return ScriptOp("script", expr.script) - else: - Log.error("scripting is disabled") - - def vars(self): - return set() - - def map(self, map_): - return self - - def __unicode__(self): - return self.script - - def __str__(self): - return str(self.script) - - - _json_encoder = utf8_json_encoder - - - def value2json(value): - try: - scrubbed = scrub(value, scrub_number=float) - return text_type(_json_encoder(scrubbed)) - except Exception as e: - e = Except.wrap(e) - Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e) - raise e - - - class Literal(Expression): - """ - A literal JSON document - """ - - def __new__(cls, op, term): - if term == None: - return NULL - if term is True: - return TRUE - if term is False: - return FALSE - if isinstance(term, Mapping) and term.date: - # SPECIAL CASE - return DateOp(None, term.date) - return object.__new__(cls) - - def __init__(self, op, term): - Expression.__init__(self, "", None) - self.simplified = True - self.term = term - - @classmethod - def define(cls, expr): - return Literal(None, expr.literal) - - def __nonzero__(self): - return True - - def __eq__(self, other): - if other == None: - if self.term == None: - return True - else: - return False - elif self.term == None: - return False - if isinstance(other, Literal): - return (self.term == other.term) or (self.json == other.json) - return False - - def __data__(self): - return {"literal": self.value} - - @property - def value(self): - return self.term - - @property - def json(self): - if self.term == "": - self._json = '""' - else: - self._json = value2json(self.term) - return self._json - - def vars(self): - return set() - - def map(self, map_): - return self - - def missing(self): - if self.term in [None, Null]: - return TRUE - if self.value == '': - return TRUE - return FALSE - - def __call__(self, row=None, rownum=None, rows=None): - return self.value - - def __unicode__(self): - return self.json - - def __str__(self): - return str(self.json) - - @property - def type(self): - return python_type_to_json_type[self.term.__class__] - - def partial_eval(self): - return self - ZERO = Literal("literal", 0) - ONE = Literal("literal", 1) - - class NullOp(Literal): - """ - FOR USE WHEN EVERYTHING IS EXPECTED TO BE AN Expression - USE IT TO EXPECT A NULL VALUE IN assertAlmostEqual - """ - data_type = OBJECT - - def __new__(cls, *args, **kwargs): - return object.__new__(cls, *args, **kwargs) - - def __init__(self, op=None, term=None): - Literal.__init__(self, op, None) - - def __nonzero__(self): - return False - - def __eq__(self, other): - return other == None - - def __gt__(self, other): - return False - - def __lt__(self, other): - return False - - def __ge__(self, other): - if other == None: - return True - return False - - def __le__(self, other): - if other == None: - return True - return False - - def __data__(self): - return {"null": {}} - - def vars(self): - return set() - - def map(self, map_): - return self - - def missing(self): - return TRUE - - def exists(self): - return FALSE - - def __call__(self, row=None, rownum=None, rows=None): - return Null - - def __unicode__(self): - return "null" - - def __str__(self): - return b"null" - NULL = NullOp() - - class TrueOp(Literal): - data_type = BOOLEAN - - def __new__(cls, 
*args, **kwargs): - return object.__new__(cls, *args, **kwargs) - - def __init__(self, op=None, term=None): - Literal.__init__(self, op, True) - - @classmethod - def define(cls, expr): - return TRUE - - def __nonzero__(self): - return True - - def __eq__(self, other): - return (other is TRUE) or (other is True) - - def __data__(self): - return True - - def vars(self): - return set() - - def map(self, map_): - return self - - def missing(self): - return FALSE - - def is_true(self): - return TRUE - - def is_false(self): - return FALSE - - def __call__(self, row=None, rownum=None, rows=None): - return True - - def __unicode__(self): - return "true" - - def __str__(self): - return b"true" - TRUE = TrueOp() - - class FalseOp(Literal): - data_type = BOOLEAN - - def __new__(cls, *args, **kwargs): - return object.__new__(cls, *args, **kwargs) - - def __init__(self, op=None, term=None): - Literal.__init__(self, op, False) - - @classmethod - def define(cls, expr): - return FALSE - - def __nonzero__(self): - return False - - def __eq__(self, other): - return (other is FALSE) or (other is False) - - def __data__(self): - return False - - def vars(self): - return set() - - def map(self, map_): - return self - - def missing(self): - return FALSE - - def is_true(self): - return FALSE - - def is_false(self): - return TRUE - - def __call__(self, row=None, rownum=None, rows=None): - return False - - def __unicode__(self): - return "false" - - def __str__(self): - return b"false" - FALSE = FalseOp() - - class DateOp(Literal): - data_type = NUMBER - - def __init__(self, op, term): - if hasattr(self, "date"): - return - if isinstance(term, text_type): - self.date = term - else: - self.date = coalesce(term.literal, term) - v = unicode2Date(self.date) - if isinstance(v, Date): - Literal.__init__(self, op, v.unix) - else: - Literal.__init__(self, op, v.seconds) - - @classmethod - def define(cls, expr): - return DateOp("date", expr.date) - - def __data__(self): - return {"date": self.date} - - def __call__(self, row=None, rownum=None, rows=None): - return Date(self.date) - - class TupleOp(Expression): - data_type = OBJECT - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if terms == None: - self.terms = [] - elif isinstance(terms, list): - self.terms = terms - else: - self.terms = [terms] - - def __data__(self): - return {"tuple": [t.__data__() for t in self.terms]} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return TupleOp("tuple", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - class LeavesOp(Expression): - data_type = OBJECT - - def __init__(self, op, term, prefix=None): - Expression.__init__(self, op, term) - self.term = term - self.prefix = prefix - - def __data__(self): - if self.prefix: - return {"leaves": self.term.__data__(), "prefix": self.prefix} - else: - return {"leaves": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return LeavesOp("leaves", self.term.map(map_), prefix=self.prefix) - - def missing(self): - return FALSE - - class BinaryOp(Expression): - has_simple_form = True - data_type = NUMBER - - operators = { - "sub": "-", - "subtract": "-", - "minus": "-", - "mul": "*", - "mult": "*", - "multiply": "*", - "div": "/", - "divide": "/", - "exp": "**", - "mod": "%" - } - - def __init__(self, op, terms, default=NULL): - Expression.__init__(self, op, terms) - if op not in BinaryOp.operators: - Log.error("{{op|quote}} not a recognized 
operator", op=op) - self.op = op - self.lhs, self.rhs = terms - self.default = default - - @property - def name(self): - return self.op; - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - return {self.op: {self.lhs.var, self.rhs.value}, "default": self.default} - else: - return {self.op: [self.lhs.__data__(), self.rhs.__data__()], "default": self.default} - - def vars(self): - return self.lhs.vars() | self.rhs.vars() | self.default.vars() - - def map(self, map_): - return BinaryOp(self.op, [self.lhs.map(map_), self.rhs.map(map_)], default=self.default.map(map_)) - - def missing(self): - if self.default.exists(): - return FALSE - else: - return OrOp("or", [self.lhs.missing(), self.rhs.missing()]) - - # @simplified - # def partial_eval(self): - # lhs = FirstOp("first", self.lhs).partial_eval() - # rhs = FirstOp("first", self.rhs).partial_eval() - # default_ = FirstOp("first", self.default).partial_eval() - # return BinaryOp(self.op, [lhs, rhs], default=default_) - - -class InequalityOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - operators = { - "gt": ">", - "gte": ">=", - "lte": "<=", - "lt": "<" - } - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if op not in InequalityOp.operators: - Log.error("{{op|quote}} not a recognized operator", op=op) - self.op = op - self.lhs, self.rhs = terms - - @property - def name(self): - return self.op; - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - return {self.op: {self.lhs.var, self.rhs.value}} - else: - return {self.op: [self.lhs.__data__(), self.rhs.__data__()]} - - def __eq__(self, other): - if not isinstance(other, InequalityOp): - return False - return self.op == other.op and self.lhs == other.lhs and self.rhs == other.rhs - - def vars(self): - return self.lhs.vars() | self.rhs.vars() - - def map(self, map_): - return InequalityOp(self.op, [self.lhs.map(map_), self.rhs.map(map_)]) - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - lhs = self.lhs.partial_eval() - rhs = self.rhs.partial_eval() - - if isinstance(lhs, Literal) and isinstance(rhs, Literal): - return Literal(None, builtin_ops[self.op](lhs, rhs)) - - return InequalityOp(self.op, [lhs, rhs]) - - -class DivOp(Expression): - has_simple_form = True - data_type = NUMBER - - def __init__(self, op, terms, default=NULL): - Expression.__init__(self, op, terms) - self.lhs, self.rhs = terms - self.default = default - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - return {"div": {self.lhs.var, self.rhs.value}, "default": self.default} - else: - return {"div": [self.lhs.__data__(), self.rhs.__data__()], "default": self.default} - - def vars(self): - return self.lhs.vars() | self.rhs.vars() | self.default.vars() - - def map(self, map_): - return DivOp("div", [self.lhs.map(map_), self.rhs.map(map_)], default=self.default.map(map_)) - - def missing(self): - return AndOp("and", [ - self.default.missing(), - OrOp("or", [self.lhs.missing(), self.rhs.missing(), EqOp("eq", [self.rhs, ZERO])]) - ]).partial_eval() - - -class FloorOp(Expression): - has_simple_form = True - data_type = NUMBER - - def __init__(self, op, terms, default=NULL): - Expression.__init__(self, op, terms) - if len(terms) == 1: - self.lhs = terms[0] - self.rhs = ONE - else: - self.lhs, self.rhs = terms - self.default = default - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - 
return {"floor": {self.lhs.var, self.rhs.value}, "default": self.default} - else: - return {"floor": [self.lhs.__data__(), self.rhs.__data__()], "default": self.default} - - def vars(self): - return self.lhs.vars() | self.rhs.vars() | self.default.vars() - - def map(self, map_): - return FloorOp("floor", [self.lhs.map(map_), self.rhs.map(map_)], default=self.default.map(map_)) - - def missing(self): - if self.default.exists(): - return FALSE - else: - return OrOp("or", [self.lhs.missing(), self.rhs.missing(), EqOp("eq", [self.rhs, ZERO])]) - - -class EqOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __new__(cls, op, terms): - if isinstance(terms, list): - return object.__new__(cls) - - items = terms.items() - if len(items) == 1: - if isinstance(items[0][1], list): - return InOp("in", items[0]) - else: - return EqOp("eq", items[0]) - else: - acc = [] - for lhs, rhs in items: - if rhs.json.startswith("["): - acc.append(InOp("in", [Variable(lhs), rhs])) - else: - acc.append(EqOp("eq", [Variable(lhs), rhs])) - return AndOp("and", acc) - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - self.op = op - self.lhs, self.rhs = terms - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - return {"eq": {self.lhs.var, self.rhs.value}} - else: - return {"eq": [self.lhs.__data__(), self.rhs.__data__()]} - - def __eq__(self, other): - if isinstance(other, EqOp): - return self.lhs == other.lhs and self.rhs == other.rhs - return False - - def vars(self): - return self.lhs.vars() | self.rhs.vars() - - def map(self, map_): - return EqOp(self.op, [self.lhs.map(map_), self.rhs.map(map_)]) - - def missing(self): - return FALSE - - def exists(self): - return TRUE - - @simplified - def partial_eval(self): - lhs = self.lhs.partial_eval() - rhs = self.rhs.partial_eval() - - if isinstance(lhs, Literal) and isinstance(rhs, Literal): - return TRUE if builtin_ops["eq"](lhs.value, rhs.value) else FALSE - else: - return CaseOp( - "case", - [ - WhenOp("when", lhs.missing(), **{"then": rhs.missing()}), - WhenOp("when", rhs.missing(), **{"then": FALSE}), - BasicEqOp("eq", [lhs, rhs]) - ] - ).partial_eval() - - -class NeOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if isinstance(terms, (list, tuple)): - self.lhs, self.rhs = terms - elif isinstance(terms, Mapping): - self.rhs, self.lhs = terms.items()[0] - else: - Log.error("logic error") - - def __data__(self): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - return {"ne": {self.lhs.var, self.rhs.value}} - else: - return {"ne": [self.lhs.__data__(), self.rhs.__data__()]} - - def vars(self): - return self.lhs.vars() | self.rhs.vars() - - def map(self, map_): - return NeOp("ne", [self.lhs.map(map_), self.rhs.map(map_)]) - - def missing(self): - return FALSE # USING THE decisive EQUAILTY https://github.com/mozilla/jx-sqlite/blob/master/docs/Logical%20Equality.md#definitions - - @simplified - def partial_eval(self): - output = NotOp("not", EqOp("eq", [self.lhs, self.rhs])).partial_eval() - return output - - -class NotOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.term = term - - def __data__(self): - return {"not": self.term.__data__()} - - def __eq__(self, other): - if not isinstance(other, NotOp): - return False - return self.term == other.term - - def vars(self): - return self.term.vars() - - def map(self, 
map_): - return NotOp("not", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - def inverse(term): - if term is TRUE: - return FALSE - elif term is FALSE: - return TRUE - elif isinstance(term, NullOp): - return TRUE - elif isinstance(term, Literal): - Log.error("`not` operator expects a Boolean term") - elif isinstance(term, WhenOp): - output = WhenOp( - "when", - term.when, - **{"then": inverse(term.then), "else": inverse(term.els_)} - ).partial_eval() - elif isinstance(term, CaseOp): - output = CaseOp( - "case", - [ - WhenOp("when", w.when, **{"then": inverse(w.then)}) if isinstance(w, WhenOp) else inverse(w) - for w in term.whens - ] - ).partial_eval() - elif isinstance(term, AndOp): - output = OrOp("or", [inverse(t) for t in term.terms]).partial_eval() - elif isinstance(term, OrOp): - output = AndOp("and", [inverse(t) for t in term.terms]).partial_eval() - elif isinstance(term, MissingOp): - output = NotOp("not", term.expr.missing()) - elif isinstance(term, ExistsOp): - output = term.field.missing().partial_eval() - elif isinstance(term, NotOp): - output = term.term.partial_eval() - elif isinstance(term, NeOp): - output = EqOp("eq", [term.lhs, term.rhs]).partial_eval() - elif isinstance(term, (BasicIndexOfOp, BasicSubstringOp)): - return FALSE - else: - output = NotOp("not", term) - - return output - - output = inverse(self.term.partial_eval()) - return output - - -class AndOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if terms == None: - self.terms = [] - elif isinstance(terms, list): - self.terms = terms - else: - self.terms = [terms] - - def __data__(self): - return {"and": [t.__data__() for t in self.terms]} - - def __eq__(self, other): - if isinstance(other, AndOp): - return all(a == b for a, b in zip_longest(self.terms, other.terms)) - return False - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return AndOp("and", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - or_terms = [[]] # LIST OF TUPLES FOR or-ing and and-ing - for i, t in enumerate(self.terms): - simple = BooleanOp("boolean", t).partial_eval() - if simple is TRUE: - continue - elif simple is FALSE: - return FALSE - elif isinstance(simple, AndOp): - for and_terms in or_terms: - and_terms.extend([tt for tt in simple.terms if tt not in and_terms]) - continue - elif isinstance(simple, OrOp): - or_terms = [ - and_terms + [o] - for o in simple.terms - for and_terms in or_terms - ] - continue - elif simple.type != BOOLEAN: - Log.error("expecting boolean value") - - for and_terms in list(or_terms): - if NotOp("not", simple).partial_eval() in and_terms: - or_terms.remove(and_terms) - elif simple not in and_terms: - and_terms.append(simple) - - if len(or_terms) == 1: - and_terms = or_terms[0] - if len(and_terms) == 0: - return TRUE - elif len(and_terms) == 1: - return and_terms[0] - else: - return AndOp("and", and_terms) - - return OrOp("or", [ - AndOp("and", and_terms) if len(and_terms) > 1 else and_terms[0] - for and_terms in or_terms - ]) - -class OrOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - self.terms = terms - - def __data__(self): - return {"or": [t.__data__() for t in self.terms]} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - 
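# EDITOR'S NOTE — not part of the original patch; a sketch of what partial_eval()
# below does with constant terms (decisive-boolean rules):
#
#     OrOp("or", [TRUE, Variable("a")]).partial_eval()    # -> TRUE
#     OrOp("or", [FALSE, NULL, MissingOp("missing", Variable("a"))]).partial_eval()
#     # -> MissingOp("missing", Variable("a"))   (FALSE and NULL terms are dropped)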
def map(self, map_): - return OrOp("or", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - def __call__(self, row=None, rownum=None, rows=None): - return any(t(row, rownum, rows) for t in self.terms) - - def __eq__(self, other): - if not isinstance(other, OrOp): - return False - if len(self.terms) != len(other.terms): - return False - return all(t == u for t, u in zip(self.terms, other.terms)) - - @simplified - def partial_eval(self): - terms = [] - ands = [] - for t in self.terms: - simple = t.partial_eval() - if simple is TRUE: - return TRUE - elif simple in (FALSE, NULL): - pass - elif isinstance(simple, OrOp): - terms.extend(tt for tt in simple.terms if tt not in terms) - elif isinstance(simple, AndOp): - ands.append(simple) - elif simple.type != BOOLEAN: - Log.error("expecting boolean value") - elif simple not in terms: - terms.append(simple) - - if ands: # REMOVE TERMS THAT ARE MORE RESTRICTIVE THAN OTHERS - for a in ands: - for tt in a.terms: - if tt in terms: - break - else: - terms.append(a) - - if len(terms) == 0: - return FALSE - if len(terms) == 1: - return terms[0] - return OrOp("or", terms) - - class LengthOp(Expression): - data_type = INTEGER - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __eq__(self, other): - if isinstance(other, LengthOp): - return self.term == other.term - return False - - def __data__(self): - return {"length": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return LengthOp("length", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = self.term.partial_eval() - if isinstance(term, Literal): - if isinstance(term.value, text_type): - return Literal(None, len(term.value)) - else: - return NULL - else: - return LengthOp("length", term) - - class FirstOp(Expression): - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - self.data_type = self.term.type - - def __data__(self): - return {"first": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return FirstOp("first", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = self.term.partial_eval() - if isinstance(self.term, FirstOp): - return term - elif term.type != OBJECT and not term.many: - return term - elif term is NULL: - return term - elif isinstance(term, Literal): - Log.error("not handled yet") - else: - return FirstOp("first", term) - - class LastOp(Expression): - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - self.data_type = self.term.type - - def __data__(self): - return {"last": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return LastOp("last", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = self.term.partial_eval() - if isinstance(self.term, LastOp): - return term - elif term.type != OBJECT and not term.many: - return term - elif term is NULL: - return term - elif isinstance(term, Literal): - if isinstance(term.value, list): - if len(term.value) > 0: - return Literal(None, term.value[-1]) - return NULL - return term - else: - return LastOp("last", term) - - class BooleanOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return 
{"boolean": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return BooleanOp("boolean", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = self.term.partial_eval() - if term is TRUE: - return TRUE - elif term in (FALSE, NULL): - return FALSE - elif term.type == BOOLEAN: - return term - - is_missing = NotOp("not", term.missing()).partial_eval() - return is_missing - - -class IsBooleanOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"is_boolean": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return IsBooleanOp("is_boolean", self.term.map(map_)) - - def missing(self): - return FALSE - - -class IntegerOp(Expression): - data_type = INTEGER - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"integer": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return IntegerOp("integer", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = FirstOp("first", self.term).partial_eval() - if isinstance(term, CoalesceOp): - return CoalesceOp("coalesce", [IntegerOp("integer", t) for t in term.terms]) - if term.type == INTEGER: - return term - return IntegerOp("integer", term) - - -class IsIntegerOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"is_integer": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return IsIntegerOp("is_integer", self.term.map(map_)) - - def missing(self): - return FALSE - - -class NumberOp(Expression): - data_type = NUMBER - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"number": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return NumberOp("number", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - @simplified - def partial_eval(self): - term = FirstOp("first", self.term).partial_eval() - if isinstance(term, CoalesceOp): - return CoalesceOp("coalesce", [NumberOp("number", t) for t in term.terms]) - return self - -class IsNumberOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"is_number": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return IsNumberOp("is_number", self.term.map(map_)) - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - term = self.term.partial_eval() - - if isinstance(term, NullOp): - return FALSE - elif term.type in (INTEGER, NUMBER): - return TRUE - elif term.type == OBJECT: - return self - else: - return FALSE - - - -class StringOp(Expression): - data_type = STRING - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"string": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return StringOp("string", self.term.map(map_)) - - def missing(self): - return self.term.missing() - - 
@simplified - def partial_eval(self): - term = FirstOp("first", self.term).partial_eval() - if isinstance(term, CoalesceOp): - return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in term.terms]) - elif isinstance(term, Literal): - if term.type == STRING: - return term - else: - return Literal("literal", mo_json.value2json(term.value)) - return self - - class IsStringOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.term = term - - def __data__(self): - return {"is_string": self.term.__data__()} - - def vars(self): - return self.term.vars() - - def map(self, map_): - return IsStringOp("is_string", self.term.map(map_)) - - def missing(self): - return FALSE - - class CountOp(Expression): - has_simple_form = False - data_type = INTEGER - - def __init__(self, op, terms, **clauses): - Expression.__init__(self, op, terms) - self.terms = terms - - def __data__(self): - return {"count": [t.__data__() for t in self.terms]} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return CountOp("count", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - def exists(self): - return TRUE - - class MaxOp(Expression): - data_type = NUMBER - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if terms == None: - self.terms = [] - elif isinstance(terms, list): - self.terms = terms - else: - self.terms = [terms] - - def __data__(self): - return {"max": [t.__data__() for t in self.terms]} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return MaxOp("max", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - maximum = None - terms = [] - for t in self.terms: - simple = t.partial_eval() - if isinstance(simple, NullOp): - pass - elif isinstance(simple, Literal): - maximum = MAX([maximum, simple.value]) - else: - terms.append(simple) - if len(terms) == 0: - if maximum == None: - return NULL - else: - return Literal(None, maximum) - else: - if maximum == None: - output = MaxOp("max", terms) - else: - output = MaxOp("max", [Literal(None, maximum)] + terms) - - return output - - class MinOp(Expression): - data_type = NUMBER - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - if terms == None: - self.terms = [] - elif isinstance(terms, list): - self.terms = terms - else: - self.terms = [terms] - - def __data__(self): - return {"min": [t.__data__() for t in self.terms]} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return MinOp("min", [t.map(map_) for t in self.terms]) - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - minimum = None - terms = [] - for t in self.terms: - simple = t.partial_eval() - if isinstance(simple, NullOp): - pass - elif isinstance(simple, Literal): - minimum = MIN([minimum, simple.value]) - else: - terms.append(simple) - if len(terms) == 0: - if minimum == None: - return NULL - else: - return Literal(None, minimum) - else: - if minimum == None: - output = MinOp("min", terms) - else: - output = MinOp("min", [Literal(None, minimum)] + terms) - - return output
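# EDITOR'S NOTE — not part of the original patch; MultiOp (below) backs the
# `add`, `mult` and `sum` forms. A sketch of the `nulls`/`default` clauses:
#
#     {"add": ["a", "b"]}                  # missing when EITHER term is missing
#     {"add": ["a", "b"], "nulls": true}   # missing only when BOTH are missing
#     {"add": ["a", "b"], "default": 0}    # never missing; 0 when all terms are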
class MultiOp(Expression): - has_simple_form = True - data_type = NUMBER - - def __init__(self, op, terms, **clauses): - Expression.__init__(self, op, terms) - self.op = op - self.terms = terms - self.default = coalesce(clauses.get("default"), NULL) - self.nulls = coalesce(clauses.get("nulls"), FALSE) - - def __data__(self): - return {self.op: [t.__data__() for t in self.terms], "default": self.default, "nulls": self.nulls} - - def vars(self): - output = set() - for t in self.terms: - output |= t.vars() - return output - - def map(self, map_): - return MultiOp(self.op, [t.map(map_) for t in self.terms], **{"default": self.default, "nulls": self.nulls}) - - def missing(self): - if self.nulls: - if isinstance(self.default, NullOp): - return AndOp("and", [t.missing() for t in self.terms]) - else: - return FALSE - else: - if isinstance(self.default, NullOp): - return OrOp("or", [t.missing() for t in self.terms]) - else: - return FALSE - - def exists(self): - if self.nulls: - return OrOp("or", [t.exists() for t in self.terms]) - else: - return AndOp("and", [t.exists() for t in self.terms]) - - @simplified - def partial_eval(self): - acc = None - terms = [] - for t in self.terms: - simple = t.partial_eval() - if isinstance(simple, NullOp): - pass - elif isinstance(simple, Literal): - if acc is None: - acc = simple.value - else: - acc = builtin_ops[self.op](acc, simple.value) - else: - terms.append(simple) - if len(terms) == 0: - if acc == None: - return NULL - else: - return Literal(None, acc) - else: - if acc is None: - output = MultiOp(self.op, terms, default=self.default, nulls=self.nulls) - else: - output = MultiOp(self.op, [Literal(None, acc)] + terms, default=self.default, nulls=self.nulls) - - return output - - def AddOp(op, terms, **clauses): - return MultiOp("add", terms, **clauses) - - def MultOp(op, terms, **clauses): - return MultiOp("mult", terms, **clauses) - - # def MaxOp(op, terms, **clauses): - # return MaxOp("max", terms, **clauses) - # - # - # def MinOp(op, terms, **clauses): - # return MinOp("min", terms, **clauses) - - class RegExpOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - self.var, self.pattern = terms - - def __data__(self): - return {"regexp": {self.var.var: self.pattern}} - - def vars(self): - return {self.var} - - def map(self, map_): - return RegExpOp("regex", [self.var.map(map_), self.pattern]) - - def missing(self): - return FALSE - - def exists(self): - return TRUE - - class CoalesceOp(Expression): - has_simple_form = True - - def __init__(self, op, terms): - Expression.__init__(self, op, terms) - self.terms = terms - - def __data__(self): - return {"coalesce": [t.__data__() for t in self.terms]} - - def __eq__(self, other): - if isinstance(other, CoalesceOp): - if len(self.terms) == len(other.terms): - return all(s == o for s, o in zip(self.terms, other.terms)) - return False - - def missing(self): - # RETURN true FOR RECORDS THAT WOULD RETURN NULL - return AndOp("and", [v.missing() for v in self.terms]) - - def vars(self): - output = set() - for v in self.terms: - output |= v.vars() - return output - - def map(self, map_): - return CoalesceOp("coalesce", [v.map(map_) for v in self.terms]) - - @simplified - def partial_eval(self): - terms = [] - for t in self.terms: - simple = FirstOp("first", t).partial_eval() - if simple is NULL: - pass - elif isinstance(simple, Literal): - terms.append(simple) - break - else: - terms.append(simple) - - if len(terms) == 0: - return NULL - elif len(terms) == 1: - return terms[0] - else: - return CoalesceOp("coalesce", terms) - - class MissingOp(Expression): - data_type = BOOLEAN - - def 
__init__(self, op, term): - Expression.__init__(self, op, term) - self.expr = term - - def __data__(self): - return {"missing": self.expr.__data__()} - - def __eq__(self, other): - if not isinstance(other, MissingOp): - return False - else: - return self.expr == other.expr - - def vars(self): - return self.expr.vars() - - def map(self, map_): - return MissingOp("missing", self.expr.map(map_)) - - def missing(self): - return FALSE - - def exists(self): - return TRUE - - @simplified - def partial_eval(self): - expr = self.expr.partial_eval() - if isinstance(expr, Variable) and expr.var == "_id": - return FALSE - if isinstance(expr, Literal): - if expr is NULL: - return TRUE - elif expr.value == None: - Log.error("not expected") - else: - return FALSE - self.simplified = True - return self - - -class ExistsOp(Expression): - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, [term]) - self.field = term - - def __data__(self): - return {"exists": self.field.__data__()} - - def vars(self): - return self.field.vars() - - def map(self, map_): - return ExistsOp("exists", self.field.map(map_)) - - def missing(self): - return FALSE - - def exists(self): - return TRUE - - @simplified - def partial_eval(self): - return NotOp("not", self.field.missing()).partial_eval() - - -class PrefixOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if not term: - self.expr = None - self.prefix=None - elif isinstance(term, Mapping): - self.expr, self.prefix = term.items()[0] - else: - self.expr, self.prefix = term - - def __data__(self): - if not self.expr: - return {"prefix": {}} - elif isinstance(self.expr, Variable) and isinstance(self.prefix, Literal): - return {"prefix": {self.expr.var: self.prefix.value}} - else: - return {"prefix": [self.expr.__data__(), self.prefix.__data__()]} - - def vars(self): - if not self.expr: - return set() - return self.expr.vars() | self.prefix.vars() - - def map(self, map_): - if not self.expr: - return self - else: - return PrefixOp("prefix", [self.expr.map(map_), self.prefix.map(map_)]) - - def missing(self): - return FALSE - - -class SuffixOp(Expression): - has_simple_form = True - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if not term: - self.expr = self.suffix = None - elif isinstance(term, Mapping): - self.expr, self.suffix = term.items()[0] - else: - self.expr, self.suffix = term - - def __data__(self): - if self.expr is None: - return {"suffix": {}} - elif isinstance(self.expr, Variable) and isinstance(self.suffix, Literal): - return {"suffix": {self.expr.var: self.suffix.value}} - else: - return {"suffix": [self.expr.__data__(), self.suffix.__data__()]} - - def vars(self): - if self.expr is None: - return set() - return self.expr.vars() | self.suffix.vars() - - def map(self, map_): - if self.expr is None: - return TRUE - else: - return SuffixOp("suffix", [self.expr.map(map_), self.suffix.map(map_)]) - - -class ConcatOp(Expression): - has_simple_form = True - data_type = STRING - - def __init__(self, op, term, **clauses): - Expression.__init__(self, op, term) - if isinstance(term, Mapping): - self.terms = term.items()[0] - else: - self.terms = term - self.separator = clauses.get("separator", Literal(None, "")) - self.default = clauses.get("default", NULL) - if not isinstance(self.separator, Literal): - Log.error("Expecting a literal separator") - - @classmethod - def define(cls, expr): - term = expr.concat - if 
isinstance(term, Mapping): - k, v = term.items()[0] - terms = [Variable(k), Literal("literal", v)] - else: - terms = map(jx_expression, term) - - return ConcatOp( - "concat", - terms, - **{k: Literal(None, v) for k, v in expr.items() if k in ["default", "separator"]} - ) - - def __data__(self): - if isinstance(self.terms[0], Variable) and isinstance(self.terms[1], Literal): - output = {"concat": {self.terms[0].var: self.terms[1].value}} - else: - output = {"concat": [t.__data__() for t in self.terms]} - if self.separator.json != '""': - output["separator"] = self.separator.value - return output - - def vars(self): - if not self.terms: - return set() - return set.union(*(t.vars() for t in self.terms)) - - def map(self, map_): - return ConcatOp("concat", [t.map(map_) for t in self.terms], separator=self.separator, default=self.default) - - def missing(self): - return AndOp("and", [t.missing() for t in self.terms] + [self.default.missing()]).partial_eval() - - class UnixOp(Expression): - """ - FOR USE ON DATABASES WHICH HAVE A DATE COLUMN: CONVERT TO UNIX - """ - has_simple_form = True - data_type = NUMBER - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.value = term - - def vars(self): - return self.value.vars() - - def map(self, map_): - return UnixOp("unix", self.value.map(map_)) - - def missing(self): - return self.value.missing() - - class FromUnixOp(Expression): - """ - FOR USE ON DATABASES WHICH HAVE A DATE COLUMN: CONVERT FROM UNIX - """ - data_type = NUMBER - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.value = term - - def vars(self): - return self.value.vars() - - def map(self, map_): - return FromUnixOp("from_unix", self.value.map(map_)) - - def missing(self): - return self.value.missing() - - class LeftOp(Expression): - has_simple_form = True - data_type = STRING - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if isinstance(term, Mapping): - self.value, self.length = term.items()[0] - else: - self.value, self.length = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.length, Literal): - return {"left": {self.value.var: self.length.value}} - else: - return {"left": [self.value.__data__(), self.length.__data__()]} - - def vars(self): - return self.value.vars() | self.length.vars() - - def map(self, map_): - return LeftOp("left", [self.value.map(map_), self.length.map(map_)]) - - def missing(self): - return OrOp("or", [self.value.missing(), self.length.missing()]).partial_eval() - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - length = self.length.partial_eval() - max_length = LengthOp("length", value) - - return WhenOp( - "when", - self.missing(), - **{ - "else": BasicSubstringOp("substring", [ - value, - ZERO, - MaxOp("max", [ZERO, MinOp("min", [length, max_length])]) - ]) - } - ).partial_eval() - - class NotLeftOp(Expression): - has_simple_form = True - data_type = STRING - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if isinstance(term, Mapping): - self.value, self.length = term.items()[0] - else: - self.value, self.length = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.length, Literal): - return {"not_left": {self.value.var: self.length.value}} - else: - return {"not_left": [self.value.__data__(), self.length.__data__()]} - - def vars(self): - return self.value.vars() | self.length.vars() - - def map(self, map_): - return NotLeftOp(None, [self.value.map(map_), 
self.length.map(map_)]) - - def missing(self): - return OrOp(None, [self.value.missing(), self.length.missing()]) - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - length = self.length.partial_eval() - max_length = LengthOp("length", value) - - return WhenOp( - "when", - self.missing(), - **{ - "else": BasicSubstringOp("substring", [ - value, - MaxOp("max", [ZERO, MinOp("min", [length, max_length])]), - max_length - ]) - } - ).partial_eval() - - -class RightOp(Expression): - has_simple_form = True - data_type = STRING - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if isinstance(term, Mapping): - self.value, self.length = term.items()[0] - else: - self.value, self.length = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.length, Literal): - return {"right": {self.value.var: self.length.value}} - else: - return {"right": [self.value.__data__(), self.length.__data__()]} - - def vars(self): - return self.value.vars() | self.length.vars() - - def map(self, map_): - return RightOp("right", [self.value.map(map_), self.length.map(map_)]) - - def missing(self): - return OrOp(None, [self.value.missing(), self.length.missing()]) - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - length = self.length.partial_eval() - max_length = LengthOp("length", value) - - return WhenOp( - "when", - self.missing(), - **{ - "else": BasicSubstringOp("substring", [ - value, - MaxOp("max", [ZERO, MinOp("min", [max_length, BinaryOp("sub", [max_length, length])])]), - max_length - ]) - } - ).partial_eval() - - -class NotRightOp(Expression): - has_simple_form = True - data_type = STRING - - def __init__(self, op, term): - Expression.__init__(self, op, term) - if isinstance(term, Mapping): - self.value, self.length = term.items()[0] - else: - self.value, self.length = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.length, Literal): - return {"not_right": {self.value.var: self.length.value}} - else: - return {"not_right": [self.value.__data__(), self.length.__data__()]} - - def vars(self): - return self.value.vars() | self.length.vars() - - def map(self, map_): - return NotRightOp(None, [self.value.map(map_), self.length.map(map_)]) - - def missing(self): - return OrOp(None, [self.value.missing(), self.length.missing()]) - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - length = self.length.partial_eval() - max_length = LengthOp("length", value) - - return WhenOp( - "when", - self.missing(), - **{ - "else": BasicSubstringOp("substring", [ - value, - ZERO, - MaxOp("max", [ZERO, MinOp("min", [max_length, BinaryOp("sub", [max_length, length])])]) - ]) - } - ).partial_eval() - - -class FindOp(Expression): - """ - RETURN INDEX OF find IN value, ELSE RETURN null - """ - has_simple_form = True - data_type = INTEGER - - def __init__(self, op, term, **kwargs): - Expression.__init__(self, op, term) - self.value, self.find = term - self.default = kwargs.get("default", NULL) - self.start = kwargs.get("start", ZERO).partial_eval() - if isinstance(self.start, NullOp): - self.start = ZERO - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.find, Literal): - output = { - "find": {self.value.var, self.find.value}, - "start":self.start.__data__() - } - else: - output = { - "find": [self.value.__data__(), self.find.__data__()], - "start":self.start.__data__() - } - if self.default: - 
output["default"]=self.default.__data__() - return output - - def vars(self): - return self.value.vars() | self.find.vars() | self.default.vars() | self.start.vars() - - def map(self, map_): - return FindOp( - "find", - [self.value.map(map_), self.find.map(map_)], - start=self.start.map(map_), - default=self.default.map(map_) - ) - - def missing(self): - return AndOp("and", [ - self.default.missing(), - OrOp("or", [ - self.value.missing(), - self.find.missing(), - EqOp("eq", [BasicIndexOfOp("", [ - self.value, - self.find, - self.start - ]), Literal(None, -1)]) - ]) - ]).partial_eval() - - def exists(self): - return TRUE - - @simplified - def partial_eval(self): - index = BasicIndexOfOp("indexOf", [ - self.value, - self.find, - self.start - ]).partial_eval() - - output = WhenOp( - "when", - OrOp("or", [ - self.value.missing(), - self.find.missing(), - BasicEqOp("eq", [index, Literal(None, -1)]) - ]), - **{"then": self.default, "else": index} - ).partial_eval() - return output - - -class SplitOp(Expression): - has_simple_form = True - - def __init__(self, op, term, **kwargs): - Expression.__init__(self, op, term) - self.value, self.find = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.find, Literal): - return {"split": {self.value.var, self.find.value}} - else: - return {"split": [self.value.__data__(), self.find.__data__()]} - - def vars(self): - return self.value.vars() | self.find.vars() | self.default.vars() | self.start.vars() - - def map(self, map_): - return FindOp( - "find", - [self.value.map(map_), self.find.map(map_)], - start=self.start.map(map_), - default=self.default.map(map_) - ) - - def missing(self): - v = self.value.to_es_script(not_null=True) - find = self.find.to_es_script(not_null=True) - index = v + ".indexOf(" + find + ", " + self.start.to_es_script() + ")" - - return AndOp("and", [ - self.default.missing(), - OrOp("or", [ - self.value.missing(), - self.find.missing(), - EqOp("eq", [ScriptOp("script", index), Literal(None, -1)]) - ]) - ]) - - def exists(self): - return TRUE - - -class BetweenOp(Expression): - data_type = STRING - - def __init__(self, op, value, prefix, suffix, default=NULL, start=NULL): - Expression.__init__(self, op, []) - self.value = value - self.prefix = prefix - self.suffix = suffix - self.default = default - self.start = start - if isinstance(self.prefix, Literal) and isinstance(self.suffix, Literal): - pass - else: - Log.error("Expecting literal prefix and suffix only") - - @classmethod - def define(cls, expr): - term = expr.between - if isinstance(term, list): - return BetweenOp( - "between", - value=jx_expression(term[0]), - prefix=jx_expression(term[1]), - suffix=jx_expression(term[2]), - default=jx_expression(expr.default), - start=jx_expression(expr.start) - ) - elif isinstance(term, Mapping): - var, vals = term.items()[0] - if isinstance(vals, list) and len(vals) == 2: - return BetweenOp( - "between", - value=Variable(var), - prefix=Literal(None, vals[0]), - suffix=Literal(None, vals[1]), - default=jx_expression(expr.default), - start=jx_expression(expr.start) - ) - else: - Log.error("`between` parameters are expected to be in {var: [prefix, suffix]} form") - else: - Log.error("`between` parameters are expected to be in {var: [prefix, suffix]} form") - - def vars(self): - return self.value.vars() | self.prefix.vars() | self.suffix.vars() | self.default.vars() | self.start.vars() - - def map(self, map_): - return BetweenOp( - "between", - [self.value.map(map_), self.prefix.map(map_), 
self.suffix.map(map_)], - default=self.default.map(map_), - start=self.start.map(map_) - ) - - def missing(self): - return self.partial_eval().missing() - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.prefix, Literal) and isinstance(self.suffix, Literal): - output = wrap({"between": {self.value.var: [self.prefix.value, self.suffix.value]}}) - else: - output = wrap({"between": [self.value.__data__(), self.prefix.__data__(), self.suffix.__data__()]}) - if self.start: - output.start = self.start.__data__() - if self.default: - output.default = self.default.__data__() - return output - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - - start_index = CaseOp( - "case", - [ - WhenOp("when", self.prefix.missing(), **{"then": ZERO}), - WhenOp("when", IsNumberOp("is_number", self.prefix), **{"then": MaxOp("max", [ZERO, self.prefix])}), - FindOp("find", [value, self.prefix], start=self.start) - ] - ).partial_eval() - - len_prefix = CaseOp( - "case", - [ - WhenOp("when", self.prefix.missing(), **{"then": ZERO}), - WhenOp("when", IsNumberOp("is_number", self.prefix), **{"then": ZERO}), - LengthOp("length", self.prefix) - ] - ).partial_eval() - - end_index = CaseOp( - "case", - [ - WhenOp("when", start_index.missing(), **{"then": NULL}), - WhenOp("when", self.suffix.missing(), **{"then": LengthOp("length", value)}), - WhenOp("when", IsNumberOp("is_number", self.suffix), **{"then": MinOp("min", [self.suffix, LengthOp("length", value)])}), - FindOp("find", [value, self.suffix], start=MultiOp("add", [start_index, len_prefix])) - ] - ).partial_eval() - - start_index = MultiOp("add", [start_index, len_prefix]).partial_eval() - substring = BasicSubstringOp("substring", [value, start_index, end_index]).partial_eval() - - between = WhenOp( - "when", - end_index.missing(), - **{ - "then": self.default, - "else": substring - } - ).partial_eval() - - return between - - -class InOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __init__(self, op, term): - Expression.__init__(self, op, term) - self.value, self.superset = term - - def __data__(self): - if isinstance(self.value, Variable) and isinstance(self.superset, Literal): - return {"in": {self.value.var: self.superset.value}} - else: - return {"in": [self.value.__data__(), self.superset.__data__()]} - - def __eq__(self, other): - if isinstance(other, InOp): - return self.value == other.value and self.superset == other.superset - return False - - def vars(self): - return self.value.vars() - - def map(self, map_): - return InOp("in", [self.value.map(map_), self.superset.map(map_)]) - - @simplified - def partial_eval(self): - value = self.value.partial_eval() - superset = self.superset.partial_eval() - if superset is NULL: - return FALSE - elif isinstance(value, Literal) and isinstance(superset, Literal): - return Literal(None, self()) - else: - return self - - def __call__(self): - return self.value() in self.superset() - - def missing(self): - return FALSE - - -class RangeOp(Expression): - has_simple_form = True - data_type = BOOLEAN - - def __new__(cls, op, term, *args): - Expression.__new__(cls, *args) - field, comparisons = term # comparisons IS A Literal() - return AndOp("and", [operators[op](op, [field, Literal(None, value)]) for op, value in comparisons.value.items()]) - - def __init__(self, op, term): - Log.error("Should never happen!") - - -class WhenOp(Expression): - def __init__(self, op, term, **clauses): - Expression.__init__(self, op, [term]) - - self.when = term - 
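# EDITOR'S NOTE — not part of the original patch; the clauses handled just below,
# as a sketch of the simple form:
#
#     {"when": {"exists": "a"}, "then": "a", "else": 0}
#     # then/else drive data_type: same type -> that type; both numeric -> NUMBER;
#     # otherwise OBJECT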
self.then = coalesce(clauses.get("then"), NULL) - self.els_ = coalesce(clauses.get("else"), NULL) - - if self.then is NULL: - self.data_type = self.els_.type - elif self.els_ is NULL: - self.data_type = self.then.type - elif self.then.type == self.els_.type: - self.data_type = self.then.type - elif self.then.type in (INTEGER, NUMBER) and self.els_.type in (INTEGER, NUMBER): - self.data_type = NUMBER - else: - self.data_type = OBJECT - - def __data__(self): - return {"when": self.when.__data__(), "then": self.then.__data__() if self.then else None, "else": self.els_.__data__() if self.els_ else None} - - def vars(self): - return self.when.vars() | self.then.vars() | self.els_.vars() - - def map(self, map_): - return WhenOp("when", self.when.map(map_), **{"then": self.then.map(map_), "else": self.els_.map(map_)}) - - def missing(self): - return OrOp("or", [ - AndOp("and", [self.when, self.then.missing()]), - AndOp("and", [NotOp("not", self.when), self.els_.missing()]) - ]).partial_eval() - - @simplified - def partial_eval(self): - when = BooleanOp("boolean", self.when).partial_eval() - - if when is TRUE: - return self.then.partial_eval() - elif when in [FALSE, NULL]: - return self.els_.partial_eval() - elif isinstance(when, Literal): - Log.error("Expecting `when` clause to return a Boolean, or `null`") - - then = self.then.partial_eval() - els_ = self.els_.partial_eval() - - if then is TRUE: - if els_ is FALSE: - return when - elif els_ is TRUE: - return TRUE - elif then is FALSE: - if els_ is FALSE: - return FALSE - elif els_ is TRUE: - return NotOp("not", when).partial_eval() - - return WhenOp("when", when, **{"then": then, "else": els_}) - - -class CaseOp(Expression): - def __init__(self, op, terms, **clauses): - if not isinstance(terms, (list, tuple)): - Log.error("case expression requires a list of `when` sub-clauses") - Expression.__init__(self, op, terms) - if len(terms) <= 1: - Log.error("Expecting at least two clauses") - else: - for w in terms[:-1]: - if not isinstance(w, WhenOp) or w.els_: - Log.error("case expression does not allow `else` clause in `when` sub-clause") - self.whens = terms - - def __data__(self): - return {"case": [w.__data__() for w in self.whens]} - - def __eq__(self, other): - if isinstance(other, CaseOp): - return all(s==o for s, o in zip(self.whens, other.whens)) - - def vars(self): - output = set() - for w in self.whens: - output |= w.vars() - return output - - def map(self, map_): - return CaseOp("case", [w.map(map_) for w in self.whens]) - - def missing(self): - m = self.whens[-1].missing() - for w in reversed(self.whens[0:-1]): - when = w.when.partial_eval() - if when is FALSE: - pass - elif when is TRUE: - m = w.then.partial_eval().missing() - else: - m = OrOp("or", [AndOp("and", [when, w.then.partial_eval().missing()]), m]) - return m.partial_eval() - - @simplified - def partial_eval(self): - whens = [] - for w in self.whens[:-1]: - when = w.when.partial_eval() - if when is TRUE: - whens.append(w.then.partial_eval()) - break - elif when is FALSE: - pass - else: - whens.append(WhenOp("when", when, **{"then": w.then.partial_eval()})) - else: - whens.append(self.whens[-1].partial_eval()) - - if len(whens) == 1: - return whens[0] - else: - return CaseOp("case", whens) - - @property - def type(self): - types = set(w.then.type if isinstance(w, WhenOp) else w.type for w in self.whens) - if len(types) > 1: - return OBJECT - else: - return list(types)[0] - - - -class BasicIndexOfOp(Expression): - """ - PLACEHOLDER FOR BASIC value.indexOf(find, start) (CAN NOT 
DEAL WITH NULLS) - """ - data_type = INTEGER - - def __init__(self, op, params): - Expression.__init__(self, op, params) - self.value, self.find, self.start = params - - def __data__(self): - return {"basic.indexOf": [self.value.__data__(), self.find.__data__(), self.start.__data__()]} - - def vars(self): - return self.value.vars() | self.find.vars() | self.start.vars() - - def missing(self): - return FALSE - - @simplified - def partial_eval(self): - start = IntegerOp("integer", MaxOp("max", [ZERO, self.start])).partial_eval() - return BasicIndexOfOp("indexOf", [ - StringOp("string", self.value).partial_eval(), - StringOp("string", self.find).partial_eval(), - start - ]) - - -class BasicEqOp(Expression): - """ - PLACEHOLDER FOR BASIC `==` OPERATOR (CAN NOT DEAL WITH NULLS) - """ - data_type = BOOLEAN - - def __init__(self, op, terms): - self.lhs, self.rhs = terms - - def __data__(self): - return {"basic.eq": [self.lhs.__data__(), self.rhs.__data__()]} - - def missing(self): - return FALSE - - def __eq__(self, other): - if not isinstance(other, EqOp): - return False - return self.lhs==other.lhs and self.rhs==other.rhs - - -class BasicSubstringOp(Expression): - """ - PLACEHOLDER FOR BASIC value.substring(start, end) (CAN NOT DEAL WITH NULLS) - """ - data_type = STRING - - def __init__(self, op, terms): - self.value, self.start, self.end = terms - - def __data__(self): - return {"basic.substring": [self.value.__data__(), self.start.__data__(), self.end.__data__()]} - - def missing(self): - return FALSE - - - -operators = { - "add": MultiOp, - "and": AndOp, - "between": BetweenOp, - "case": CaseOp, - "coalesce": CoalesceOp, - "concat": ConcatOp, - "count": CountOp, - "date": DateOp, - "div": DivOp, - "divide": DivOp, - "eq": EqOp, - "exists": ExistsOp, - "exp": BinaryOp, - "find": FindOp, - "first": FirstOp, - "floor": FloorOp, - "from_unix": FromUnixOp, - "get": GetOp, - "gt": InequalityOp, - "gte": InequalityOp, - "in": InOp, - "instr": FindOp, - "is_number": IsNumberOp, - "is_string": IsStringOp, - "last": LastOp, - "left": LeftOp, - "length": LengthOp, - "literal": Literal, - "lt": InequalityOp, - "lte": InequalityOp, - "match_all": TrueOp, - "max": MaxOp, - "minus": BinaryOp, - "missing": MissingOp, - "mod": BinaryOp, - "mul": MultiOp, - "mult": MultiOp, - "multiply": MultiOp, - "ne": NeOp, - "neq": NeOp, - "not": NotOp, - "not_left": NotLeftOp, - "not_right": NotRightOp, - "null": NullOp, - "number": NumberOp, - "offset": OffsetOp, - "or": OrOp, - "postfix": SuffixOp, - "prefix": PrefixOp, - "range": RangeOp, - "regex": RegExpOp, - "regexp": RegExpOp, - "right": RightOp, - "rows": RowsOp, - "script": ScriptOp, - "select": SelectOp, - "split": SplitOp, - "string": StringOp, - "suffix": SuffixOp, - "sub": BinaryOp, - "subtract": BinaryOp, - "sum": MultiOp, - "term": EqOp, - "terms": InOp, - "tuple": TupleOp, - "unix": UnixOp, - "when": WhenOp, -} - - -builtin_ops = { - "ne": operator.ne, - "eq": operator.eq, - "gte": operator.ge, - "gt": operator.gt, - "lte": operator.le, - "lt": operator.lt, - "add": operator.add, - "sum": operator.add, - "mul": operator.mul, - "mult": operator.mul, - "multiply": operator.mul, - "max": lambda *v: max(v), - "min": lambda *v: min(v) -} diff --git a/vendor/jx_base/expressions/__init__.py b/vendor/jx_base/expressions/__init__.py new file mode 100644 index 0000000..89e5d3a --- /dev/null +++ b/vendor/jx_base/expressions/__init__.py @@ -0,0 +1,169 @@ +from jx_base.expressions._utils import simplified, extend, jx_expression, merge_types, operators, language, 
_jx_expression +from jx_base.expressions.abs_op import AbsOp +from jx_base.expressions.add_op import AddOp +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.base_binary_op import BaseBinaryOp +from jx_base.expressions.base_inequality_op import BaseInequalityOp +from jx_base.expressions.base_multi_op import BaseMultiOp +from jx_base.expressions.basic_add_op import BasicAddOp +from jx_base.expressions.basic_eq_op import BasicEqOp +from jx_base.expressions.basic_index_of_op import BasicIndexOfOp +from jx_base.expressions.basic_mul_op import BasicMulOp +from jx_base.expressions.basic_multi_op import BasicMultiOp +from jx_base.expressions.basic_starts_with_op import BasicStartsWithOp +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.between_op import BetweenOp +from jx_base.expressions.boolean_op import BooleanOp +from jx_base.expressions.case_op import CaseOp +from jx_base.expressions.coalesce_op import CoalesceOp +from jx_base.expressions.concat_op import ConcatOp +from jx_base.expressions.count_op import CountOp +from jx_base.expressions.date_op import DateOp +from jx_base.expressions.div_op import DivOp +from jx_base.expressions.eq_op import EqOp +from jx_base.expressions.es_nested_op import EsNestedOp +from jx_base.expressions.es_script import EsScript +from jx_base.expressions.exists_op import ExistsOp +from jx_base.expressions.exp_op import ExpOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FalseOp, FALSE +from jx_base.expressions.find_op import FindOp +from jx_base.expressions.first_op import FirstOp +from jx_base.expressions.floor_op import FloorOp +from jx_base.expressions.from_unix_op import FromUnixOp +from jx_base.expressions.get_op import GetOp +from jx_base.expressions.gt_op import GtOp +from jx_base.expressions.gte_op import GteOp +from jx_base.expressions.in_op import InOp +from jx_base.expressions.integer_op import IntegerOp +from jx_base.expressions.is_boolean_op import IsBooleanOp +from jx_base.expressions.is_integer_op import IsIntegerOp +from jx_base.expressions.is_number_op import IsNumberOp +from jx_base.expressions.is_string_op import IsStringOp +from jx_base.expressions.last_op import LastOp +from jx_base.expressions.leaves_op import LeavesOp +from jx_base.expressions.left_op import LeftOp +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import Literal, ONE, ZERO, register_literal, is_literal +from jx_base.expressions.lt_op import LtOp +from jx_base.expressions.lte_op import LteOp +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.missing_op import MissingOp +from jx_base.expressions.mod_op import ModOp +from jx_base.expressions.mul_op import MulOp +from jx_base.expressions.ne_op import NeOp +from jx_base.expressions.not_left_op import NotLeftOp +from jx_base.expressions.not_op import NotOp +from jx_base.expressions.not_right_op import NotRightOp +from jx_base.expressions.null_op import NullOp, NULL +from jx_base.expressions.number_op import NumberOp +from jx_base.expressions.offset_op import OffsetOp +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.prefix_op import PrefixOp +from jx_base.expressions.python_script import PythonScript +from jx_base.expressions.query_op import QueryOp +from jx_base.expressions.range_op import RangeOp +from jx_base.expressions.reg_exp_op import RegExpOp +from jx_base.expressions.right_op import 
RightOp +from jx_base.expressions.rows_op import RowsOp +from jx_base.expressions.script_op import ScriptOp +from jx_base.expressions.select_op import SelectOp +from jx_base.expressions.split_op import SplitOp +from jx_base.expressions.sql_eq_op import SqlEqOp +from jx_base.expressions.sql_instr_op import SqlInstrOp +from jx_base.expressions.sql_script import SQLScript +from jx_base.expressions.sql_substr_op import SqlSubstrOp +from jx_base.expressions.string_op import StringOp +from jx_base.expressions.sub_op import SubOp +from jx_base.expressions.suffix_op import SuffixOp +from jx_base.expressions.true_op import TrueOp, TRUE +from jx_base.expressions.tuple_op import TupleOp +from jx_base.expressions.union_op import UnionOp +from jx_base.expressions.unix_op import UnixOp +from jx_base.expressions.variable import Variable, IDENTITY +from jx_base.expressions.when_op import WhenOp +from mo_dots import set_default + +set_default(operators, { + "abs": AbsOp, + "add": AddOp, + "and": AndOp, + "basic.add": BasicAddOp, + "basic.mul": BasicMulOp, + "between": BetweenOp, + "case": CaseOp, + "coalesce": CoalesceOp, + "concat": ConcatOp, + "count": CountOp, + "date": DateOp, + "div": DivOp, + "divide": DivOp, + "eq": EqOp, + "exists": ExistsOp, + "exp": ExpOp, + "find": FindOp, + "first": FirstOp, + "floor": FloorOp, + "from_unix": FromUnixOp, + "get": GetOp, + "gt": GtOp, + "gte": GteOp, + "in": InOp, + "instr": FindOp, + "is_number": IsNumberOp, + "is_string": IsStringOp, + "last": LastOp, + "left": LeftOp, + "length": LengthOp, + "literal": Literal, + "lt": LtOp, + "lte": LteOp, + "match_all": TrueOp, + "max": MaxOp, + "minus": SubOp, + "missing": MissingOp, + "mod": ModOp, + "mul": MulOp, + "mult": MulOp, + "multiply": MulOp, + "ne": NeOp, + "neq": NeOp, + "not": NotOp, + "not_left": NotLeftOp, + "not_right": NotRightOp, + "null": NullOp, + "number": NumberOp, + "offset": OffsetOp, + "or": OrOp, + "postfix": SuffixOp, + "prefix": PrefixOp, + "range": RangeOp, + "regex": RegExpOp, + "regexp": RegExpOp, + "right": RightOp, + "rows": RowsOp, + "script": ScriptOp, + "select": SelectOp, + "split": SplitOp, + "string": StringOp, + "suffix": SuffixOp, + "sub": SubOp, + "subtract": SubOp, + "sum": AddOp, + "term": EqOp, + "terms": InOp, + "tuple": TupleOp, + "union": UnionOp, + "unix": UnixOp, + "when": WhenOp, +}) + +language.register_ops(vars()) + +register_literal(NullOp) +register_literal(FalseOp) +register_literal(TrueOp) +register_literal(DateOp) +register_literal(Literal) + diff --git a/vendor/jx_base/expressions/_utils.py b/vendor/jx_base/expressions/_utils.py new file mode 100644 index 0000000..c66a613 --- /dev/null +++ b/vendor/jx_base/expressions/_utils.py @@ -0,0 +1,180 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
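+
+A rough usage sketch (illustrative only; these names are defined in this
+package): jx_expression() below turns a JSON expression into a tree of
+operator objects, which can be simplified and serialized back:
+
+    op = jx_expression({"eq": {"build.branch": "try"}})  # -> EqOp instance
+    op = op.partial_eval()                               # simplify the tree
+    data = op.__data__()                                 # back to JSON-able form
+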
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +import operator + +from jx_base.language import is_expression, Language +from mo_dots import Null, is_sequence +from mo_future import ( + first, + get_function_name, + is_text, + items as items_, + text, + utf8_json_encoder, +) +from mo_json import BOOLEAN, INTEGER, IS_NULL, NUMBER, OBJECT, STRING, scrub +from mo_logs import Except, Log +from mo_math import is_number +from mo_times import Date + +ALLOW_SCRIPTING = False +EMPTY_DICT = {} + +Literal, TRUE, NULL, TupleOp, Variable = [None] * 5 + +def extend(cls): + """ + DECORATOR TO ADD METHODS TO CLASSES + :param cls: THE CLASS TO ADD THE METHOD TO + :return: + """ + + def extender(func): + setattr(cls, get_function_name(func), func) + return func + + return extender + + +def simplified(func): + def mark_as_simple(self): + if self.simplified: + return self + + output = func(self) + output.simplified = True + return output + + func_name = get_function_name(func) + mark_as_simple.__name__ = func_name + return mark_as_simple + + +def jx_expression(expr, schema=None): + if expr == None: + return None + + # UPDATE THE VARIABLE WITH THIER KNOWN TYPES + if not schema: + output = _jx_expression(expr, language) + return output + output = _jx_expression(expr, language) + for v in output.vars(): + leaves = schema.leaves(v.var) + if len(leaves) == 0: + v.data_type = IS_NULL + if len(leaves) == 1: + v.data_type = first(leaves).jx_type + return output + + +def _jx_expression(expr, lang): + """ + WRAP A JSON EXPRESSION WITH OBJECT REPRESENTATION + """ + if is_expression(expr): + # CONVERT TO lang + new_op = lang[expr] + if not new_op: + # CAN NOT BE FOUND, TRY SOME PARTIAL EVAL + return language[expr.get_id()].partial_eval() + return expr + # return new_op(expr.args) # THIS CAN BE DONE, BUT IT NEEDS MORE CODING, AND I WOULD EXPECT IT TO BE SLOW + + if expr is None: + return TRUE + elif is_text(expr): + return Variable(expr) + elif expr in (True, False, None) or expr == None or is_number(expr): + return Literal(expr) + elif expr.__class__ is Date: + return Literal(expr.unix) + elif is_sequence(expr): + return lang[TupleOp([_jx_expression(e, lang) for e in expr])] + + # expr = wrap(expr) + try: + items = items_(expr) + + for op, term in items: + # ONE OF THESE IS THE OPERATOR + full_op = operators.get(op) + if full_op: + class_ = lang.ops[full_op.get_id()] + if class_: + return class_.define(expr) + + # THIS LANGUAGE DOES NOT SUPPORT THIS OPERATOR, GOTO BASE LANGUAGE AND GET THE MACRO + class_ = language[op.get_id()] + output = class_.define(expr).partial_eval() + return _jx_expression(output, lang) + else: + if not items: + return NULL + raise Log.error("{{instruction|json}} is not known", instruction=expr) + + except Exception as e: + Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e) + + +language = Language(None) + + +_json_encoder = utf8_json_encoder + + +def value2json(value): + try: + scrubbed = scrub(value, scrub_number=float) + return text(_json_encoder(scrubbed)) + except Exception as e: + e = Except.wrap(e) + Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=e) + raise e + + +def merge_types(jx_types): + """ + :param jx_types: ITERABLE OF jx TYPES + :return: ONE TYPE TO RULE THEM ALL + """ + return _merge_types[max(_merge_score[t] for t in jx_types)] + + +_merge_score = {IS_NULL: 0, BOOLEAN: 1, INTEGER: 2, 
NUMBER: 3, STRING: 4, OBJECT: 5} +_merge_types = {v: k for k, v in _merge_score.items()} + +builtin_ops = { + "ne": operator.ne, + "eq": operator.eq, + "gte": operator.ge, + "gt": operator.gt, + "lte": operator.le, + "lt": operator.lt, + "add": operator.add, + "sub": operator.sub, + "mul": operator.mul, + "max": lambda *v: max(v), + "min": lambda *v: min(v), +} + +operators = {} diff --git a/vendor/jx_base/expressions/abs_op.py b/vendor/jx_base/expressions/abs_op.py new file mode 100644 index 0000000..1734ec8 --- /dev/null +++ b/vendor/jx_base/expressions/abs_op.py @@ -0,0 +1,54 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.language import is_op +from mo_json import NUMBER + + +class AbsOp(Expression): + data_type = NUMBER + + def __init__(self, term): + Expression.__init__(self, term) + self.term = term + + def __data__(self): + return {"abs": self.term.__data__()} + + def __eq__(self, other): + if not is_op(other, AbsOp): + return False + return self.term == other.term + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[AbsOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + return AbsOp(self.term.partial_eval()) diff --git a/vendor/jx_base/expressions/add_op.py b/vendor/jx_base/expressions/add_op.py new file mode 100644 index 0000000..a961440 --- /dev/null +++ b/vendor/jx_base/expressions/add_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_multi_op import BaseMultiOp + + +class AddOp(BaseMultiOp): + op = "add" diff --git a/vendor/jx_base/expressions/and_op.py b/vendor/jx_base/expressions/and_op.py new file mode 100644 index 0000000..0edcd63 --- /dev/null +++ b/vendor/jx_base/expressions/and_op.py @@ -0,0 +1,115 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
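+#
+# A hedged sketch of what AndOp.partial_eval() (below) does: it drops TRUE
+# terms, short-circuits to FALSE, flattens nested AndOps, and distributes
+# over OrOp terms, e.g. (roughly):
+#
+#     AndOp([TRUE, Variable("a"), AndOp([Variable("b")])]).partial_eval()
+#     # -> an AndOp over the boolean forms of "a" and "b" only
+#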
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.boolean_op import BooleanOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.true_op import TRUE +from jx_base.language import is_op +from mo_dots import is_many +from mo_future import zip_longest +from mo_json import BOOLEAN + +NotOp = None +OrOp = None + +class AndOp(Expression): + data_type = BOOLEAN + + def __init__(self, terms): + Expression.__init__(self, terms) + if terms == None: + self.terms = [] + elif is_many(terms): + self.terms = terms + else: + self.terms = [terms] + + def __data__(self): + return {"and": [t.__data__() for t in self.terms]} + + def __eq__(self, other): + if is_op(other, AndOp): + return all(a == b for a, b in zip_longest(self.terms, other.terms)) + return False + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[AndOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + or_terms = [[]] # LIST OF TUPLES FOR or-ing and and-ing + for i, t in enumerate(self.terms): + simple = self.lang[BooleanOp(t)].partial_eval() + if simple.type != BOOLEAN: + simple = simple.exists() + + if simple is self.lang[TRUE]: + continue + elif simple is FALSE: + return FALSE + elif is_op(simple, AndOp): + for and_terms in or_terms: + and_terms.extend([tt for tt in simple.terms if tt not in and_terms]) + continue + elif is_op(simple, OrOp): + or_terms = [ + and_terms + ([o] if o not in and_terms else []) + for o in simple.terms + for and_terms in or_terms + if self.lang[NotOp(o)].partial_eval() not in and_terms + ] + continue + for and_terms in list(or_terms): + if self.lang[NotOp(simple)].partial_eval() in and_terms: + or_terms.remove(and_terms) + elif simple not in and_terms: + and_terms.append(simple) + + if len(or_terms) == 0: + return FALSE + elif len(or_terms) == 1: + and_terms = or_terms[0] + if len(and_terms) == 0: + return TRUE + elif len(and_terms) == 1: + return and_terms[0] + else: + return self.lang[AndOp(and_terms)] + + return self.lang[ + OrOp( + [ + AndOp(and_terms) if len(and_terms) > 1 else and_terms[0] + for and_terms in or_terms + ] + ) + ].partial_eval() diff --git a/vendor/jx_base/expressions/base_binary_op.py b/vendor/jx_base/expressions/base_binary_op.py new file mode 100644 index 0000000..4c8e4b7 --- /dev/null +++ b/vendor/jx_base/expressions/base_binary_op.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
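+
+Subclasses only set `op`; the constant folding in partial_eval() comes for
+free. A small sketch, assuming the SubOp subclass defined in this package:
+
+    SubOp([Literal(7), Literal(2)]).partial_eval()              # -> Literal(5)
+    SubOp([Variable("x"), Literal(2)], default=ZERO).missing()  # -> FALSE
+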
+WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import builtin_ops, simplified
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import Literal
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.null_op import NULL
+from jx_base.expressions.or_op import OrOp
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op
+from mo_json import NUMBER
+
+
+class BaseBinaryOp(Expression):
+    has_simple_form = True
+    data_type = NUMBER
+    op = None
+
+    def __init__(self, terms, default=NULL):
+        Expression.__init__(self, terms)
+        self.lhs, self.rhs = terms
+        self.default = default
+
+    @property
+    def name(self):
+        return self.op
+
+    def __data__(self):
+        if is_op(self.lhs, Variable) and is_literal(self.rhs):
+            return {self.op: {self.lhs.var: self.rhs.value}, "default": self.default}
+        else:
+            return {
+                self.op: [self.lhs.__data__(), self.rhs.__data__()],
+                "default": self.default,
+            }
+
+    def vars(self):
+        return self.lhs.vars() | self.rhs.vars() | self.default.vars()
+
+    def map(self, map_):
+        return self.__class__(
+            [self.lhs.map(map_), self.rhs.map(map_)], default=self.default.map(map_)
+        )
+
+    def missing(self):
+        if self.default.exists():
+            return FALSE
+        else:
+            return self.lang[OrOp([self.lhs.missing(), self.rhs.missing()])]
+
+    @simplified
+    def partial_eval(self):
+        lhs = self.lhs.partial_eval()
+        rhs = self.rhs.partial_eval()
+        default = self.default.partial_eval()
+        if is_literal(lhs) and is_literal(rhs):
+            return Literal(builtin_ops[self.op](lhs.value, rhs.value))
+        return self.__class__([lhs, rhs], default=default)
diff --git a/vendor/jx_base/expressions/base_inequality_op.py b/vendor/jx_base/expressions/base_inequality_op.py
new file mode 100644
index 0000000..4b7b536
--- /dev/null
+++ b/vendor/jx_base/expressions/base_inequality_op.py
@@ -0,0 +1,73 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
+WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import builtin_ops, simplified
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import Literal
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op
+from mo_json import BOOLEAN
+
+
+class BaseInequalityOp(Expression):
+    has_simple_form = True
+    data_type = BOOLEAN
+    op = None
+
+    def __init__(self, terms):
+        Expression.__init__(self, terms)
+        self.lhs, self.rhs = terms
+
+    @property
+    def name(self):
+        return self.op
+
+    def __data__(self):
+        if is_op(self.lhs, Variable) and is_literal(self.rhs):
+            return {self.op: {self.lhs.var: self.rhs.value}}
+        else:
+            return {self.op: [self.lhs.__data__(), self.rhs.__data__()]}
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return False
+        return self.op == other.op and self.lhs == other.lhs and self.rhs == other.rhs
+
+    def vars(self):
+        return self.lhs.vars() | self.rhs.vars()
+
+    def map(self, map_):
+        return self.__class__([self.lhs.map(map_), self.rhs.map(map_)])
+
+    def missing(self):
+        return FALSE
+
+    @simplified
+    def partial_eval(self):
+        lhs = self.lhs.partial_eval()
+        rhs = self.rhs.partial_eval()
+
+        if is_literal(lhs) and is_literal(rhs):
+            return Literal(builtin_ops[self.op](lhs.value, rhs.value))
+
+        return self.__class__([lhs, rhs])
diff --git a/vendor/jx_base/expressions/base_multi_op.py b/vendor/jx_base/expressions/base_multi_op.py
new file mode 100644
index 0000000..e3d6748
--- /dev/null
+++ b/vendor/jx_base/expressions/base_multi_op.py
@@ -0,0 +1,142 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
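+
+The `nulls` clause picks between the two null-handling strategies in
+missing() below; a sketch of the JSON forms:
+
+    {"add": ["a", "b"]}                  # CONSERVATIVE: null IF ANY OPERAND IS null
+    {"add": ["a", "b"], "nulls": true}   # DECISIVE: null ONLY IF ALL OPERANDS ARE null
+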
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified, builtin_ops, operators +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.coalesce_op import CoalesceOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal, ZERO, ONE, is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.when_op import WhenOp +from mo_dots import coalesce +from mo_json import NUMBER + + +class BaseMultiOp(Expression): + has_simple_form = True + data_type = NUMBER + op = None + + def __init__(self, terms, **clauses): + Expression.__init__(self, terms) + self.terms = terms + self.default = coalesce(clauses.get("default"), NULL) + self.nulls = coalesce( + clauses.get("nulls"), FALSE + ) # nulls==True WILL HAVE OP RETURN null ONLY IF ALL OPERANDS ARE null + + def __data__(self): + return { + self.op: [t.__data__() for t in self.terms], + "default": self.default, + "nulls": self.nulls, + } + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.__class__( + [t.map(map_) for t in self.terms], + **{"default": self.default, "nulls": self.nulls} + ) + + def missing(self): + if self.nulls: + if self.default is NULL: + return self.lang[AndOp([t.missing() for t in self.terms])] + else: + return TRUE + else: + if self.default is NULL: + return self.lang[OrOp([t.missing() for t in self.terms])] + else: + return FALSE + + def exists(self): + if self.nulls: + return self.lang[OrOp([t.exists() for t in self.terms])] + else: + return self.lang[AndOp([t.exists() for t in self.terms])] + + @simplified + def partial_eval(self): + acc = None + terms = [] + for t in self.terms: + simple = t.partial_eval() + if simple is NULL: + pass + elif is_literal(simple): + if acc is None: + acc = simple.value + else: + acc = builtin_ops[self.op](acc, simple.value) + else: + terms.append(simple) + + lang = self.lang + if len(terms) == 0: + if acc == None: + return self.default.partial_eval() + else: + return lang[Literal(acc)] + elif self.nulls: + # DECISIVE + if acc is not None: + terms.append(Literal(acc)) + + output = lang[ + WhenOp( + AndOp([t.missing() for t in terms]), + **{ + "then": self.default, + "else": operators["basic." + self.op]( + [CoalesceOp([t, _jx_identity[self.op]]) for t in terms] + ), + } + ) + ].partial_eval() + else: + # CONSERVATIVE + if acc is not None: + terms.append(lang[Literal(acc)]) + + output = lang[ + WhenOp( + lang[OrOp([t.missing() for t in terms])], + **{ + "then": self.default, + "else": operators["basic." + self.op](terms), + } + ) + ].partial_eval() + + return output + + +_jx_identity = {"add": ZERO, "mul": ONE} diff --git a/vendor/jx_base/expressions/basic_add_op.py b/vendor/jx_base/expressions/basic_add_op.py new file mode 100644 index 0000000..0378215 --- /dev/null +++ b/vendor/jx_base/expressions/basic_add_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.basic_multi_op import BasicMultiOp + + +class BasicAddOp(BasicMultiOp): + op = "basic.add" diff --git a/vendor/jx_base/expressions/basic_eq_op.py b/vendor/jx_base/expressions/basic_eq_op.py new file mode 100644 index 0000000..9a4d478 --- /dev/null +++ b/vendor/jx_base/expressions/basic_eq_op.py @@ -0,0 +1,48 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.language import is_op +from mo_json import BOOLEAN + + +class BasicEqOp(Expression): + """ + PLACEHOLDER FOR BASIC `==` OPERATOR (CAN NOT DEAL WITH NULLS) + """ + + data_type = BOOLEAN + + def __init__(self, terms): + Expression.__init__(self, terms) + self.lhs, self.rhs = terms + + def __data__(self): + return {"basic.eq": [self.lhs.__data__(), self.rhs.__data__()]} + + def missing(self): + return FALSE + + def __eq__(self, other): + if not is_op(other, BasicEqOp): + return False + return self.lhs == other.lhs and self.rhs == other.rhs diff --git a/vendor/jx_base/expressions/basic_index_of_op.py b/vendor/jx_base/expressions/basic_index_of_op.py new file mode 100644 index 0000000..08e4d6f --- /dev/null +++ b/vendor/jx_base/expressions/basic_index_of_op.py @@ -0,0 +1,67 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
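+#
+# A quick sketch of the BasicEqOp above: it assumes both operands exist,
+# which is why its missing() is always FALSE:
+#
+#     BasicEqOp([Variable("a"), Literal(3)]).missing()   # -> FALSE
+#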
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.language import is_op
+
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.integer_op import IntegerOp
+from jx_base.expressions.literal import ZERO
+from jx_base.expressions.max_op import MaxOp
+from jx_base.expressions.string_op import StringOp
+from mo_json import INTEGER
+
+
+class BasicIndexOfOp(Expression):
+    """
+    PLACEHOLDER FOR BASIC value.indexOf(find, start) (CAN NOT DEAL WITH NULLS)
+    """
+
+    data_type = INTEGER
+
+    def __init__(self, params):
+        Expression.__init__(self, params)
+        self.value, self.find, self.start = params
+
+    def __data__(self):
+        return {
+            "basic.indexOf": [
+                self.value.__data__(),
+                self.find.__data__(),
+                self.start.__data__(),
+            ]
+        }
+
+    def vars(self):
+        return self.value.vars() | self.find.vars() | self.start.vars()
+
+    def missing(self):
+        return FALSE
+
+    @simplified
+    def partial_eval(self):
+        start = IntegerOp(MaxOp([ZERO, self.start])).partial_eval()
+        return self.lang[
+            BasicIndexOfOp(
+                [
+                    StringOp(self.value).partial_eval(),
+                    StringOp(self.find).partial_eval(),
+                    start,
+                ]
+            )
+        ]
+
+    def __eq__(self, other):
+        if not is_op(other, BasicIndexOfOp):
+            return False
+        return self.value == other.value and self.find == other.find and self.start == other.start
diff --git a/vendor/jx_base/expressions/basic_mul_op.py b/vendor/jx_base/expressions/basic_mul_op.py
new file mode 100644
index 0000000..c05a17c
--- /dev/null
+++ b/vendor/jx_base/expressions/basic_mul_op.py
@@ -0,0 +1,26 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.basic_multi_op import BasicMultiOp
+
+
+class BasicMulOp(BasicMultiOp):
+    op = "basic.mul"
diff --git a/vendor/jx_base/expressions/basic_multi_op.py b/vendor/jx_base/expressions/basic_multi_op.py
new file mode 100644
index 0000000..7c3a52d
--- /dev/null
+++ b/vendor/jx_base/expressions/basic_multi_op.py
@@ -0,0 +1,82 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
+WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified, builtin_ops
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import Literal
+from jx_base.expressions.null_op import NULL
+from jx_base.language import is_op
+from mo_json import NUMBER
+
+
+class BasicMultiOp(Expression):
+    """
+    PLACEHOLDER FOR BASIC OPERATOR (CAN NOT DEAL WITH NULLS)
+    """
+
+    data_type = NUMBER
+    op = None
+
+    def __init__(self, terms):
+        Expression.__init__(self, terms)
+        self.terms = terms
+
+    def vars(self):
+        output = set()
+        for t in self.terms:
+            output.update(t.vars())
+        return output
+
+    def map(self, map):
+        return self.__class__([t.map(map) for t in self.terms])
+
+    def __data__(self):
+        return {self.op: [t.__data__() for t in self.terms]}
+
+    def missing(self):
+        return FALSE
+
+    @simplified
+    def partial_eval(self):
+        acc = None
+        terms = []
+        for t in self.terms:
+            simple = t.partial_eval()
+            if simple is NULL:
+                pass
+            elif is_op(simple, Literal):
+                if acc is None:
+                    acc = simple.value
+                else:
+                    acc = builtin_ops[self.op](acc, simple.value)
+            else:
+                terms.append(simple)
+        if len(terms) == 0:
+            if acc == None:
+                return NULL  # BASIC OPS HAVE NO default CLAUSE; NO OPERANDS MEANS null
+            else:
+                return Literal(acc)
+        else:
+            if acc is not None:
+                terms.append(Literal(acc))
+
+            return self.__class__(terms)
diff --git a/vendor/jx_base/expressions/basic_starts_with_op.py b/vendor/jx_base/expressions/basic_starts_with_op.py
new file mode 100644
index 0000000..e1caa17
--- /dev/null
+++ b/vendor/jx_base/expressions/basic_starts_with_op.py
@@ -0,0 +1,63 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.string_op import StringOp +from jx_base.language import is_op +from mo_json import BOOLEAN + + +class BasicStartsWithOp(Expression): + """ + PLACEHOLDER FOR BASIC value.startsWith(find, start) (CAN NOT DEAL WITH NULLS) + """ + + data_type = BOOLEAN + + def __init__(self, params): + Expression.__init__(self, params) + self.value, self.prefix = params + + def __data__(self): + return {"basic.startsWith": [self.value.__data__(), self.prefix.__data__()]} + + def __eq__(self, other): + if is_op(other, BasicStartsWithOp): + return self.value == other.value and self.prefix == other.prefix + + def vars(self): + return self.value.vars() | self.prefix.vars() + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + return self.lang[ + BasicStartsWithOp( + [ + StringOp(self.value).partial_eval(), + StringOp(self.prefix).partial_eval(), + ] + ) + ] diff --git a/vendor/jx_base/expressions/basic_substring_op.py b/vendor/jx_base/expressions/basic_substring_op.py new file mode 100644 index 0000000..c6f7f4f --- /dev/null +++ b/vendor/jx_base/expressions/basic_substring_op.py @@ -0,0 +1,48 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import STRING + + +class BasicSubstringOp(Expression): + """ + PLACEHOLDER FOR BASIC value.substring(start, end) (CAN NOT DEAL WITH NULLS) + """ + + data_type = STRING + + def __init__(self, terms): + Expression.__init__(self, terms) + self.value, self.start, self.end = terms + + def __data__(self): + return { + "basic.substring": [ + self.value.__data__(), + self.start.__data__(), + self.end.__data__(), + ] + } + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/between_op.py b/vendor/jx_base/expressions/between_op.py new file mode 100644 index 0000000..db7046b --- /dev/null +++ b/vendor/jx_base/expressions/between_op.py @@ -0,0 +1,183 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
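+
+BetweenOp (below) accepts two JSON forms, both handled by its define()
+method; a sketch:
+
+    {"between": {"url": ["//", "/"]}}                          # {variable: [prefix, suffix]}
+    {"between": ["url", {"literal": "//"}, {"literal": "/"}]}  # [value, prefix, suffix]
+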
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import jx_expression, simplified +from jx_base.expressions.add_op import AddOp +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.case_op import CaseOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.find_op import FindOp +from jx_base.expressions.is_number_op import IsNumberOp +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import Literal, ZERO, is_literal +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.null_op import NULL +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data, is_sequence, wrap, coalesce +from mo_json import STRING +from mo_logs import Log + + +class BetweenOp(Expression): + data_type = STRING + + def __init__(self, value, prefix, suffix, default=NULL, start=NULL): + Expression.__init__(self, []) + self.value = value + self.prefix = coalesce(prefix, NULL) + self.suffix = coalesce(suffix, NULL) + self.default = coalesce(default, NULL) + self.start = coalesce(start, NULL) + if is_literal(self.prefix) and is_literal(self.suffix): + pass + else: + Log.error("Expecting literal prefix and suffix only") + + @classmethod + def define(cls, expr): + term = expr.between + if is_sequence(term): + return cls.lang[ + BetweenOp( + value=jx_expression(term[0]), + prefix=jx_expression(term[1]), + suffix=jx_expression(term[2]), + default=jx_expression(expr.default), + start=jx_expression(expr.start), + ) + ] + elif is_data(term): + var, vals = term.items()[0] + if is_sequence(vals) and len(vals) == 2: + return cls.lang[ + BetweenOp( + value=Variable(var), + prefix=Literal(vals[0]), + suffix=Literal(vals[1]), + default=jx_expression(expr.default), + start=jx_expression(expr.start), + ) + ] + else: + Log.error( + "`between` parameters are expected to be in {var: [prefix, suffix]} form" + ) + else: + Log.error( + "`between` parameters are expected to be in {var: [prefix, suffix]} form" + ) + + def vars(self): + return ( + self.value.vars() + | self.prefix.vars() + | self.suffix.vars() + | self.default.vars() + | self.start.vars() + ) + + def map(self, map_): + return BetweenOp( + self.value.map(map_), + self.prefix.map(map_), + self.suffix.map(map_), + default=self.default.map(map_), + start=self.start.map(map_), + ) + + def __data__(self): + if ( + is_op(self.value, Variable) + and is_literal(self.prefix) + and is_literal(self.suffix) + ): + output = wrap( + {"between": {self.value.var: [self.prefix.value, self.suffix.value]}} + ) + else: + output = wrap( + { + "between": [ + self.value.__data__(), + self.prefix.__data__(), + self.suffix.__data__(), + ] + } + ) + if self.start: + output.start = self.start.__data__() + if self.default: + output.default = self.default.__data__() + return output + + @simplified + def partial_eval(self): + value = self.value.partial_eval() + + start_index = self.lang[ + CaseOp( + [ + WhenOp(self.prefix.missing(), **{"then": ZERO}), + WhenOp( + IsNumberOp(self.prefix), **{"then": MaxOp([ZERO, self.prefix])} + ), + FindOp([value, self.prefix], start=self.start), + ] + ) + ].partial_eval() + + len_prefix = self.lang[ + CaseOp( + [ + 
WhenOp(self.prefix.missing(), **{"then": ZERO}), + WhenOp(IsNumberOp(self.prefix), **{"then": ZERO}), + LengthOp(self.prefix), + ] + ) + ].partial_eval() + + end_index = self.lang[ + CaseOp( + [ + WhenOp(start_index.missing(), **{"then": NULL}), + WhenOp(self.suffix.missing(), **{"then": LengthOp(value)}), + WhenOp( + IsNumberOp(self.suffix), + **{"then": MinOp([self.suffix, LengthOp(value)])} + ), + FindOp( + [value, self.suffix], start=AddOp([start_index, len_prefix]) + ), + ] + ) + ].partial_eval() + + start_index = AddOp([start_index, len_prefix]).partial_eval() + substring = BasicSubstringOp([value, start_index, end_index]).partial_eval() + + between = self.lang[ + WhenOp(end_index.missing(), **{"then": self.default, "else": substring}) + ].partial_eval() + + return between diff --git a/vendor/jx_base/expressions/boolean_op.py b/vendor/jx_base/expressions/boolean_op.py new file mode 100644 index 0000000..6e76c3a --- /dev/null +++ b/vendor/jx_base/expressions/boolean_op.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.null_op import NULL +from jx_base.expressions.true_op import TRUE +from mo_json import BOOLEAN + + +class BooleanOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"boolean": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[BooleanOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.lang[self.term].partial_eval() + if term is TRUE: + return TRUE + elif term in (FALSE, NULL): + return FALSE + elif term.type is BOOLEAN: + return term + elif term is self.term: + return self + + exists = self.lang[term].exists().partial_eval() + return exists diff --git a/vendor/jx_base/expressions/case_op.py b/vendor/jx_base/expressions/case_op.py new file mode 100644 index 0000000..98de2b0 --- /dev/null +++ b/vendor/jx_base/expressions/case_op.py @@ -0,0 +1,124 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
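+
+CaseOp (below) expects a list of `when` sub-clauses, where only the final
+clause may be a bare default expression; a sketch of the JSON form:
+
+    {"case": [
+        {"when": {"lt": {"age": 13}}, "then": {"literal": "child"}},
+        {"when": {"lt": {"age": 20}}, "then": {"literal": "teen"}},
+        {"literal": "adult"}
+    ]}
+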
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import first_op, not_op, eq_op +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.not_op import NotOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import NULL +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_sequence +from mo_future import first +from mo_json import OBJECT, BOOLEAN +from mo_logs import Log + + +class CaseOp(Expression): + def __init__(self, terms, **clauses): + if not is_sequence(terms): + Log.error("case expression requires a list of `when` sub-clauses") + Expression.__init__(self, terms) + if len(terms) == 0: + Log.error("Expecting at least one clause") + + for w in terms[:-1]: + if not is_op(w, WhenOp) or w.els_ is not NULL: + Log.error( + "case expression does not allow `else` clause in `when` sub-clause" + ) + self.whens = terms + + def __data__(self): + return {"case": [w.__data__() for w in self.whens]} + + def __eq__(self, other): + if is_op(other, CaseOp): + return all(s == o for s, o in zip(self.whens, other.whens)) + + def vars(self): + output = set() + for w in self.whens: + output |= w.vars() + return output + + def map(self, map_): + return self.lang[CaseOp([w.map(map_) for w in self.whens])] + + def missing(self): + m = self.whens[-1].missing() + for w in reversed(self.whens[0:-1]): + when = w.when.partial_eval() + if when is FALSE: + pass + elif when is TRUE: + m = w.then.partial_eval().missing() + else: + m = self.lang[OrOp([AndOp([when, w.then.partial_eval().missing()]), m])] + return m.partial_eval() + + @simplified + def partial_eval(self): + if self.type == BOOLEAN: + nots = [] + ors = [] + for w in self.whens[:-1]: + ors.append(AndOp(nots + [w.when, w.then])) + nots.append(NotOp(w.when)) + ors.append(AndOp(nots + [self.whens[-1]])) + return self.lang[OrOp(ors)].partial_eval() + + whens = [] + for w in self.whens[:-1]: + when = self.lang[w.when].partial_eval() + if when is TRUE: + whens.append(self.lang[w.then].partial_eval()) + break + elif when is FALSE: + pass + else: + whens.append(self.lang[WhenOp(when, **{"then": w.then.partial_eval()})]) + else: + whens.append(self.lang[self.whens[-1]].partial_eval()) + + if len(whens) == 1: + return whens[0] + elif len(whens) == 2: + return self.lang[WhenOp(whens[0].when, **{"then": whens[0].then, "else": whens[1]})] + else: + return self.lang[CaseOp(whens)] + + @property + def type(self): + types = set(w.then.type if is_op(w, WhenOp) else w.type for w in self.whens) + if len(types) > 1: + return OBJECT + else: + return first(types) + + +first_op.CaseOp = CaseOp +not_op.CaseOp = CaseOp +eq_op.CaseOp = CaseOp diff --git a/vendor/jx_base/expressions/coalesce_op.py b/vendor/jx_base/expressions/coalesce_op.py new file mode 100644 index 0000000..95ea160 --- /dev/null +++ b/vendor/jx_base/expressions/coalesce_op.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
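+#
+# A sketch of the CoalesceOp below: partial_eval() drops null terms and stops
+# at the first literal, since no later term can ever be selected:
+#
+#     CoalesceOp([Variable("a"), Literal(0), Variable("b")]).partial_eval()
+#     # -> roughly CoalesceOp([<first of "a">, Literal(0)]); "b" is unreachable
+#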
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.first_op import FirstOp +from jx_base.language import is_op + + +class CoalesceOp(Expression): + has_simple_form = True + + def __init__(self, terms): + Expression.__init__(self, terms) + self.terms = terms + + def __data__(self): + return {"coalesce": [t.__data__() for t in self.terms]} + + def __eq__(self, other): + if is_op(other, CoalesceOp): + if len(self.terms) == len(other.terms): + return all(s == o for s, o in zip(self.terms, other.terms)) + return False + + def missing(self): + # RETURN true FOR RECORDS THE WOULD RETURN NULL + return self.lang[AndOp([v.missing() for v in self.terms])] + + def vars(self): + output = set() + for v in self.terms: + output |= v.vars() + return output + + def map(self, map_): + return self.lang[CoalesceOp([v.map(map_) for v in self.terms])] + + @simplified + def partial_eval(self): + terms = [] + for t in self.terms: + simple = self.lang[FirstOp(t)].partial_eval() + if simple is NULL: + pass + elif is_literal(simple): + terms.append(simple) + break + else: + terms.append(simple) + + if len(terms) == 0: + return NULL + elif len(terms) == 1: + return terms[0] + else: + return self.lang[CoalesceOp(terms)] diff --git a/vendor/jx_base/expressions/concat_op.py b/vendor/jx_base/expressions/concat_op.py new file mode 100644 index 0000000..64a54ac --- /dev/null +++ b/vendor/jx_base/expressions/concat_op.py @@ -0,0 +1,86 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import jx_expression +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.variable import Variable +from jx_base.language import is_op +from jx_base.utils import is_variable_name +from mo_dots import is_data +from mo_future import first, is_text +from mo_json import STRING +from mo_logs import Log + + +class ConcatOp(Expression): + has_simple_form = True + data_type = STRING + + def __init__(self, terms, **clauses): + Expression.__init__(self, terms) + if is_data(terms): + self.terms = first(terms.items()) + else: + self.terms = terms + self.separator = clauses.get(str("separator"), Literal("")) + self.default = clauses.get(str("default"), NULL) + if not is_literal(self.separator): + Log.error("Expecting a literal separator") + + @classmethod + def define(cls, expr): + terms = expr["concat"] + if is_data(terms): + k, v = first(terms.items()) + terms = [Variable(k), Literal(v)] + else: + terms = [jx_expression(t) for t in terms] + + return cls.lang[ + ConcatOp( + terms, + **{ + k: Literal(v) + if is_text(v) and not is_variable_name(v) + else jx_expression(v) + for k, v in expr.items() + if k in ["default", "separator"] + } + ) + ] + + def __data__(self): + f, s = self.terms[0], self.terms[1] + if is_op(f, Variable) and is_literal(s): + output = {"concat": {f.var: s.value}} + else: + output = {"concat": [t.__data__() for t in self.terms]} + if self.separator.json != '""': + output["separator"] = self.separator.__data__() + return output + + def vars(self): + if not self.terms: + return set() + return set.union(*(t.vars() for t in self.terms)) + + def map(self, map_): + return self.lang[ + ConcatOp([t.map(map_) for t in self.terms], separator=self.separator) + ] + + def missing(self): + return self.lang[ + AndOp([t.missing() for t in self.terms] + [self.default.missing()]) + ].partial_eval() diff --git a/vendor/jx_base/expressions/count_op.py b/vendor/jx_base/expressions/count_op.py new file mode 100644 index 0000000..76bd6f4 --- /dev/null +++ b/vendor/jx_base/expressions/count_op.py @@ -0,0 +1,55 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
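+
+A short sketch of CountOp (below): a list of terms is wrapped in a TupleOp,
+and the count is the number of members that exist:
+
+    {"count": ["a", "b", "c"]}   # NUMBER OF NON-null VALUES AMONG a, b, c
+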
+WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.true_op import TRUE
+from jx_base.expressions.tuple_op import TupleOp
+from mo_dots import is_many
+from mo_json import INTEGER
+
+
+class CountOp(Expression):
+    has_simple_form = False
+    data_type = INTEGER
+
+    def __init__(self, terms, **clauses):
+        Expression.__init__(self, terms)
+        if is_many(terms):
+            # SHORTCUT: ASSUME AN ARRAY IS A TUPLE
+            self.terms = self.lang[TupleOp(terms)]
+        else:
+            self.terms = terms
+
+    def __data__(self):
+        return {"count": self.terms.__data__()}
+
+    def vars(self):
+        return self.terms.vars()
+
+    def map(self, map_):
+        return self.lang[CountOp(self.terms.map(map_))]
+
+    def missing(self):
+        return FALSE
+
+    def exists(self):
+        return TRUE
diff --git a/vendor/jx_base/expressions/date_op.py b/vendor/jx_base/expressions/date_op.py
new file mode 100644
index 0000000..d037a42
--- /dev/null
+++ b/vendor/jx_base/expressions/date_op.py
@@ -0,0 +1,57 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import literal
+from jx_base.expressions.literal import Literal
+from mo_dots import coalesce
+from mo_future import is_text
+from mo_json import NUMBER
+from mo_times.dates import unicode2Date, Date
+
+
+class DateOp(Literal):
+    data_type = NUMBER
+
+    def __init__(self, term):
+        if hasattr(self, "date"):
+            return
+        if is_text(term):
+            self.date = term
+        else:
+            self.date = coalesce(term.get("literal"), term)
+        v = unicode2Date(self.date)
+        if isinstance(v, Date):
+            Literal.__init__(self, v.unix)
+        else:
+            Literal.__init__(self, v.seconds)
+
+    @classmethod
+    def define(cls, expr):
+        return cls.lang[DateOp(expr.get("date"))]
+
+    def __data__(self):
+        return {"date": self.date}
+
+    def __call__(self, row=None, rownum=None, rows=None):
+        return Date(self.date)
+
+
+literal.DateOp = DateOp
\ No newline at end of file
diff --git a/vendor/jx_base/expressions/div_op.py b/vendor/jx_base/expressions/div_op.py
new file mode 100644
index 0000000..b044ef6
--- /dev/null
+++ b/vendor/jx_base/expressions/div_op.py
@@ -0,0 +1,54 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
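+
+A sketch of DivOp (below): division by a literal zero simplifies directly to
+the default, which is NULL unless given:
+
+    DivOp([Variable("x"), ZERO]).partial_eval()   # -> NULL
+    {"div": {"x": 2}, "default": 0}               # JSON form with a default
+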
+WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified, builtin_ops
+from jx_base.expressions.and_op import AndOp
+from jx_base.expressions.base_binary_op import BaseBinaryOp
+from jx_base.expressions.eq_op import EqOp
+from jx_base.expressions.literal import Literal, ZERO, is_literal
+from jx_base.expressions.or_op import OrOp
+
+
+class DivOp(BaseBinaryOp):
+    op = "div"
+
+    def missing(self):
+        return self.lang[
+            AndOp(
+                [
+                    self.default.missing(),
+                    OrOp(
+                        [self.lhs.missing(), self.rhs.missing(), EqOp([self.rhs, ZERO])]
+                    ),
+                ]
+            )
+        ].partial_eval()
+
+    @simplified
+    def partial_eval(self):
+        default = self.default.partial_eval()
+        rhs = self.rhs.partial_eval()
+        if rhs is ZERO:
+            return default
+        lhs = self.lhs.partial_eval()
+        if is_literal(lhs) and is_literal(rhs):
+            return Literal(builtin_ops[self.op](lhs.value, rhs.value))
+        return self.__class__([lhs, rhs], default=default)
diff --git a/vendor/jx_base/expressions/eq_op.py b/vendor/jx_base/expressions/eq_op.py
new file mode 100644
index 0000000..8630564
--- /dev/null
+++ b/vendor/jx_base/expressions/eq_op.py
@@ -0,0 +1,104 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.and_op import AndOp
+from jx_base.expressions.basic_eq_op import BasicEqOp
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.true_op import TRUE
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op, value_compare
+from mo_dots import is_many
+from mo_json import BOOLEAN
+
+# PLACEHOLDERS; SET BY THE MODULES THAT DEFINE THESE OPERATORS
+CaseOp = None
+InOp = None
+WhenOp = None
+
+
+class EqOp(Expression):
+    has_simple_form = True
+    data_type = BOOLEAN
+
+    def __new__(cls, terms):
+        if is_many(terms):
+            return object.__new__(cls)
+
+        items = terms.items()
+        if len(items) == 1:
+            if is_many(items[0][1]):
+                return cls.lang[InOp(items[0])]
+            else:
+                return cls.lang[EqOp(items[0])]
+        else:
+            acc = []
+            for lhs, rhs in items:
+                if rhs.json.startswith("["):
+                    acc.append(cls.lang[InOp([Variable(lhs), rhs])])
+                else:
+                    acc.append(cls.lang[EqOp([Variable(lhs), rhs])])
+            return cls.lang[AndOp(acc)]
+
+    def __init__(self, terms):
+        Expression.__init__(self, terms)
+        self.lhs, self.rhs = terms
+
+    def __data__(self):
+        if is_op(self.lhs, Variable) and is_literal(self.rhs):
+            return {"eq": {self.lhs.var: self.rhs.value}}
+        else:
+            return {"eq": [self.lhs.__data__(), self.rhs.__data__()]}
+
+    def __eq__(self, other):
+        if is_op(other, EqOp):
+            return self.lhs == other.lhs and self.rhs == other.rhs
+        return False
+
+    def vars(self):
+        return self.lhs.vars() | self.rhs.vars()
+
+    def map(self, map_):
+        return self.lang[EqOp([self.lhs.map(map_), self.rhs.map(map_)])]
+
+    def missing(self):
+        return FALSE
+
+    def exists(self):
+        return TRUE
+
+    @simplified
+    def partial_eval(self):
+        lhs = self.lang[self.lhs].partial_eval()
+        rhs = self.lang[self.rhs].partial_eval()
+
+        if is_literal(lhs) and is_literal(rhs):
+            return FALSE if value_compare(lhs.value, rhs.value) else TRUE
+        else:
+            return self.lang[
+                CaseOp(
+                    [
+                        WhenOp(lhs.missing(), **{"then": rhs.missing()}),
+                        WhenOp(rhs.missing(), **{"then": FALSE}),
+                        BasicEqOp([lhs, rhs]),
+                    ]
+                )
+            ].partial_eval()
diff --git a/vendor/jx_base/expressions/es_nested_op.py b/vendor/jx_base/expressions/es_nested_op.py
new file mode 100644
index 0000000..1aa98e1
--- /dev/null
+++ b/vendor/jx_base/expressions/es_nested_op.py
@@ -0,0 +1,50 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.expression import Expression
+from jx_base.language import is_op
+from mo_json import BOOLEAN
+
+
+class EsNestedOp(Expression):
+    data_type = BOOLEAN
+    has_simple_form = False
+
+    def __init__(self, terms):
+        Expression.__init__(self, terms)
+        self.path, self.query = terms
+
+    @simplified
+    def partial_eval(self):
+        if self.path.var == ".":
+            return self.query.partial_eval()
+        return self.lang[
+            EsNestedOp([self.path, self.query.partial_eval()])
+        ]
+
+    def __data__(self):
+        return {"es.nested": {self.path.var: self.query.__data__()}}
+
+    def __eq__(self, other):
+        if is_op(other, EsNestedOp):
+            return self.path.var == other.path.var and self.query == other.query
+        return False
diff --git a/vendor/jx_base/expressions/es_script.py b/vendor/jx_base/expressions/es_script.py
new file mode 100644
index 0000000..27b11d5
--- /dev/null
+++ b/vendor/jx_base/expressions/es_script.py
@@ -0,0 +1,30 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
diff --git a/vendor/jx_base/expressions/exists_op.py b/vendor/jx_base/expressions/exists_op.py
new file mode 100644
index 0000000..3694474
--- /dev/null
+++ b/vendor/jx_base/expressions/exists_op.py
@@ -0,0 +1,54 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.true_op import TRUE
+from mo_json import BOOLEAN
+
+NotOp = None
+
+
+class ExistsOp(Expression):
+    data_type = BOOLEAN
+
+    def __init__(self, term):
+        Expression.__init__(self, [term])
+        self.field = term
+
+    def __data__(self):
+        return {"exists": self.field.__data__()}
+
+    def vars(self):
+        return self.field.vars()
+
+    def map(self, map_):
+        return self.lang[ExistsOp(self.field.map(map_))]
+
+    def missing(self):
+        return FALSE
+
+    def exists(self):
+        return TRUE
+
+    @simplified
+    def partial_eval(self):
+        return self.lang[NotOp(self.field.missing())].partial_eval()
diff --git a/vendor/jx_base/expressions/exp_op.py b/vendor/jx_base/expressions/exp_op.py
new file mode 100644
index 0000000..7fe8dcd
--- /dev/null
+++ b/vendor/jx_base/expressions/exp_op.py
@@ -0,0 +1,26 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.base_binary_op import BaseBinaryOp
+
+
+class ExpOp(BaseBinaryOp):
+    op = "exp"
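ExistsOp delegates all of its logic to missing(), so each operator only has
to answer one question about null-ness. A toy model of the identity in plain
Python (None standing in for missing; not jx_base code):

    def missing(value):
        return value is None

    def exists(value):
        # ExistsOp.partial_eval() is NotOp(field.missing())
        return not missing(value)

    assert exists(0) is True  # zero exists; only null is missing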
diff --git a/vendor/jx_base/expressions/expression.py b/vendor/jx_base/expressions/expression.py
new file mode 100644
index 0000000..90ab1ba
--- /dev/null
+++ b/vendor/jx_base/expressions/expression.py
@@ -0,0 +1,178 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import operators, jx_expression, _jx_expression, simplified
+from jx_base.language import BaseExpression, ID, is_expression, is_op
+from mo_dots import is_data, is_sequence, is_container
+from mo_future import items as items_, text
+from mo_json import BOOLEAN, OBJECT, value2json
+from mo_logs import Log
+
+# FORWARD REFERENCES, PATCHED IN BY OTHER MODULES
+FALSE, Literal, is_literal, MissingOp, NotOp, NULL, Variable = [None] * 7
+
+
+class Expression(BaseExpression):
+    data_type = OBJECT
+    has_simple_form = False
+
+    def __init__(self, args):
+        self.simplified = False
+        # SOME BASIC VERIFICATION THAT THESE ARE REASONABLE PARAMETERS
+        if is_sequence(args):
+            bad = [t for t in args if t != None and not is_expression(t)]
+            if bad:
+                Log.error("Expecting an expression, not {{bad}}", bad=bad)
+        elif is_data(args):
+            if not all(is_op(k, Variable) and is_literal(v) for k, v in args.items()):
+                Log.error("Expecting {<variable>: <literal>} pairs")
+        elif args == None:
+            pass
+        else:
+            if not is_expression(args):
+                Log.error("Expecting an expression")
+
+    @classmethod
+    def get_id(cls):
+        return getattr(cls, ID)
+
+    @classmethod
+    def define(cls, expr):
+        """
+        GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS
+        OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS
+        :param expr: Data representing a JSON Expression
+        :return: parse tree
+        """
+
+        try:
+            lang = cls.lang
+            items = items_(expr)
+            for item in items:
+                op, term = item
+                full_op = operators.get(op)
+                if full_op:
+                    class_ = lang.ops[full_op.get_id()]
+                    clauses = {k: jx_expression(v) for k, v in expr.items() if k != op}
+                    break
+            else:
+                if not items:
+                    return NULL
+                raise Log.error(
+                    "{{operator|quote}} is not a known operator", operator=expr
+                )
+
+            if term == None:
+                return class_([], **clauses)
+            elif is_container(term):
+                terms = [jx_expression(t) for t in term]
+                return class_(terms, **clauses)
+            elif is_data(term):
+                items = items_(term)
+                if class_.has_simple_form:
+                    if len(items) == 1:
+                        k, v = items[0]
+                        return class_([Variable(k), Literal(v)], **clauses)
+                    else:
+                        return class_({k: Literal(v) for k, v in items}, **clauses)
+                else:
+                    return class_(_jx_expression(term, lang), **clauses)
+            else:
+                if op in ["literal", "date", "offset"]:
+                    return class_(term, **clauses)
+                else:
+                    return class_(_jx_expression(term, lang), **clauses)
+        except Exception as e:
+            Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e)
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    @property
+    def many(self):
+        """
+        :return: True IF THE EXPRESSION RETURNS A MULTIVALUE (WHICH IS NOT A LIST OR A TUPLE)
+        """
+        return False
+
+    def __data__(self):
+        raise NotImplementedError
+
+    def vars(self):
+        raise Log.error("{{type}} has no `vars` method", type=self.__class__.__name__)
+
+    def map(self, map):
+        raise Log.error("{{type}} has no `map` method", type=self.__class__.__name__)
+
+    def missing(self):
+        """
+        THERE IS PLENTY OF OPPORTUNITY TO SIMPLIFY missing EXPRESSIONS
+        OVERRIDE THIS METHOD TO SIMPLIFY
+        :return:
+        """
+        if self.type == BOOLEAN:
+            Log.error("programmer error")
+        return self.lang[MissingOp(self)]
+
+    def exists(self):
+        """
+        THERE IS PLENTY OF OPPORTUNITY TO SIMPLIFY exists EXPRESSIONS
+        OVERRIDE THIS METHOD TO SIMPLIFY
+        :return:
+        """
+        return self.lang[NotOp(self.missing()).partial_eval()]
+
+    def is_true(self):
+        """
+        :return: True, IF THIS EXPRESSION ALWAYS RETURNS BOOLEAN true
+        """
+        return FALSE  # GOOD DEFAULT ASSUMPTION
+
+    def is_false(self):
+        """
+        :return: True, IF THIS EXPRESSION ALWAYS RETURNS BOOLEAN false
+        """
+        return FALSE  # GOOD DEFAULT ASSUMPTION
+
+    @simplified
+    def partial_eval(self):
+        """
+        ATTEMPT TO SIMPLIFY THE EXPRESSION:
+        PREFERABLY RETURNING A LITERAL, BUT MAYBE A SIMPLER EXPRESSION, OR self IF NOT POSSIBLE
+        """
+        return self
+
+    @property
+    def type(self):
+        return self.data_type
+
+    def __eq__(self, other):
+        if other is None:
+            return False
+        if self.get_id() != other.get_id():
+            return False
+        self_class = self.__class__
+        Log.note("this is slow on {{type}}", type=text(self_class.__name__))
+        return self.__data__() == other.__data__()
+
+    def __str__(self):
+        return value2json(self.__data__(), pretty=True)
+
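Expression.define() is the generic bridge from JSON to the operator tree:
find the operator key, pull the remaining keys out as clauses, and recurse on
the term. A hand-traced example (illustrative, not a doctest):

    {"eq": {"a": 1}}
    # "eq" resolves through `operators` to EqOp; EqOp.has_simple_form is
    # True and there is a single pair, so the result is equivalent to:
    EqOp([Variable("a"), Literal(1)])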
diff --git a/vendor/jx_base/expressions/false_op.py b/vendor/jx_base/expressions/false_op.py
new file mode 100644
index 0000000..ccf879c
--- /dev/null
+++ b/vendor/jx_base/expressions/false_op.py
@@ -0,0 +1,82 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import literal, expression
+from jx_base.expressions.literal import Literal
+from mo_json import BOOLEAN
+
+TRUE = None
+
+
+class FalseOp(Literal):
+    data_type = BOOLEAN
+
+    def __new__(cls, *args, **kwargs):
+        # object.__new__() ACCEPTS NO EXTRA ARGUMENTS
+        return object.__new__(cls)
+
+    def __init__(self, op=None, term=None):
+        Literal.__init__(self, False)
+
+    @classmethod
+    def define(cls, expr):
+        return FALSE
+
+    def __nonzero__(self):
+        return False
+
+    def __eq__(self, other):
+        return (other is FALSE) or (other is False)
+
+    def __data__(self):
+        return False
+
+    def vars(self):
+        return set()
+
+    def map(self, map_):
+        return self
+
+    def missing(self):
+        return FALSE
+
+    def is_true(self):
+        return FALSE
+
+    def is_false(self):
+        return TRUE
+
+    def __call__(self, row=None, rownum=None, rows=None):
+        return False
+
+    def __unicode__(self):
+        return "false"
+
+    def __str__(self):
+        return "false"
+
+    def __bool__(self):
+        return False
+
+
+FALSE = FalseOp()
+
+expression.FALSE = FALSE
+literal.FALSE = FALSE
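FALSE is built once at module load and then patched into the modules that
declared it as a forward reference, which is what makes identity tests like
`term is FALSE` reliable across the codebase. The wiring pattern, compressed:

    FALSE = FalseOp()          # the single shared instance
    expression.FALSE = FALSE   # satisfy expression.py's forward reference
    literal.FALSE = FALSE      # satisfy literal.py's forward reference

The same late-binding trick (a module-level name initialized to None, then
assigned from outside) is how these small single-operator modules break
their circular imports.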
diff --git a/vendor/jx_base/expressions/find_op.py b/vendor/jx_base/expressions/find_op.py
new file mode 100644
index 0000000..0f8f7dd
--- /dev/null
+++ b/vendor/jx_base/expressions/find_op.py
@@ -0,0 +1,75 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.literal import ZERO
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.null_op import NULL
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op
+from mo_json import INTEGER
+
+
+class FindOp(Expression):
+    """
+    RETURN INDEX OF find IN value, ELSE RETURN null
+    """
+
+    has_simple_form = True
+    data_type = INTEGER
+
+    def __init__(self, term, **kwargs):
+        Expression.__init__(self, term)
+        self.value, self.find = term
+        self.default = kwargs.get("default", NULL)
+        self.start = kwargs.get("start", ZERO).partial_eval()
+        if self.start is NULL:
+            self.start = ZERO
+
+    def __data__(self):
+        if is_op(self.value, Variable) and is_literal(self.find):
+            output = {
+                "find": {self.value.var: self.find.value},
+                "start": self.start.__data__(),
+            }
+        else:
+            output = {
+                "find": [self.value.__data__(), self.find.__data__()],
+                "start": self.start.__data__(),
+            }
+        if self.default is not NULL:
+            output["default"] = self.default.__data__()
+        return output
+
+    def vars(self):
+        return (
+            self.value.vars()
+            | self.find.vars()
+            | self.default.vars()
+            | self.start.vars()
+        )
+
+    def map(self, map_):
+        return FindOp(
+            [self.value.map(map_), self.find.map(map_)],
+            start=self.start.map(map_),
+            default=self.default.map(map_),
+        )
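FindOp is the JSON-expression analogue of str.find(), except a miss yields
null (or the default) rather than -1. Illustrative JSON forms, with an
invented "url" field:

    {"find": {"url": "/"}}                                   # simple form
    {"find": ["url", {"literal": "/"}], "start": 2, "default": 0}

The second form is what __data__() produces whenever either side is a full
expression rather than a variable/literal pair.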
diff --git a/vendor/jx_base/expressions/first_op.py b/vendor/jx_base/expressions/first_op.py
new file mode 100644
index 0000000..c89f13c
--- /dev/null
+++ b/vendor/jx_base/expressions/first_op.py
@@ -0,0 +1,79 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.literal import is_literal
+from jx_base.language import is_op
+from mo_json import OBJECT
+from mo_logs import Log
+
+CaseOp = None
+WhenOp = None
+
+
+class FirstOp(Expression):
+    def __init__(self, term):
+        Expression.__init__(self, [term])
+        self.term = term
+        self.data_type = self.term.type
+
+    def __data__(self):
+        return {"first": self.term.__data__()}
+
+    def vars(self):
+        return self.term.vars()
+
+    def map(self, map_):
+        return self.lang[FirstOp(self.term.map(map_))]
+
+    def missing(self):
+        return self.term.missing()
+
+    @simplified
+    def partial_eval(self):
+        term = self.lang[self.term].partial_eval()
+        if is_op(term, FirstOp):
+            return term
+        elif is_op(term, CaseOp):  # REWRITING
+            return self.lang[
+                CaseOp(
+                    [
+                        WhenOp(t.when, **{"then": FirstOp(t.then)})
+                        for t in term.whens[:-1]
+                    ]
+                    + [FirstOp(term.whens[-1])]
+                )
+            ].partial_eval()
+        elif is_op(term, WhenOp):
+            return self.lang[
+                WhenOp(
+                    term.when,
+                    **{"then": FirstOp(term.then), "else": FirstOp(term.els_)}
+                )
+            ].partial_eval()
+        elif term.type != OBJECT and not term.many:
+            return term
+        elif is_literal(term):
+            Log.error("not handled yet")
+        else:
+            return self.lang[FirstOp(term)]
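FirstOp forces a single value out of a possibly-multivalued term, and its
partial_eval() pushes that demand inside conditionals instead of evaluating
branches eagerly. Schematically:

    first(when(c, then=a, else=b))  ->  when(c, then=first(a), else=first(b))
    first(first(x))                 ->  first(x)

Both rewrites preserve meaning while exposing more literals to later
simplification passes.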
diff --git a/vendor/jx_base/expressions/floor_op.py b/vendor/jx_base/expressions/floor_op.py
new file mode 100644
index 0000000..4c79f29
--- /dev/null
+++ b/vendor/jx_base/expressions/floor_op.py
@@ -0,0 +1,72 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.eq_op import EqOp
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import ZERO, ONE
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.null_op import NULL
+from jx_base.expressions.or_op import OrOp
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op
+from mo_json import NUMBER
+
+
+class FloorOp(Expression):
+    has_simple_form = True
+    data_type = NUMBER
+
+    def __init__(self, terms, default=NULL):
+        Expression.__init__(self, terms)
+        if len(terms) == 1:
+            self.lhs = terms[0]
+            self.rhs = ONE
+        else:
+            self.lhs, self.rhs = terms
+        self.default = default
+
+    def __data__(self):
+        if is_op(self.lhs, Variable) and is_literal(self.rhs):
+            return {
+                "floor": {self.lhs.var: self.rhs.value},
+                "default": self.default.__data__(),
+            }
+        else:
+            return {
+                "floor": [self.lhs.__data__(), self.rhs.__data__()],
+                "default": self.default.__data__(),
+            }
+
+    def vars(self):
+        return self.lhs.vars() | self.rhs.vars() | self.default.vars()
+
+    def map(self, map_):
+        return self.lang[
+            FloorOp(
+                [self.lhs.map(map_), self.rhs.map(map_)], default=self.default.map(map_)
+            )
+        ]
+
+    def missing(self):
+        if self.default.exists():
+            return FALSE
+        else:
+            return self.lang[
+                OrOp([self.lhs.missing(), self.rhs.missing(), EqOp([self.rhs, ZERO])])
+            ]
diff --git a/vendor/jx_base/expressions/from_unix_op.py b/vendor/jx_base/expressions/from_unix_op.py
new file mode 100644
index 0000000..df39eab
--- /dev/null
+++ b/vendor/jx_base/expressions/from_unix_op.py
@@ -0,0 +1,44 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.expression import Expression
+from mo_json import NUMBER
+
+
+class FromUnixOp(Expression):
+    """
+    FOR USE ON DATABASES WHICH HAVE DATE COLUMNS: CONVERT TO UNIX
+    """
+
+    data_type = NUMBER
+
+    def __init__(self, term):
+        Expression.__init__(self, term)
+        self.value = term
+
+    def vars(self):
+        return self.value.vars()
+
+    def map(self, map_):
+        return self.lang[FromUnixOp(self.value.map(map_))]
+
+    def missing(self):
+        return self.value.missing()
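Two-argument floor rounds lhs down to a multiple of rhs, which is why
EqOp([self.rhs, ZERO]) appears in missing(): floor by zero has no value
unless a default is supplied. A plain-Python model of the semantics (an
illustration, not the operator's actual evaluator):

    import math

    def floor(lhs, rhs=1, default=None):
        if lhs is None or rhs in (None, 0):
            return default       # missing, unless a default exists
        return math.floor(lhs / rhs) * rhs

    assert floor(7, 2) == 6
    assert floor(7, 0, default=42) == 42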
diff --git a/vendor/jx_base/expressions/get_op.py b/vendor/jx_base/expressions/get_op.py
new file mode 100644
index 0000000..31cd93f
--- /dev/null
+++ b/vendor/jx_base/expressions/get_op.py
@@ -0,0 +1,49 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.literal import is_literal
+
+
+class GetOp(Expression):
+    has_simple_form = True
+
+    def __init__(self, term):
+        Expression.__init__(self, term)
+        self.var = term[0]
+        self.offsets = term[1:]
+
+    def __data__(self):
+        if is_literal(self.var) and len(self.offsets) == 1 and is_literal(self.offsets[0]):
+            return {"get": {self.var.json: self.offsets[0].value}}
+        else:
+            return {"get": [self.var.__data__()] + [o.__data__() for o in self.offsets]}
+
+    def vars(self):
+        output = self.var.vars()
+        for o in self.offsets:
+            output |= o.vars()
+        return output
+
+    def map(self, map_):
+        return self.lang[
+            GetOp([self.var.map(map_)] + [o.map(map_) for o in self.offsets])
+        ]
diff --git a/vendor/jx_base/expressions/gt_op.py b/vendor/jx_base/expressions/gt_op.py
new file mode 100644
index 0000000..a2afe8d
--- /dev/null
+++ b/vendor/jx_base/expressions/gt_op.py
@@ -0,0 +1,26 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.base_inequality_op import BaseInequalityOp
+
+
+class GtOp(BaseInequalityOp):
+    op = "gt"
diff --git a/vendor/jx_base/expressions/gte_op.py b/vendor/jx_base/expressions/gte_op.py
new file mode 100644
index 0000000..cd1bd96
--- /dev/null
+++ b/vendor/jx_base/expressions/gte_op.py
@@ -0,0 +1,26 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions.base_inequality_op import BaseInequalityOp
+
+
+class GteOp(BaseInequalityOp):
+    op = "gte"
diff --git a/vendor/jx_base/expressions/in_op.py b/vendor/jx_base/expressions/in_op.py
new file mode 100644
index 0000000..b36fcc3
--- /dev/null
+++ b/vendor/jx_base/expressions/in_op.py
@@ -0,0 +1,85 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import eq_op +from jx_base.expressions._utils import simplified +from jx_base.expressions.eq_op import EqOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.variable import Variable +from jx_base.language import is_op +from mo_dots import is_many +from mo_json import BOOLEAN + + +class InOp(Expression): + has_simple_form = True + data_type = BOOLEAN + + def __new__(cls, terms): + if is_op(terms[0], Variable) and is_op(terms[1], Literal): + name, value = terms + if not is_many(value.value): + return cls.lang[EqOp([name, Literal([value.value])])] + return object.__new__(cls) + + def __init__(self, term): + Expression.__init__(self, term) + self.value, self.superset = term + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.superset): + return {"in": {self.value.var: self.superset.value}} + else: + return {"in": [self.value.__data__(), self.superset.__data__()]} + + def __eq__(self, other): + if is_op(other, InOp): + return self.value == other.value and self.superset == other.superset + return False + + def vars(self): + return self.value.vars() + + def map(self, map_): + return self.lang[InOp([self.value.map(map_), self.superset.map(map_)])] + + @simplified + def partial_eval(self): + value = self.value.partial_eval() + superset = self.superset.partial_eval() + if superset is NULL: + return FALSE + elif is_literal(value) and is_literal(superset): + return self.lang[Literal(self())] + else: + return self.lang[InOp([value, superset])] + + def __call__(self): + return self.value() in self.superset() + + def missing(self): + return FALSE + +eq_op.InOp = InOp diff --git a/vendor/jx_base/expressions/integer_op.py b/vendor/jx_base/expressions/integer_op.py new file mode 100644 index 0000000..3d7b78b --- /dev/null +++ b/vendor/jx_base/expressions/integer_op.py @@ -0,0 +1,56 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.coalesce_op import CoalesceOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.first_op import FirstOp +from jx_base.language import is_op +from mo_json import INTEGER + + +class IntegerOp(Expression): + data_type = INTEGER + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"integer": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[IntegerOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.lang[FirstOp(self.term)].partial_eval() + if is_op(term, CoalesceOp): + return self.lang[CoalesceOp([IntegerOp(t) for t in term.terms])] + if term.type == INTEGER: + return term + return self.lang[IntegerOp(term)] diff --git a/vendor/jx_base/expressions/is_boolean_op.py b/vendor/jx_base/expressions/is_boolean_op.py new file mode 100644 index 0000000..ad74dc5 --- /dev/null +++ b/vendor/jx_base/expressions/is_boolean_op.py @@ -0,0 +1,44 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import BOOLEAN + + +class IsBooleanOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"is_boolean": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[IsBooleanOp(self.term.map(map_))] + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/is_integer_op.py b/vendor/jx_base/expressions/is_integer_op.py new file mode 100644 index 0000000..6fe17a9 --- /dev/null +++ b/vendor/jx_base/expressions/is_integer_op.py @@ -0,0 +1,44 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import BOOLEAN + + +class IsIntegerOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"is_integer": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[IsIntegerOp(self.term.map(map_))] + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/is_number_op.py b/vendor/jx_base/expressions/is_number_op.py new file mode 100644 index 0000000..f185c85 --- /dev/null +++ b/vendor/jx_base/expressions/is_number_op.py @@ -0,0 +1,60 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.null_op import NULL +from jx_base.expressions.true_op import TRUE +from mo_json import BOOLEAN, INTEGER, NUMBER, OBJECT, NUMBER_TYPES + + +class IsNumberOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"is_number": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[IsNumberOp(self.term.map(map_))] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + term = self.term.partial_eval() + + if term is NULL: + return FALSE + elif term.type in NUMBER_TYPES: + return TRUE + elif term.type == OBJECT: + return self + else: + return FALSE diff --git a/vendor/jx_base/expressions/is_string_op.py b/vendor/jx_base/expressions/is_string_op.py new file mode 100644 index 0000000..1cfe339 --- /dev/null +++ b/vendor/jx_base/expressions/is_string_op.py @@ -0,0 +1,44 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import BOOLEAN + + +class IsStringOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"is_string": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[IsStringOp(self.term.map(map_))] + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/last_op.py b/vendor/jx_base/expressions/last_op.py new file mode 100644 index 0000000..eb18a3e --- /dev/null +++ b/vendor/jx_base/expressions/last_op.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.language import is_op +from mo_dots import is_many +from mo_dots.lists import last +from mo_json import OBJECT + + +class LastOp(Expression): + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + self.data_type = self.term.type + + def __data__(self): + return {"last": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[LastOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.term.partial_eval() + if is_op(self.term, LastOp): + return term + elif term.type != OBJECT and not term.many: + return term + elif term is NULL: + return term + elif is_literal(term): + return last(term) + else: + return self.lang[LastOp(term)] diff --git a/vendor/jx_base/expressions/leaves_op.py b/vendor/jx_base/expressions/leaves_op.py new file mode 100644 index 0000000..b93a48c --- /dev/null +++ b/vendor/jx_base/expressions/leaves_op.py @@ -0,0 +1,48 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import OBJECT + + +class LeavesOp(Expression): + date_type = OBJECT + + def __init__(self, term, prefix=None): + Expression.__init__(self, term) + self.term = term + self.prefix = prefix + + def __data__(self): + if self.prefix: + return {"leaves": self.term.__data__(), "prefix": self.prefix} + else: + return {"leaves": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[LeavesOp(self.term.map(map_))] + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/left_op.py b/vendor/jx_base/expressions/left_op.py new file mode 100644 index 0000000..b4f3636 --- /dev/null +++ b/vendor/jx_base/expressions/left_op.py @@ -0,0 +1,81 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import ZERO +from jx_base.expressions.literal import is_literal +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import STRING + + +class LeftOp(Expression): + has_simple_form = True + data_type = STRING + + def __init__(self, term): + Expression.__init__(self, term) + if is_data(term): + self.value, self.length = term.items()[0] + else: + self.value, self.length = term + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.length): + return {"left": {self.value.var: self.length.value}} + else: + return {"left": [self.value.__data__(), self.length.__data__()]} + + def vars(self): + return self.value.vars() | self.length.vars() + + def map(self, map_): + return self.lang[LeftOp([self.value.map(map_), self.length.map(map_)])] + + def missing(self): + return self.lang[ + OrOp([self.value.missing(), self.length.missing()]) + ].partial_eval() + + @simplified + def partial_eval(self): + value = self.lang[self.value].partial_eval() + length = self.lang[self.length].partial_eval() + max_length = LengthOp(value) + + return self.lang[ + WhenOp( + self.missing(), + **{ + "else": BasicSubstringOp( + [value, ZERO, MaxOp([ZERO, MinOp([length, max_length])])] + ) + } + ) + ].partial_eval() diff --git a/vendor/jx_base/expressions/length_op.py b/vendor/jx_base/expressions/length_op.py new file mode 100644 index 
0000000..01da335 --- /dev/null +++ b/vendor/jx_base/expressions/length_op.py @@ -0,0 +1,64 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.language import is_op +from mo_future import is_text +from mo_json import INTEGER + + +class LengthOp(Expression): + data_type = INTEGER + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __eq__(self, other): + if is_op(other, LengthOp): + return self.term == other.term + + def __data__(self): + return {"length": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[LengthOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.lang[self.term].partial_eval() + if is_literal(term): + if is_text(term.value): + return self.lang[Literal(len(term.value))] + else: + return NULL + else: + return self.lang[LengthOp(term)] diff --git a/vendor/jx_base/expressions/literal.py b/vendor/jx_base/expressions/literal.py new file mode 100644 index 0000000..15790e8 --- /dev/null +++ b/vendor/jx_base/expressions/literal.py @@ -0,0 +1,142 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import _utils, expression +from jx_base.expressions._utils import simplified, value2json +from jx_base.expressions.expression import Expression +from mo_dots import Null, is_data +from mo_json import python_type_to_json_type + +DateOp, FALSE, TRUE, NULL = [None]*4 + +class Literal(Expression): + """ + A literal JSON document + """ + + def __new__(cls, term): + if term == None: + return NULL + if term is True: + return TRUE + if term is False: + return FALSE + if is_data(term) and term.get("date"): + # SPECIAL CASE + return cls.lang[DateOp(term.get("date"))] + return object.__new__(cls) + + def __init__(self, value): + Expression.__init__(self, None) + self.simplified = True + self._value = value + + @classmethod + def define(cls, expr): + return Literal(expr.get("literal")) + + def __nonzero__(self): + return True + + def __eq__(self, other): + if other == None: + if self._value == None: + return True + else: + return False + elif self._value == None: + return False + + if is_literal(other): + return (self._value == other._value) or (self.json == other.json) + + def __data__(self): + return {"literal": self.value} + + @property + def value(self): + return self._value + + @property + def json(self): + if self._value == "": + self._json = '""' + else: + self._json = value2json(self._value) + + return self._json + + def vars(self): + return set() + + def map(self, map_): + return self + + def missing(self): + if self._value in [None, Null]: + return TRUE + if self.value == "": + return TRUE + return FALSE + + def __call__(self, row=None, rownum=None, rows=None): + return self.value + + def __unicode__(self): + return self._json + + def __str__(self): + return str(self._json) + + @property + def type(self): + return python_type_to_json_type[self._value.__class__] + + @simplified + def partial_eval(self): + return self + + def str(self): + return str(self.value) + + +ZERO = Literal(0) +ONE = Literal(1) + + +literal_op_ids = tuple() + + +def register_literal(op): + global literal_op_ids + literal_op_ids = literal_op_ids+(op.get_id(),) + + +def is_literal(l): + try: + return l.get_id() in literal_op_ids + except Exception: + return False + + +_utils.Literal = Literal +expression.Literal = Literal +expression.is_literal=is_literal diff --git a/vendor/jx_base/expressions/lt_op.py b/vendor/jx_base/expressions/lt_op.py new file mode 100644 index 0000000..f64a13e --- /dev/null +++ b/vendor/jx_base/expressions/lt_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_inequality_op import BaseInequalityOp + + +class LtOp(BaseInequalityOp): + op = "lt" diff --git a/vendor/jx_base/expressions/lte_op.py b/vendor/jx_base/expressions/lte_op.py new file mode 100644 index 0000000..b532e97 --- /dev/null +++ b/vendor/jx_base/expressions/lte_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_inequality_op import BaseInequalityOp + + +class LteOp(BaseInequalityOp): + op = "lte" diff --git a/vendor/jx_base/expressions/max_op.py b/vendor/jx_base/expressions/max_op.py new file mode 100644 index 0000000..efae1dc --- /dev/null +++ b/vendor/jx_base/expressions/max_op.py @@ -0,0 +1,82 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal, is_literal +from jx_base.expressions.null_op import NULL +from mo_dots import is_many +from mo_json import NUMBER +from mo_math import MAX + + +class MaxOp(Expression): + data_type = NUMBER + + def __init__(self, terms): + Expression.__init__(self, terms) + if terms == None: + self.terms = [] + elif is_many(terms): + self.terms = [t for t in terms if t != None] + else: + self.terms = [terms] + + def __data__(self): + return {"max": [t.__data__() for t in self.terms]} + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[MaxOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + maximum = None + terms = [] + for t in self.terms: + simple = t.partial_eval() + if simple is NULL: + pass + elif is_literal(simple): + maximum = MAX([maximum, simple.value]) + else: + terms.append(simple) + if len(terms) == 0: + if maximum == None: + return NULL + else: + return Literal(maximum) + else: + if maximum == None: + output = self.lang[MaxOp(terms)] + else: + output = self.lang[MaxOp([Literal(maximum)] + terms)] + + return output diff --git a/vendor/jx_base/expressions/min_op.py b/vendor/jx_base/expressions/min_op.py new file mode 100644 index 0000000..df4cc58 --- /dev/null +++ b/vendor/jx_base/expressions/min_op.py @@ -0,0 +1,85 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.null_op import NullOp +from jx_base.language import is_op +from mo_dots import is_many +from mo_json import NUMBER +from mo_math import MIN + + +class MinOp(Expression): + data_type = NUMBER + + def __init__(self, terms): + Expression.__init__(self, terms) + if terms == None: + self.terms = [] + elif is_many(terms): + self.terms = terms + else: + self.terms = [terms] + + def __data__(self): + return {"min": [t.__data__() for t in self.terms]} + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[MinOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + minimum = None + terms = [] + for t in self.terms: + simple = t.partial_eval() + if is_op(simple, NullOp): + pass + elif is_literal(simple): + minimum = MIN([minimum, simple.value]) + else: + terms.append(simple) + if len(terms) == 0: + if minimum == None: + return NULL + else: + return Literal(minimum) + else: + if minimum == None: + output = self.lang[MinOp(terms)] + else: + output = self.lang[MinOp([Literal(minimum)] + terms)] + + return output diff --git a/vendor/jx_base/expressions/missing_op.py b/vendor/jx_base/expressions/missing_op.py new file mode 100644 index 0000000..17b1016 --- /dev/null +++ b/vendor/jx_base/expressions/missing_op.py @@ -0,0 +1,68 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import expression +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.true_op import TRUE +from jx_base.language import is_op +from mo_json import BOOLEAN + + +class MissingOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, term) + self.expr = term + + def __data__(self): + return {"missing": self.expr.__data__()} + + def __eq__(self, other): + if not is_op(other, MissingOp): + return False + else: + return self.expr == other.expr + + def vars(self): + return self.expr.vars() + + def map(self, map_): + return self.lang[MissingOp(self.expr.map(map_))] + + def missing(self): + return FALSE + + def exists(self): + return TRUE + + @simplified + def partial_eval(self): + output = self.lang[self.expr].partial_eval().missing() + if is_op(output, MissingOp): + return output + else: + return output.partial_eval() + + +expression.MissingOp = MissingOp diff --git a/vendor/jx_base/expressions/mod_op.py b/vendor/jx_base/expressions/mod_op.py new file mode 100644 index 0000000..9424c04 --- /dev/null +++ b/vendor/jx_base/expressions/mod_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_binary_op import BaseBinaryOp + + +class ModOp(BaseBinaryOp): + op = "mod" diff --git a/vendor/jx_base/expressions/mul_op.py b/vendor/jx_base/expressions/mul_op.py new file mode 100644 index 0000000..5ff1aee --- /dev/null +++ b/vendor/jx_base/expressions/mul_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_multi_op import BaseMultiOp + + +class MulOp(BaseMultiOp): + op = "mul" diff --git a/vendor/jx_base/expressions/ne_op.py b/vendor/jx_base/expressions/ne_op.py new file mode 100644 index 0000000..d4aa04a --- /dev/null +++ b/vendor/jx_base/expressions/ne_op.py @@ -0,0 +1,71 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE
+LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT
+
+"""
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import not_op
+from jx_base.expressions._utils import simplified
+from jx_base.expressions.eq_op import EqOp
+from jx_base.expressions.expression import Expression
+from jx_base.expressions.false_op import FALSE
+from jx_base.expressions.literal import is_literal
+from jx_base.expressions.not_op import NotOp
+from jx_base.expressions.variable import Variable
+from jx_base.language import is_op
+from mo_dots import is_data, is_sequence
+from mo_json import BOOLEAN
+from mo_logs import Log
+
+
+class NeOp(Expression):
+    has_simple_form = True
+    data_type = BOOLEAN
+
+    def __init__(self, terms):
+        Expression.__init__(self, terms)
+        if is_sequence(terms):
+            self.lhs, self.rhs = terms
+        elif is_data(terms):
+            self.rhs, self.lhs = terms.items()[0]
+        else:
+            Log.error("logic error")
+
+    def __data__(self):
+        if is_op(self.lhs, Variable) and is_literal(self.rhs):
+            return {"ne": {self.lhs.var: self.rhs.value}}
+        else:
+            return {"ne": [self.lhs.__data__(), self.rhs.__data__()]}
+
+    def vars(self):
+        return self.lhs.vars() | self.rhs.vars()
+
+    def map(self, map_):
+        return self.lang[NeOp([self.lhs.map(map_), self.rhs.map(map_)])]
+
+    def missing(self):
+        # USING THE decisive EQUALITY:
+        # https://github.com/mozilla/jx-sqlite/blob/master/docs/Logical%20Equality.md#definitions
+        return FALSE
+
+    @simplified
+    def partial_eval(self):
+        return self.lang[NotOp(EqOp([self.lhs, self.rhs]))].partial_eval()
+
+
+not_op.NeOp = NeOp
\ No newline at end of file
diff --git a/vendor/jx_base/expressions/not_left_op.py b/vendor/jx_base/expressions/not_left_op.py
new file mode 100644
index 0000000..4408eb3
--- /dev/null
+++ b/vendor/jx_base/expressions/not_left_op.py
@@ -0,0 +1,83 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+
+"""
+# NOTE:
+
+THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE;
+KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS
+Painless.
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import ZERO +from jx_base.expressions.literal import is_literal +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import STRING + + +class NotLeftOp(Expression): + has_simple_form = True + data_type = STRING + + def __init__(self, term): + Expression.__init__(self, term) + if is_data(term): + self.value, self.length = term.items()[0] + else: + self.value, self.length = term + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.length): + return {"not_left": {self.value.var: self.length.value}} + else: + return {"not_left": [self.value.__data__(), self.length.__data__()]} + + def vars(self): + return self.value.vars() | self.length.vars() + + def map(self, map_): + return self.lang[NotLeftOp([self.value.map(map_), self.length.map(map_)])] + + def missing(self): + return self.lang[OrOp([self.value.missing(), self.length.missing()])] + + @simplified + def partial_eval(self): + value = self.lang[self.value].partial_eval() + length = self.length.partial_eval() + + if length is ZERO: + return value + + max_length = LengthOp(value) + output = self.lang[ + WhenOp( + self.missing(), + **{ + "else": BasicSubstringOp( + [value, MaxOp([ZERO, MinOp([length, max_length])]), max_length] + ) + } + ) + ].partial_eval() + return output diff --git a/vendor/jx_base/expressions/not_op.py b/vendor/jx_base/expressions/not_op.py new file mode 100644 index 0000000..9b0dc02 --- /dev/null +++ b/vendor/jx_base/expressions/not_op.py @@ -0,0 +1,126 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import and_op, exists_op, expression +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.basic_index_of_op import BasicIndexOfOp +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.eq_op import EqOp +from jx_base.expressions.exists_op import ExistsOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import is_literal +from jx_base.expressions.missing_op import MissingOp +from jx_base.expressions.null_op import NULL +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.true_op import TRUE +from jx_base.language import is_op +from mo_json import BOOLEAN +from mo_logs import Log + +CaseOp = None +NeOp = None +WhenOp = None + +class NotOp(Expression): + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, term) + self.term = term + + def __data__(self): + return {"not": self.term.__data__()} + + def __eq__(self, other): + if not is_op(other, NotOp): + return False + return self.term == other.term + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[NotOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + def inverse(term): + if term is TRUE: + return FALSE + elif term is FALSE: + return TRUE + elif term is NULL: + return TRUE + elif is_literal(term): + Log.error("`not` operator expects a Boolean term") + elif is_op(term, WhenOp): + output = self.lang[ + WhenOp( + term.when, + **{"then": inverse(term.then), "else": inverse(term.els_)} + ) + ].partial_eval() + elif is_op(term, CaseOp): # REWRITING + output = self.lang[ + CaseOp( + [ + WhenOp(w.when, **{"then": inverse(w.then)}) + if is_op(w, WhenOp) + else inverse(w) + for w in term.whens + ] + ) + ].partial_eval() + elif is_op(term, AndOp): + output = self.lang[ + OrOp([inverse(t) for t in term.terms]) + ].partial_eval() + elif is_op(term, OrOp): + output = self.lang[ + AndOp([inverse(t) for t in term.terms]) + ].partial_eval() + elif is_op(term, MissingOp): + output = self.lang[NotOp(term.expr.missing())] + elif is_op(term, ExistsOp): + output = term.field.missing().partial_eval() + elif is_op(term, NotOp): + output = self.lang[term.term].partial_eval() + elif is_op(term, NeOp): + output = self.lang[EqOp([term.lhs, term.rhs])].partial_eval() + elif is_op(term, BasicIndexOfOp) or is_op(term, BasicSubstringOp): + return FALSE + else: + output = self.lang[NotOp(term)] + + return output + + output = inverse(self.lang[self.term].partial_eval()) + return output + + +and_op.NotOp = NotOp +exists_op.NotOp = NotOp +expression.NotOp =NotOp diff --git a/vendor/jx_base/expressions/not_right_op.py b/vendor/jx_base/expressions/not_right_op.py new file mode 100644 index 0000000..8e83d26 --- /dev/null +++ b/vendor/jx_base/expressions/not_right_op.py @@ -0,0 +1,81 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import ZERO +from jx_base.expressions.literal import is_literal +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.sub_op import SubOp +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import STRING + + +class NotRightOp(Expression): + has_simple_form = True + data_type = STRING + + def __init__(self, term): + Expression.__init__(self, term) + if is_data(term): + self.value, self.length = term.items()[0] + else: + self.value, self.length = term + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.length): + return {"not_right": {self.value.var: self.length.value}} + else: + return {"not_right": [self.value.__data__(), self.length.__data__()]} + + def vars(self): + return self.value.vars() | self.length.vars() + + def map(self, map_): + return self.lang[NotRightOp([self.value.map(map_), self.length.map(map_)])] + + def missing(self): + return self.lang[OrOp([self.value.missing(), self.length.missing()])] + + @simplified + def partial_eval(self): + value = self.lang[self.value].partial_eval() + length = self.length.partial_eval() + + if length is ZERO: + return value + + max_length = LengthOp(value) + part = BasicSubstringOp( + [ + value, + ZERO, + MaxOp([ZERO, MinOp([max_length, SubOp([max_length, length])])]), + ] + ) + return self.lang[WhenOp(self.missing(), **{"else": part})].partial_eval() diff --git a/vendor/jx_base/expressions/null_op.py b/vendor/jx_base/expressions/null_op.py new file mode 100644 index 0000000..ceaa78f --- /dev/null +++ b/vendor/jx_base/expressions/null_op.py @@ -0,0 +1,113 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
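+A SKETCH OF THE EXPECTED BEHAVIOUR OF THE NULL SINGLETON DEFINED BELOW: + + NULL.missing() is TRUE # NULL IS ALWAYS MISSING + NULL.exists() is FALSE + NULL(row) == Null # CALLING IT RETURNS THE mo_dots Null +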
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import literal, _utils, expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.true_op import TRUE +from jx_base.language import TYPE_ORDER +from mo_dots import Null +from mo_json import IS_NULL, OBJECT +from mo_logs import Log + + +class NullOp(Literal): + """ + FOR USE WHEN EVERYTHING IS EXPECTED TO BE AN Expression + USE IT TO EXPECT A NULL VALUE IN assertAlmostEqual + """ + + data_type = OBJECT + + @classmethod + def define(cls, expr): + return NULL + + def __new__(cls, *args, **kwargs): + return object.__new__(cls, *args, **kwargs) + + def __init__(self, op=None, term=None): + Literal.__init__(self, None) + + def __nonzero__(self): + return True + + def __eq__(self, other): + return other is NULL + + def __gt__(self, other): + return False + + def __lt__(self, other): + return False + + def __ge__(self, other): + if other == None: + return True + return False + + def __le__(self, other): + if other == None: + return True + return False + + def __data__(self): + return {"null": {}} + + def vars(self): + return set() + + def map(self, map_): + return self + + def missing(self): + return TRUE + + def exists(self): + return FALSE + + def __call__(self, row=None, rownum=None, rows=None): + return Null + + def __unicode__(self): + return "null" + + def __str__(self): + return b"null" + + @property + def type(self): + return IS_NULL + + def __hash__(self): + return id(None) + + def __bool__(self): + Log.error("Detecting truthiness of NullOp is too confusing to be allowed") + + +NULL = NullOp() +TYPE_ORDER[NullOp] = 9 +TYPE_ORDER[NULL] = 9 + +literal.NULL = NULL +_utils.NULL = NULL +expression.NULL=NULL diff --git a/vendor/jx_base/expressions/number_op.py b/vendor/jx_base/expressions/number_op.py new file mode 100644 index 0000000..b1291db --- /dev/null +++ b/vendor/jx_base/expressions/number_op.py @@ -0,0 +1,97 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
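+A SKETCH OF HOW partial_eval() OF THE OPERATOR BELOW COERCES LITERALS TO NUMBERS: + + NumberOp(TRUE).partial_eval() # => ONE + NumberOp(Literal("42")).partial_eval() # => Literal(42.0) + NumberOp(NULL).partial_eval() # => NULL +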
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.case_op import CaseOp +from jx_base.expressions.coalesce_op import CoalesceOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.first_op import FirstOp +from jx_base.expressions.literal import Literal, ZERO, ONE +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_future import text +from mo_json import NUMBER +from mo_logs import Log +from mo_times import Date + + +class NumberOp(Expression): + data_type = NUMBER + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"number": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[NumberOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.lang[FirstOp(self.term)].partial_eval() + + if is_literal(term): + if term is NULL: + return NULL + elif term is FALSE: + return ZERO + elif term is TRUE: + return ONE + + v = term.value + if isinstance(v, (text, Date)): + return self.lang[Literal(float(v))] + elif isinstance(v, (int, float)): + return term + else: + Log.error("can not convert {{value|json}} to number", value=term.value) + elif is_op(term, CaseOp): # REWRITING + return self.lang[ + CaseOp( + [ + WhenOp(t.when, **{"then": NumberOp(t.then)}) + for t in term.whens[:-1] + ] + + [NumberOp(term.whens[-1])] + ) + ].partial_eval() + elif is_op(term, WhenOp): # REWRITING + return self.lang[ + WhenOp( + term.when, + **{"then": NumberOp(term.then), "else": NumberOp(term.els_)} + ) + ].partial_eval() + elif is_op(term, CoalesceOp): + return self.lang[CoalesceOp([NumberOp(t) for t in term.terms])] + return self.lang[NumberOp(term)] diff --git a/vendor/jx_base/expressions/offset_op.py b/vendor/jx_base/expressions/offset_op.py new file mode 100644 index 0000000..9294afc --- /dev/null +++ b/vendor/jx_base/expressions/offset_op.py @@ -0,0 +1,61 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
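+FOR EXAMPLE (A SKETCH), AN OFFSET INDEXES INTO A ROW THAT IS A TUPLE: + + row = ("a", "b", "c") + OffsetOp(1)(row) # => "b" + OffsetOp(9)(row) # OUT OF RANGE => None +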
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from mo_future import text +from mo_logs import Log +from mo_math import is_integer + + +class OffsetOp(Expression): + """ + OFFSET INDEX INTO A TUPLE + """ + + def __init__(self, var): + Expression.__init__(self, None) + if not is_integer(var): + Log.error("Expecting an integer") + self.var = var + + def __call__(self, row, rownum=None, rows=None): + try: + return row[self.var] + except Exception: + return None + + def __data__(self): + return {"offset": self.var} + + def vars(self): + return set() + + def __hash__(self): + return self.var.__hash__() + + def __eq__(self, other): + return self.var == other + + def __unicode__(self): + return text(self.var) + + def __str__(self): + return str(self.var) diff --git a/vendor/jx_base/expressions/or_op.py b/vendor/jx_base/expressions/or_op.py new file mode 100644 index 0000000..ec26001 --- /dev/null +++ b/vendor/jx_base/expressions/or_op.py @@ -0,0 +1,99 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import and_op +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.true_op import TRUE +from jx_base.language import is_op +from mo_json import BOOLEAN + + +class OrOp(Expression): + data_type = BOOLEAN + + def __init__(self, terms): + Expression.__init__(self, terms) + self.terms = terms + + def __data__(self): + return {"or": [t.__data__() for t in self.terms]} + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[OrOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + def __call__(self, row=None, rownum=None, rows=None): + return any(t(row, rownum, rows) for t in self.terms) + + def __eq__(self, other): + if not is_op(other, OrOp): + return False + if len(self.terms) != len(other.terms): + return False + return all(t == u for t, u in zip(self.terms, other.terms)) + + @simplified + def partial_eval(self): + terms = [] + ands = [] + for t in self.terms: + simple = self.lang[t].partial_eval() + if simple.type != BOOLEAN: + simple = simple.exists() + + if simple is TRUE: + return TRUE + elif simple is FALSE: + pass + elif is_op(simple, OrOp): + terms.extend([tt for tt in simple.terms if tt not in terms]) + elif is_op(simple, AndOp): + ands.append(simple) + elif simple not in terms: + terms.append(simple) + + if ands: # REMOVE TERMS THAT ARE MORE RESTRICTIVE THAN OTHERS + for a in ands: + for tt in a.terms: + if tt in terms: + break + else: + terms.append(a) + + if len(terms) == 0: + return FALSE + if 
len(terms) == 1: + return terms[0] + return self.lang[OrOp(terms)] + + +and_op.OrOp = OrOp diff --git a/vendor/jx_base/expressions/prefix_op.py b/vendor/jx_base/expressions/prefix_op.py new file mode 100644 index 0000000..9c33d8b --- /dev/null +++ b/vendor/jx_base/expressions/prefix_op.py @@ -0,0 +1,88 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.basic_starts_with_op import BasicStartsWithOp +from jx_base.expressions.case_op import CaseOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import BOOLEAN + + +class PrefixOp(Expression): + has_simple_form = True + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, term) + if not term: + self.expr = NULL + self.prefix = NULL + elif is_data(term): + self.expr, self.prefix = term.items()[0] + else: + self.expr, self.prefix = term + + def __data__(self): + if not self.expr: + return {"prefix": {}} + elif is_op(self.expr, Variable) and is_literal(self.prefix): + return {"prefix": {self.expr.var: self.prefix.value}} + else: + return {"prefix": [self.expr.__data__(), self.prefix.__data__()]} + + def vars(self): + if self.expr is NULL: + return set() + return self.expr.vars() | self.prefix.vars() + + def map(self, map_): + if not self.expr: + return self + else: + return self.lang[PrefixOp([self.expr.map(map_), self.prefix.map(map_)])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + return self.lang[ + CaseOp( + [ + WhenOp(self.prefix.missing(), then=TRUE), + WhenOp(self.expr.missing(), then=FALSE), + BasicStartsWithOp([self.expr, self.prefix]), + ] + ) + ].partial_eval() + + def __eq__(self, other): + if not is_op(other, PrefixOp): + return False + return self.expr == other.expr and self.prefix == other.prefix diff --git a/vendor/jx_base/expressions/python_script.py b/vendor/jx_base/expressions/python_script.py new file mode 100644 index 0000000..0b76608 --- /dev/null +++ b/vendor/jx_base/expressions/python_script.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression + + +class PythonScript(Expression): + """ + REPRESENT A Python SCRIPT + """ + + pass diff --git a/vendor/jx_base/expressions/query_op.py b/vendor/jx_base/expressions/query_op.py new file mode 100644 index 0000000..db4da02 --- /dev/null +++ b/vendor/jx_base/expressions/query_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression + + +class QueryOp(Expression): + pass diff --git a/vendor/jx_base/expressions/range_op.py b/vendor/jx_base/expressions/range_op.py new file mode 100644 index 0000000..f5c6989 --- /dev/null +++ b/vendor/jx_base/expressions/range_op.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import operators +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.literal import Literal +from mo_json import BOOLEAN +from mo_logs import Log + + +class RangeOp(Expression): + has_simple_form = True + data_type = BOOLEAN + + def __new__(cls, term, *args): + Expression.__new__(cls, *args) + field, comparisons = term # comparisons IS A Literal() + return cls.lang[ + AndOp( + [ + getattr(cls.lang, operators[op])([field, Literal(value)]) + for op, value in comparisons.value.items() + ] + ) + ] + + def __init__(self, term): + Log.error("Should never happen!") diff --git a/vendor/jx_base/expressions/reg_exp_op.py b/vendor/jx_base/expressions/reg_exp_op.py new file mode 100644 index 0000000..77234ec --- /dev/null +++ b/vendor/jx_base/expressions/reg_exp_op.py @@ -0,0 +1,49 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
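+A SKETCH OF THE SIMPLE FORM ("name" IS A HYPOTHETICAL VARIABLE): + + {"regexp": {"name": "J.*"}} + RegExpOp([Variable("name"), "J.*"]) # EQUIVALENT EXPRESSION FORM +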
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.true_op import TRUE +from mo_json import BOOLEAN + + +class RegExpOp(Expression): + has_simple_form = True + data_type = BOOLEAN + + def __init__(self, terms): + Expression.__init__(self, terms) + self.var, self.pattern = terms + + def __data__(self): + return {"regexp": {self.var.var: self.pattern}} + + def vars(self): + return {self.var} + + def map(self, map_): + return self.lang[RegExpOp([self.var.map(map_), self.pattern])] + + def missing(self): + return FALSE + + def exists(self): + return TRUE diff --git a/vendor/jx_base/expressions/right_op.py b/vendor/jx_base/expressions/right_op.py new file mode 100644 index 0000000..bac6b6c --- /dev/null +++ b/vendor/jx_base/expressions/right_op.py @@ -0,0 +1,89 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified +from jx_base.expressions.basic_substring_op import BasicSubstringOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.length_op import LengthOp +from jx_base.expressions.literal import ZERO +from jx_base.expressions.literal import is_literal +from jx_base.expressions.max_op import MaxOp +from jx_base.expressions.min_op import MinOp +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.sub_op import SubOp +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import STRING + + +class RightOp(Expression): + has_simple_form = True + data_type = STRING + + def __init__(self, term): + Expression.__init__(self, term) + if is_data(term): + self.value, self.length = term.items()[0] + else: + self.value, self.length = term + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.length): + return {"right": {self.value.var: self.length.value}} + else: + return {"right": [self.value.__data__(), self.length.__data__()]} + + def vars(self): + return self.value.vars() | self.length.vars() + + def map(self, map_): + return self.lang[RightOp([self.value.map(map_), self.length.map(map_)])] + + def missing(self): + return self.lang[OrOp([self.value.missing(), self.length.missing()])] + + @simplified + def partial_eval(self): + value = self.lang[self.value].partial_eval() + length = self.lang[self.length].partial_eval() + max_length = LengthOp(value) + + return self.lang[ + WhenOp( + self.missing(), + **{ + "else": BasicSubstringOp( + [ + value, + MaxOp( + [ZERO, MinOp([max_length, SubOp([max_length, length])])] + ), + max_length, + ] + ) + } + ) + 
].partial_eval() diff --git a/vendor/jx_base/expressions/rows_op.py b/vendor/jx_base/expressions/rows_op.py new file mode 100644 index 0000000..28f5eed --- /dev/null +++ b/vendor/jx_base/expressions/rows_op.py @@ -0,0 +1,56 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.variable import Variable +from jx_base.language import is_op +from mo_logs import Log + + +class RowsOp(Expression): + has_simple_form = True + + def __init__(self, term): + Expression.__init__(self, term) + self.var, self.offset = term + if is_op(self.var, Variable): + if not any( + self.var.var.startswith(p) for p in ["row.", "rows.", "rownum"] + ): # VARIABLES ARE INTERPRETED LITERALLY + self.var = Literal(self.var.var) + else: + Log.error("can not handle") + else: + Log.error("can not handle") + + def __data__(self): + if is_literal(self.var) and is_literal(self.offset): + return {"rows": {self.var.json: self.offset.value}} + else: + return {"rows": [self.var.__data__(), self.offset.__data__()]} + + def vars(self): + return self.var.vars() | self.offset.vars() | {"rows", "rownum"} + + def map(self, map_): + return self.lang[RowsOp([self.var.map(map_), self.offset.map(map_)])] diff --git a/vendor/jx_base/expressions/script_op.py b/vendor/jx_base/expressions/script_op.py new file mode 100644 index 0000000..fc9977d --- /dev/null +++ b/vendor/jx_base/expressions/script_op.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
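+FOR EXAMPLE (A SKETCH), A RAW SCRIPT IS REFUSED UNLESS SCRIPTING IS EXPLICITLY ENABLED: + + {"script": "doc.a > 0"} # ScriptOp.define() ERRORS UNLESS ALLOW_SCRIPTING IS SET +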
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from mo_future import is_text +from mo_json import OBJECT +from mo_logs import Log + + +ALLOW_SCRIPTING = False  # ASSUMED DEFAULT: SCRIPTS ARE DISABLED UNLESS THIS IS SET + + +class ScriptOp(Expression): + """ + ONLY FOR WHEN YOU TRUST THE SCRIPT SOURCE + """ + + def __init__(self, script, data_type=OBJECT): + Expression.__init__(self, None) + if not is_text(script): + Log.error("expecting text of a script") + self.simplified = True + self.script = script + self.data_type = data_type + + @classmethod + def define(cls, expr): + if ALLOW_SCRIPTING: + Log.warning( + "Scripting has been activated: This has known security holes!!\nscript = {{script|quote}}", + script=expr.script.term, + ) + return cls.lang[ScriptOp(expr.script)] + else: + Log.error("scripting is disabled") + + def vars(self): + return set() + + def map(self, map_): + return self + + def __unicode__(self): + return self.script + + def __str__(self): + return str(self.script) diff --git a/vendor/jx_base/expressions/select_op.py b/vendor/jx_base/expressions/select_op.py new file mode 100644 index 0000000..8d6aa9b --- /dev/null +++ b/vendor/jx_base/expressions/select_op.py @@ -0,0 +1,91 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
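+A SKETCH OF THE TWO FORMS ACCEPTED BY define() BELOW: + + {"select": ["a", "b.c"]} # SIMPLE DOT-DELIMITED NAMES + {"select": [{"name": "total", "value": "a.b"}]} # NAME/VALUE DESCRIPTORS +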
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import jx_expression, Expression, _jx_expression +from jx_base.utils import is_variable_name +from mo_dots import wrap, is_container +from mo_future import is_text +from mo_logs import Log +from mo_math import UNION + + +class SelectOp(Expression): + has_simple_form = True + + def __init__(self, terms): + """ + :param terms: list OF {"name":name, "value":value} DESCRIPTORS + """ + self.terms = terms + + @classmethod + def define(cls, expr): + expr = wrap(expr) + term = expr.select + terms = [] + if not is_container(term): + raise Log.error("Expecting a list") + for t in term: + if is_text(t): + if not is_variable_name(t): + Log.error( + "expecting {{value}} to be a simple dot-delimited path name", value=t + ) + terms.append({"name": t, "value": _jx_expression(t, cls.lang)}) + elif t.name == None: + if t.value == None: + Log.error( + "expecting select parameters to have name and value properties" + ) + elif is_text(t.value): + if not is_variable_name(t.value): + Log.error( + "expecting {{value}} to be a simple dot-delimited path name", + value=t.value, + ) + else: + terms.append( + { + "name": t.value, + "value": _jx_expression(t.value, cls.lang), + } + ) + else: + Log.error("expecting a name property") + else: + terms.append({"name": t.name, "value": jx_expression(t.value)}) + return cls.lang[SelectOp(terms)] + + def __data__(self): + return { + "select": [ + {"name": t.name, "value": t.value.__data__()} + for t in wrap(self.terms) + ] + } + + def vars(self): + return UNION(t.value for t in self.terms) + + def map(self, map_): + return SelectOp( + [{"name": t.name, "value": t.value.map(map_)} for t in self.terms] + ) diff --git a/vendor/jx_base/expressions/split_op.py b/vendor/jx_base/expressions/split_op.py new file mode 100644 index 0000000..fc6d600 --- /dev/null +++ b/vendor/jx_base/expressions/split_op.py @@ -0,0 +1,84 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
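+A SKETCH OF THE SIMPLE FORM ("path" IS A HYPOTHETICAL VARIABLE): + + {"split": {"path": "."}} + SplitOp([Variable("path"), Literal(".")]) # EQUIVALENT EXPRESSION FORM +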
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.eq_op import EqOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.find_op import FindOp +from jx_base.expressions.literal import Literal, ZERO +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.script_op import ScriptOp +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.variable import Variable +from jx_base.language import is_op + + +class SplitOp(Expression): + has_simple_form = True + + def __init__(self, term, **kwargs): + Expression.__init__(self, term) + self.value, self.find = term + # default AND start ARE ASSUMED DEFAULTS (MIRRORING FindOp); vars(), map() AND missing() BELOW EXPECT THEM + self.default = kwargs.get("default", NULL) + self.start = kwargs.get("start", ZERO) + + def __data__(self): + if is_op(self.value, Variable) and is_literal(self.find): + return {"split": {self.value.var: self.find.value}} + else: + return {"split": [self.value.__data__(), self.find.__data__()]} + + def vars(self): + return ( + self.value.vars() + | self.find.vars() + | self.default.vars() + | self.start.vars() + ) + + def map(self, map_): + return SplitOp( + [self.value.map(map_), self.find.map(map_)], + start=self.start.map(map_), + default=self.default.map(map_), + ) + + def missing(self): + v = self.value.to_es_script(not_null=True) + find = self.find.to_es_script(not_null=True) + index = v + ".indexOf(" + find + ", " + self.start.to_es_script() + ")" + + return self.lang[ + AndOp( + [ + self.default.missing(), + OrOp( + [ + self.value.missing(), + self.find.missing(), + EqOp([ScriptOp(index), Literal(-1)]), + ] + ), + ] + ) + ] + + def exists(self): + return TRUE diff --git a/vendor/jx_base/expressions/sql_eq_op.py b/vendor/jx_base/expressions/sql_eq_op.py new file mode 100644 index 0000000..a51d9f3 --- /dev/null +++ b/vendor/jx_base/expressions/sql_eq_op.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.eq_op import EqOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.language import is_op +from mo_json import BOOLEAN + + +class SqlEqOp(Expression): + data_type = BOOLEAN + + def __init__(self, terms): + Expression.__init__(self, terms) + self.lhs, self.rhs = terms + + def __data__(self): + return {"sql.eq": [self.lhs.__data__(), self.rhs.__data__()]} + + def missing(self): + return FALSE + + def __eq__(self, other): + if not is_op(other, EqOp): + return False + return self.lhs == other.lhs and self.rhs == other.rhs diff --git a/vendor/jx_base/expressions/sql_instr_op.py b/vendor/jx_base/expressions/sql_instr_op.py new file mode 100644 index 0000000..83a98c6 --- /dev/null +++ b/vendor/jx_base/expressions/sql_instr_op.py @@ -0,0 +1,41 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import INTEGER + + +class SqlInstrOp(Expression): + data_type = INTEGER + + def __init__(self, params): + Expression.__init__(self, params) + self.value, self.find = params + + def __data__(self): + return {"sql.instr": [self.value.__data__(), self.find.__data__()]} + + def vars(self): + return self.value.vars() | self.find.vars() + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/sql_script.py b/vendor/jx_base/expressions/sql_script.py new file mode 100644 index 0000000..7ef5f6d --- /dev/null +++ b/vendor/jx_base/expressions/sql_script.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression + + +class SQLScript(Expression): + """ + REPRESENT A SQL SCRIPT + """ + + pass diff --git a/vendor/jx_base/expressions/sql_substr_op.py b/vendor/jx_base/expressions/sql_substr_op.py new file mode 100644 index 0000000..a9051d2 --- /dev/null +++ b/vendor/jx_base/expressions/sql_substr_op.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from mo_json import INTEGER + + +class SqlSubstrOp(Expression): + data_type = INTEGER + + def __init__(self, params): + Expression.__init__(self, params) + self.value, self.start, self.length = params + + def __data__(self): + return { + "sql.substr": [ + self.value.__data__(), + self.start.__data__(), + self.length.__data__(), + ] + } + + def vars(self): + return self.value.vars() | self.start.vars() | self.length.vars() + + def missing(self): + return FALSE diff --git a/vendor/jx_base/expressions/string_op.py b/vendor/jx_base/expressions/string_op.py new file mode 100644 index 0000000..f778665 --- /dev/null +++ b/vendor/jx_base/expressions/string_op.py @@ -0,0 +1,75 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +import mo_json +from jx_base.expressions._utils import simplified +from jx_base.expressions.coalesce_op import CoalesceOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.first_op import FirstOp +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from jx_base.expressions.null_op import NULL +from jx_base.language import is_op +from mo_json import STRING, IS_NULL + + +class StringOp(Expression): + data_type = STRING + + def __init__(self, term): + Expression.__init__(self, [term]) + self.term = term + + def __data__(self): + return {"string": self.term.__data__()} + + def vars(self): + return self.term.vars() + + def map(self, map_): + return self.lang[StringOp(self.term.map(map_))] + + def missing(self): + return self.term.missing() + + @simplified + def partial_eval(self): + term = self.term + if term.type is IS_NULL: + return NULL + term = self.lang[FirstOp(term)].partial_eval() + if is_op(term, StringOp): + return term.term.partial_eval() + elif is_op(term, CoalesceOp): + return self.lang[ + CoalesceOp([self.lang[StringOp(t)].partial_eval() for t in term.terms]) + ] + elif is_literal(term): + if term.type == STRING: + return term + else: + return self.lang[Literal(mo_json.value2json(term.value))] + return self + + def __eq__(self, other): + if not is_op(other, StringOp): + return False + return self.term == other.term diff --git a/vendor/jx_base/expressions/sub_op.py b/vendor/jx_base/expressions/sub_op.py new file mode 100644 index 0000000..5c2ee3f --- /dev/null +++ 
b/vendor/jx_base/expressions/sub_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.base_binary_op import BaseBinaryOp + + +class SubOp(BaseBinaryOp): + op = "sub" diff --git a/vendor/jx_base/expressions/suffix_op.py b/vendor/jx_base/expressions/suffix_op.py new file mode 100644 index 0000000..121620b --- /dev/null +++ b/vendor/jx_base/expressions/suffix_op.py @@ -0,0 +1,94 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +import re + +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal, is_literal +from jx_base.expressions.reg_exp_op import RegExpOp +from jx_base.expressions.true_op import TRUE +from jx_base.expressions.variable import Variable +from jx_base.expressions.when_op import WhenOp +from jx_base.language import is_op +from mo_dots import is_data +from mo_json import BOOLEAN, STRING +from mo_logs import Log + + +class SuffixOp(Expression): + has_simple_form = True + data_type = BOOLEAN + + def __init__(self, term): + Expression.__init__(self, term) + if not term: + self.expr = self.suffix = None + elif is_data(term): + self.expr, self.suffix = term.items()[0] + else: + self.expr, self.suffix = term + + def __data__(self): + if self.expr is None: + return {"suffix": {}} + elif is_op(self.expr, Variable) and is_literal(self.suffix): + return {"suffix": {self.expr.var: self.suffix.value}} + else: + return {"suffix": [self.expr.__data__(), self.suffix.__data__()]} + + def missing(self): + """ + THERE IS PLENTY OF OPPORTUNITY TO SIMPLIFY missing EXPRESSIONS + OVERRIDE THIS METHOD TO SIMPLIFY + :return: + """ + return FALSE + + def vars(self): + if self.expr is None: + return set() + return self.expr.vars() | self.suffix.vars() + + def map(self, map_): + if self.expr is None: + return TRUE + else: + return self.lang[SuffixOp([self.expr.map(map_), self.suffix.map(map_)])] + + @simplified + def partial_eval(self): + if self.expr is None: + return TRUE + if not is_literal(self.suffix) and self.suffix.type == STRING: + Log.error("can only handle literal suffix") + + return WhenOp( + self.lang[AndOp([self.expr.exists(), self.suffix.exists()])], + **{ + "then": 
self.lang[ + RegExpOp([self.expr, Literal(".*" + re.escape(self.suffix.value))]) + ], + "else": FALSE, + } + ).partial_eval() diff --git a/vendor/jx_base/expressions/true_op.py b/vendor/jx_base/expressions/true_op.py new file mode 100644 index 0000000..65d50b0 --- /dev/null +++ b/vendor/jx_base/expressions/true_op.py @@ -0,0 +1,82 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import literal, false_op, _utils +from jx_base.expressions.literal import Literal +from jx_base.expressions.false_op import FALSE +from mo_json import BOOLEAN + + +class TrueOp(Literal): + data_type = BOOLEAN + + def __new__(cls, *args, **kwargs): + return object.__new__(cls, *args, **kwargs) + + def __init__(self, op=None, term=None): + Literal.__init__(self, True) + + @classmethod + def define(cls, expr): + return TRUE + + def __nonzero__(self): + return True + + def __eq__(self, other): + return (other is TRUE) or (other is True) + + def __data__(self): + return True + + def vars(self): + return set() + + def map(self, map_): + return self + + def missing(self): + return FALSE + + def is_true(self): + return TRUE + + def is_false(self): + return FALSE + + def __call__(self, row=None, rownum=None, rows=None): + return True + + def __unicode__(self): + return "true" + + def __str__(self): + return b"true" + + def __bool__(self): + return True + + +TRUE = TrueOp() + +literal.TRUE = TRUE +false_op.TRUE = TRUE +_utils.TRUE = TRUE diff --git a/vendor/jx_base/expressions/tuple_op.py b/vendor/jx_base/expressions/tuple_op.py new file mode 100644 index 0000000..b9ab065 --- /dev/null +++ b/vendor/jx_base/expressions/tuple_op.py @@ -0,0 +1,70 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
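+A SKETCH OF HOW partial_eval() BELOW COLLAPSES A TUPLE OF LITERALS INTO A SINGLE LITERAL: + + TupleOp([Literal(1), Literal(2)]).partial_eval() # => Literal([1, 2]) +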
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import _utils +from jx_base.expressions._utils import simplified +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.literal import is_literal +from mo_dots import is_many +from mo_json import OBJECT + + +class TupleOp(Expression): + data_type = OBJECT + + def __init__(self, terms): + Expression.__init__(self, terms) + if terms == None: + self.terms = [] + elif is_many(terms): + self.terms = terms + else: + self.terms = [terms] + + def __iter__(self): + return self.terms.__iter__() + + def __data__(self): + return {"tuple": [t.__data__() for t in self.terms]} + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[TupleOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + if all(is_literal(t) for t in self.terms): + return self.lang[Literal([t.value for t in self.terms])] + + return self + + +_utils.TupleOp = TupleOp diff --git a/vendor/jx_base/expressions/union_op.py b/vendor/jx_base/expressions/union_op.py new file mode 100644 index 0000000..2f3369e --- /dev/null +++ b/vendor/jx_base/expressions/union_op.py @@ -0,0 +1,84 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
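+A SKETCH OF HOW partial_eval() BELOW MERGES LITERAL TERMS WITH MIN, SKIPPING NULL: + + UnionOp([Literal(3), Literal(1), NULL]).partial_eval() # => Literal(1) +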
WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions._utils import simplified, merge_types +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.null_op import NULL +from jx_base.language import is_op +from mo_dots import is_many +from mo_math import MIN + + +class UnionOp(Expression): + def __init__(self, terms): + Expression.__init__(self, terms) + if terms == None: + self.terms = [] + elif is_many(terms): + self.terms = terms + else: + self.terms = [terms] + + def __data__(self): + return {"union": [t.__data__() for t in self.terms]} + + @property + def type(self): + return merge_types(t.type for t in self.terms) + + def vars(self): + output = set() + for t in self.terms: + output |= t.vars() + return output + + def map(self, map_): + return self.lang[UnionOp([t.map(map_) for t in self.terms])] + + def missing(self): + return FALSE + + @simplified + def partial_eval(self): + minimum = None + terms = [] + for t in self.terms: + simple = t.partial_eval() + if simple is NULL: + pass + elif is_op(simple, Literal): + minimum = MIN([minimum, simple.value]) + else: + terms.append(simple) + if len(terms) == 0: + if minimum == None: + return NULL + else: + return Literal(minimum) + else: + if minimum == None: + output = self.lang[UnionOp(terms)] + else: + output = self.lang[UnionOp([Literal(minimum)] + terms)] + + return output diff --git a/vendor/jx_base/expressions/unix_op.py b/vendor/jx_base/expressions/unix_op.py new file mode 100644 index 0000000..ee24994 --- /dev/null +++ b/vendor/jx_base/expressions/unix_op.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.expression import Expression +from mo_json import NUMBER + + +class UnixOp(Expression): + """ + FOR USING ON DATABASES WHICH HAVE A DATE COLUMNS: CONVERT TO UNIX + """ + + has_simple_form = True + data_type = NUMBER + + def __init__(self, term): + Expression.__init__(self, term) + self.value = term + + def vars(self): + return self.value.vars() + + def map(self, map_): + return self.lang[UnixOp(self.value.map(map_))] + + def missing(self): + return self.value.missing() diff --git a/vendor/jx_base/expressions/variable.py b/vendor/jx_base/expressions/variable.py new file mode 100644 index 0000000..0fb2917 --- /dev/null +++ b/vendor/jx_base/expressions/variable.py @@ -0,0 +1,98 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. WE COULD COPY partial_eval(), AND OTHERS, TO THIER RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import _utils, expression +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.missing_op import MissingOp +from jx_base.language import is_op +from jx_base.utils import get_property_name +from mo_dots import coalesce, is_sequence, split_field +from mo_dots.lists import last +from mo_future import is_text +from mo_json.typed_encoder import inserter_type_to_json_type + + +class Variable(Expression): + def __init__(self, var): + """ + :param var: DOT DELIMITED PATH INTO A DOCUMENT + + """ + Expression.__init__(self, None) + + # if self.lang != self.__class_.lang: + # pass + self.var = get_property_name(var) + jx_type = inserter_type_to_json_type.get(last(split_field(var))) + if jx_type: + self.data_type = jx_type + + def __call__(self, row, rownum=None, rows=None): + path = split_field(self.var) + for p in path: + row = row.get(p) + if row is None: + return None + if is_sequence(row) and len(row) == 1: + return row[0] + return row + + def __data__(self): + return self.var + + @property + def many(self): + return True + + def vars(self): + return {self} + + def map(self, map_): + return Variable(coalesce(map_.get(self.var), self.var)) + + def __hash__(self): + return self.var.__hash__() + + def __eq__(self, other): + if is_op(other, Variable): + return self.var == other.var + elif is_text(other): + return self.var == other + return False + + def __unicode__(self): + return self.var + + def __str__(self): + return str(self.var) + + def missing(self): + if self.var == "_id": + return FALSE + else: + return self.lang[MissingOp(self)] + + +IDENTITY = Variable(".") + +_utils.Variable = Variable +expression.Variable = Variable diff --git a/vendor/jx_base/expressions/when_op.py b/vendor/jx_base/expressions/when_op.py new file mode 100644 index 0000000..5f5dcdb --- /dev/null +++ b/vendor/jx_base/expressions/when_op.py @@ -0,0 +1,116 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +""" +# NOTE: + +THE self.lang[operator] PATTERN IS CASTING NEW OPERATORS TO OWN LANGUAGE; +KEEPING Python AS# Python, ES FILTERS AS ES FILTERS, AND Painless AS +Painless. 
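+A SKETCH OF THE DEGENERATE CASES SIMPLIFIED BY partial_eval() BELOW: + + WhenOp(c, **{"then": TRUE, "else": FALSE}) # => c + WhenOp(TRUE, **{"then": a, "else": b}) # => a + WhenOp(c, **{"then": FALSE, "else": TRUE}) # => NotOp(c) +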
WE COULD COPY partial_eval(), AND OTHERS, TO THEIR RESPECTIVE +LANGUAGE, BUT WE KEEP CODE HERE SO THERE IS LESS OF IT + +""" +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import first_op, eq_op, not_op +from jx_base.expressions._utils import simplified +from jx_base.expressions.and_op import AndOp +from jx_base.expressions.boolean_op import BooleanOp +from jx_base.expressions.expression import Expression +from jx_base.expressions.false_op import FALSE +from jx_base.expressions.literal import Literal +from jx_base.expressions.not_op import NotOp +from jx_base.expressions.null_op import NULL +from jx_base.expressions.or_op import OrOp +from jx_base.expressions.true_op import TRUE +from jx_base.language import is_op +from mo_dots import coalesce +from mo_json import INTEGER, NUMBER, OBJECT, NUMBER_TYPES +from mo_logs import Log + + +class WhenOp(Expression): + def __init__(self, term, **clauses): + Expression.__init__(self, [term]) + + self.when = term + self.then = coalesce(clauses.get("then"), NULL) + self.els_ = coalesce(clauses.get("else"), NULL) + + if self.then is NULL: + self.data_type = self.els_.type + elif self.els_ is NULL: + self.data_type = self.then.type + elif self.then.type == self.els_.type: + self.data_type = self.then.type + elif self.then.type in NUMBER_TYPES and self.els_.type in NUMBER_TYPES: + self.data_type = NUMBER + else: + self.data_type = OBJECT + + def __data__(self): + return { + "when": self.when.__data__(), + "then": None if self.then is NULL else self.then.__data__(), + "else": None if self.els_ is NULL else self.els_.__data__() + } + + def vars(self): + return self.when.vars() | self.then.vars() | self.els_.vars() + + def map(self, map_): + return self.lang[ + WhenOp( + self.when.map(map_), + **{"then": self.then.map(map_), "else": self.els_.map(map_)} + ) + ] + + def missing(self): + return self.lang[ + OrOp( + [ + AndOp([self.when, self.then.missing()]), + AndOp([NotOp(self.when), self.els_.missing()]), + ] + ) + ].partial_eval() + + @simplified + def partial_eval(self): + when = self.lang[BooleanOp(self.when)].partial_eval() + + if when is TRUE: + return self.lang[self.then].partial_eval() + elif when in [FALSE, NULL]: + return self.lang[self.els_].partial_eval() + elif is_op(when, Literal): + Log.error("Expecting `when` clause to return a Boolean, or `null`") + + then = self.lang[self.then].partial_eval() + els_ = self.lang[self.els_].partial_eval() + + if then is TRUE: + if els_ is FALSE: + return when + elif els_ is TRUE: + return TRUE + elif then is FALSE: + if els_ is FALSE: + return FALSE + elif els_ is TRUE: + return self.lang[NotOp(when)].partial_eval() + + return self.lang[WhenOp(when, **{"then": then, "else": els_})] + + +first_op.WhenOp = WhenOp +eq_op.WhenOp = WhenOp +not_op.WhenOp = WhenOp diff --git a/vendor/jx_base/facts.py b/vendor/jx_base/facts.py index f500e47..527aae0 100644 --- a/vendor/jx_base/facts.py +++ b/vendor/jx_base/facts.py @@ -5,11 +5,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/.
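The boolean folding in `WhenOp.partial_eval()` above is dense, so here is a short illustrative trace; the JSON spellings are examples, not the library's canonical forms:

```python
# Illustrative traces of WhenOp.partial_eval()'s folding rules (hypothetical
# expression JSON; W, T, E stand for arbitrary sub-expressions):
#
#   {"when": W, "then": true,  "else": false}  ->  W
#       then is TRUE and else is FALSE, so the WhenOp *is* its condition
#
#   {"when": W, "then": false, "else": true}   ->  {"not": W}
#       then is FALSE and else is TRUE, so it folds to NotOp(W)
#
#   {"when": true, "then": T, "else": E}       ->  T.partial_eval()
#       a constant-TRUE condition short-circuits to the then-clause
```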
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals + +from mo_future import is_text +from mo_logs import Log class Facts(object): @@ -18,10 +19,21 @@ class Facts(object): WITH THE RELATIONS THAT CONNECT THEM ALL, BUT LIMITED TO A TREE """ - def __init__(self, container, snowflake): + def __init__(self, name, container): + if not is_text(name): + Log.error("parameter is wrong") self.container = container - self.snowflake = snowflake + self.name = name @property def namespace(self): return self.container.namespace + + @property + def snowflake(self): + return self.schema.snowflake + + @property + def schema(self): + return self.container.ns.get_schema(self.name) + diff --git a/vendor/jx_base/language.py b/vendor/jx_base/language.py new file mode 100644 index 0000000..efda724 --- /dev/null +++ b/vendor/jx_base/language.py @@ -0,0 +1,251 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from copy import copy +from decimal import Decimal +from math import isnan + +from mo_dots import Data, data_types, listwrap, NullType, startswith_field +from mo_dots.lists import list_types, is_many +from mo_future import boolean_type, long, none_type, text, transpose +from mo_logs import Log +from mo_times import Date + +builtin_tuple = tuple + +Expression = None +expression_module = "jx_base.expressions" +JX = None +ID = "_op_id" + +_next_id = 0 + + +def next_id(): + global _next_id + try: + return _next_id + finally: + _next_id+=1 + + +def all_bases(bases): + for b in bases: + yield b + for y in all_bases(b.__bases__): + yield y + + +# EVERY OPERATOR WILL HAVE lang WHICH POINTS TO LANGUAGE +class LanguageElement(type): + def __new__(cls, name, bases, dct): + x = type.__new__(cls, name, bases, dct) + x.lang = None + if startswith_field(x.__module__, expression_module): + # ALL OPS IN expression_module ARE GIVEN AN ID, NO OTHERS + setattr(x, ID, next_id()) + return x + + def __init__(cls, *args): + global Expression, expression_module + type.__init__(cls, *args) + if not expression_module and cls.__name__ == "Expression": + # THE expression_module IS DETERMINED BY THE LOCATION OF Expression CLASS + Expression = cls + expression_module = cls.__module__ + + +BaseExpression = LanguageElement(str("BaseExpression"), (object,), {}) + + +class Language(object): + + def __init__(self, name): + global JX + if not name: + name = "JX" + JX = self + self.name = name + self.ops = None + + def register_ops(self, module_vars): + global JX + + if self.name != "JX": + self.ops = copy(JX.ops) + else: + num_ops = 1 + max( + obj.get_id() + for obj in module_vars.values() + if isinstance(obj, type) and hasattr(obj, ID) + ) + self.ops = [None] * num_ops + + for _, new_op in module_vars.items(): + if isinstance(new_op, type) and hasattr(new_op, ID): + # EXPECT OPERATORS TO HAVE id + # EXPECT NEW DEFINED OPS IN THIS MODULE TO HAVE lang NOT SET + curr = getattr(new_op, "lang") + if not curr: + old_op = self.ops[new_op.get_id()] + if old_op is not None and old_op.__name__ != new_op.__name__: + 
Log.error("Logic error") + self.ops[new_op.get_id()] = new_op + setattr(new_op, "lang", self) + + if self.name: + # ENSURE ALL OPS ARE DEFINED ON THE NEW LANGUAGE + for base_op, new_op in transpose(JX.ops, self.ops): + if new_op is base_op: + # MISSED DEFINITION, ADD ONE + new_op = type(base_op.__name__, (base_op,), {}) + self.ops[new_op.get_id()] = new_op + setattr(new_op, "lang", self) + + def __getitem__(self, item): + if item == None: + Log.error("expecting operator") + class_ = self.ops[item.get_id()] + if class_.__name__ != item.__class__.__name__: + Log.error("programming error") + item.__class__ = class_ + return item + + def __str__(self): + return self.name + + +def is_op(call, op): + """ + :param call: The specific operator instance (a method call) + :param op: The operator we are testing against + :return: isinstance(call, op), but faster + """ + try: + return call.get_id() == op.get_id() + except Exception as e: + return False + + +def is_expression(call): + if is_many(call): + return False + try: + output = getattr(call, ID, None) != None + except Exception: + output = False + # if output != isinstance(call, Expression): + # Log.error("programmer error") + return output + + +def value_compare(left, right, ordering=1): + """ + SORT VALUES, NULL IS THE LEAST VALUE + :param left: LHS + :param right: RHS + :param ordering: (-1, 0, 1) TO AFFECT SORT ORDER + :return: The return value is negative if x < y, zero if x == y and strictly positive if x > y. + """ + + try: + ltype = left.__class__ + rtype = right.__class__ + + if ltype in list_types or rtype in list_types: + if left == None: + return ordering + elif right == None: + return - ordering + + left = listwrap(left) + right = listwrap(right) + for a, b in zip(left, right): + c = value_compare(a, b) * ordering + if c != 0: + return c + + if len(left) < len(right): + return - ordering + elif len(left) > len(right): + return ordering + else: + return 0 + + if ltype is float and isnan(left): + left = None + ltype = none_type + if rtype is float and isnan(right): + right = None + rtype = none_type + + ltype_num = type_order(ltype, ordering) + rtype_num = type_order(rtype, ordering) + + type_diff = ltype_num - rtype_num + if type_diff != 0: + return ordering if type_diff > 0 else -ordering + + if ltype_num in (-10, 10): + return 0 + elif ltype is builtin_tuple: + for a, b in zip(left, right): + c = value_compare(a, b) + if c != 0: + return c * ordering + return 0 + elif ltype in data_types: + for k in sorted(set(left.keys()) | set(right.keys())): + c = value_compare(left.get(k), right.get(k)) * ordering + if c != 0: + return c + return 0 + elif left > right: + return ordering + elif left < right: + return -ordering + else: + return 0 + except Exception as e: + Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e) + + +def type_order(dtype, ordering): + o = TYPE_ORDER.get(dtype) + if o is None: + if dtype in NULL_TYPES: + return ordering * 10 + else: + Log.warning("type will be treated as its own type while sorting") + TYPE_ORDER[dtype] = 6 + return 6 + return o + + +NULL_TYPES = (none_type, NullType) + + +TYPE_ORDER = { + boolean_type: 0, + int: 1, + float: 1, + Decimal: 1, + Date: 1, + long: 1, + text: 3, + list: 4, + builtin_tuple: 4, + dict: 5, + Data: 5 +} + + + diff --git a/vendor/jx_base/meta_columns.py b/vendor/jx_base/meta_columns.py new file mode 100644 index 0000000..7df1760 --- /dev/null +++ b/vendor/jx_base/meta_columns.py @@ -0,0 +1,304 @@ +# encoding: utf-8 +# +# +# This
Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +import datetime +from collections import Mapping + +from jx_base import Column, TableDesc +from jx_base.schema import Schema +from mo_collections import UniqueIndex +from mo_dots import ( + Data, + FlatList, + NullType, + ROOT_PATH, + concat_field, + is_container, + join_field, + listwrap, + split_field, + unwraplist, + wrap) +from mo_future import binary_type, items, long, none_type, reduce, text +from mo_json import INTEGER, NUMBER, STRING, python_type_to_json_type +from mo_times.dates import Date + +DEBUG = False +META_TABLES_NAME = "meta.tables" +META_COLUMNS_NAME = "meta.columns" +META_COLUMNS_TYPE_NAME = "column" +singlton = None + + +def get_schema_from_list(table_name, frum, native_type_to_json_type=python_type_to_json_type): + """ + SCAN THE LIST FOR COLUMN TYPES + """ + columns = UniqueIndex(keys=("name",)) + _get_schema_from_list( + frum, + ".", + parent=".", + nested_path=ROOT_PATH, + columns=columns, + native_type_to_json_type=native_type_to_json_type, + ) + return Schema(table_name=table_name, columns=list(columns)) + + +def _get_schema_from_list( + frum, # The list + table_name, # Name of the table this list holds records for + parent, # parent path + nested_path, # each nested array, in reverse order + columns, # map from full name to column definition + native_type_to_json_type # dict from storage type name to json type name +): + for d in frum: + row_type = python_type_to_json_type[d.__class__] + + if row_type != "object": + # EXPECTING PRIMITIVE VALUE + full_name = parent + column = columns[full_name] + if not column: + column = Column( + name=concat_field(table_name, full_name), + es_column=full_name, + es_index=".", + es_type=d.__class__.__name__, + jx_type=None, # WILL BE SET BELOW + last_updated=Date.now(), + nested_path=nested_path, + ) + columns.add(column) + column.es_type = _merge_python_type(column.es_type, d.__class__) + column.jx_type = native_type_to_json_type[column.es_type] + else: + for name, value in d.items(): + full_name = concat_field(parent, name) + column = columns[full_name] + if not column: + column = Column( + name=concat_field(table_name, full_name), + es_column=full_name, + es_index=".", + es_type=value.__class__.__name__, + jx_type=None, # WILL BE SET BELOW + last_updated=Date.now(), + nested_path=nested_path, + ) + columns.add(column) + if is_container(value): # GET TYPE OF MULTIVALUE + v = list(value) + if len(v) == 0: + this_type_name = none_type.__name__ + elif len(v) == 1: + this_type_name = v[0].__class__.__name__ + else: + this_type_name = reduce( + _merge_python_type, (vi.__class__.__name__ for vi in value) + ) + else: + this_type_name = value.__class__.__name__ + column.es_type = _merge_python_type(column.es_type, this_type_name) + try: + column.jx_type = native_type_to_json_type[column.es_type] + except Exception as e: + raise e + + if this_type_name in {"object", "dict", "Mapping", "Data"}: + _get_schema_from_list( + [value], table_name, full_name, nested_path, columns, native_type_to_json_type + ) + elif this_type_name in {"list", "FlatList"}: + np = listwrap(nested_path) + newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np) + _get_schema_from_list( + value, table_name, full_name, newpath, columns, native_type_to_json_type + )
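A hedged usage sketch of `get_schema_from_list()` from the new `meta_columns.py` above; the records, table name, and printed attributes are illustrative only:

```python
# Hypothetical usage: scan plain Python records and inspect the inferred
# columns. The "tags" column exercises the multivalue branch above.
from jx_base.meta_columns import get_schema_from_list

records = [
    {"name": "a", "value": 1},
    {"name": "b", "value": 2.5, "tags": ["x", "y"]},
]
schema = get_schema_from_list("my_table", records)
for column in schema.columns:
    print(column.name, column.es_type, column.jx_type)
```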
+ + +def get_id(column): + """ + :param column: + :return: Elasticsearch id for column + """ + return column.es_index + "|" + column.es_column + + +META_COLUMNS_DESC = TableDesc( + name=META_COLUMNS_NAME, + url=None, + query_path=ROOT_PATH, + last_updated=Date.now(), + columns=wrap( + [ + Column( + name=c, + es_index=META_COLUMNS_NAME, + es_column=c, + es_type="keyword", + jx_type=STRING, + last_updated=Date.now(), + nested_path=ROOT_PATH, + ) + for c in [ + "name", + "es_type", + "jx_type", + "nested_path", + "es_column", + "es_index", + "partitions", + ] + ] + + [ + Column( + name=c, + es_index=META_COLUMNS_NAME, + es_column=c, + es_type="integer", + jx_type=INTEGER, + last_updated=Date.now(), + nested_path=ROOT_PATH, + ) + for c in ["count", "cardinality", "multi"] + ] + + [ + Column( + name="last_updated", + es_index=META_COLUMNS_NAME, + es_column="last_updated", + es_type="double", + jx_type=NUMBER, + last_updated=Date.now(), + nested_path=ROOT_PATH + ) + ] + ) + +) + +META_TABLES_DESC = TableDesc( + name=META_TABLES_NAME, + url=None, + query_path=ROOT_PATH, + last_updated=Date.now(), + columns=wrap( + [ + Column( + name=c, + es_index=META_TABLES_NAME, + es_column=c, + es_type="string", + jx_type=STRING, + last_updated=Date.now(), + nested_path=ROOT_PATH + ) + for c in [ + "name", + "url", + "query_path" + ] + ] + [ + Column( + name=c, + es_index=META_TABLES_NAME, + es_column=c, + es_type="integer", + jx_type=INTEGER, + last_updated=Date.now(), + nested_path=ROOT_PATH + ) + for c in [ + "timestamp" + ] + ] + ) +) + + + +SIMPLE_METADATA_COLUMNS = ( # FOR PURELY INTERNAL PYTHON LISTS, NOT MAPPING TO ANOTHER DATASTORE + [ + Column( + name=c, + es_index=META_COLUMNS_NAME, + es_column=c, + es_type="string", + jx_type=STRING, + last_updated=Date.now(), + nested_path=ROOT_PATH, + ) + for c in ["table", "name", "type", "nested_path"] + ] + + [ + Column( + name=c, + es_index=META_COLUMNS_NAME, + es_column=c, + es_type="long", + jx_type=INTEGER, + last_updated=Date.now(), + nested_path=ROOT_PATH, + ) + for c in ["count", "cardinality", "multi"] + ] + + [ + Column( + name="last_updated", + es_index=META_COLUMNS_NAME, + es_column="last_updated", + es_type="time", + jx_type=NUMBER, + last_updated=Date.now(), + nested_path=ROOT_PATH, + ) + ] +) + +_merge_order = { + none_type: 0, + NullType: 1, + bool: 2, + int: 3, + long: 3, + Date: 4, + datetime: 4, + float: 5, + text: 6, + binary_type: 6, + object: 7, + dict: 8, + Mapping: 9, + Data: 10, + list: 11, + FlatList: 12, +} + +for k, v in items(_merge_order): + _merge_order[k.__name__] = v + + +def _merge_python_type(A, B): + a = _merge_order[A] + b = _merge_order[B] + + if a >= b: + output = A + else: + output = B + + if isinstance(output, str): + return output + else: + return output.__name__ diff --git a/vendor/jx_base/namespace.py b/vendor/jx_base/namespace.py index 66e818b..40bc24f 100644 --- a/vendor/jx_base/namespace.py +++ b/vendor/jx_base/namespace.py @@ -5,15 +5,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping +from __future__ import absolute_import, division, unicode_literals from jx_base.query import QueryOp +from mo_dots import is_data class Namespace(object): @@ -32,12 +29,11 @@ class Namespace(object): raise NotImplementedError() def _convert_query(self, query): - output = QueryOp("from", None) + output = QueryOp(None) output.select = self._convert_clause(query.select) output.where = self.convert(query.where) output["from"] = self._convert_from(query["from"]) output.edges = self._convert_clause(query.edges) - output.having = convert_list(self._convert_having, query.having) output.window = convert_list(self._convert_window, query.window) output.sort = self._convert_clause(query.sort) output.format = query.format @@ -50,9 +46,6 @@ class Namespace(object): def _convert_clause(self, clause): raise NotImplementedError() - def _convert_having(self, clause): - raise NotImplementedError() - def _convert_window(self, clause): raise NotImplementedError() @@ -60,9 +53,9 @@ class Namespace(object): def convert_list(operator, operand): if operand==None: return None - elif isinstance(operand, Mapping): + elif is_data(operand): return operator(operand) else: - return map(operator, operand) + return list(map(operator, operand)) diff --git a/vendor/jx_base/queries.py b/vendor/jx_base/queries.py index fbc018c..da45317 100644 --- a/vendor/jx_base/queries.py +++ b/vendor/jx_base/queries.py @@ -5,14 +5,11 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import re -from mo_future import text_type - +from mo_future import is_text from mo_logs import Log keyword_pattern = re.compile(r"(\w|[\\.,$-])+(?:\.(\w|[\\.,$-])+)*") @@ -23,7 +20,7 @@ def is_variable_name(value): Log.warning("not expected") return True - if not value or not isinstance(value, text_type): + if not value or not is_text(value): return False # _a._b value = value.lstrip(".") if not value: diff --git a/vendor/jx_base/query.py b/vendor/jx_base/query.py index ecec54c..7a32240 100644 --- a/vendor/jx_base/query.py +++ b/vendor/jx_base/query.py @@ -5,29 +5,29 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping from copy import copy +from importlib import import_module import jx_base +import mo_math from jx_base.dimensions import Dimension -from jx_base.domains import Domain, SetDomain, DefaultDomain -from jx_base.expressions import jx_expression, Expression, Variable, LeavesOp, ScriptOp, OffsetOp, TRUE, FALSE -from jx_base.queries import is_variable_name -from mo_dots import Data, relative_field, concat_field -from mo_dots import coalesce, Null, set_default, unwraplist, literal_field -from mo_dots import wrap, unwrap, listwrap -from mo_dots.lists import FlatList -from mo_future import text_type -from mo_json.typed_encoder import untype_path, STRUCT +from jx_base.domains import DefaultDomain, Domain, SetDomain +from jx_base.expressions import Expression, FALSE, LeavesOp, QueryOp as QueryOp_, ScriptOp, Variable, jx_expression +from jx_base.language import is_expression, is_op +from jx_base.utils import is_variable_name +from mo_dots import Data, FlatList, Null, coalesce, concat_field, is_container, is_data, is_list, listwrap, \ + literal_field, relative_field, set_default, unwrap, unwraplist, wrap, is_many +from mo_future import is_text, text +from mo_json import STRUCT +from mo_json.typed_encoder import untype_path from mo_logs import Log -from mo_math import AND, UNION, Math +from mo_math import AND, UNION, is_number +BAD_SELECT = "Expecting `value` or `aggregate` in select clause not {{select}}" DEFAULT_LIMIT = 10 MAX_LIMIT = 10000 DEFAULT_SELECT = Data(name="count", value=jx_expression("."), aggregate="count", default=0) @@ -36,32 +36,26 @@ _jx = None _Column = None + def _late_import(): global _jx global _Column - from jx_python.meta import Column as _Column + from jx_base import Column as _Column from jx_python import jx as _jx _ = _jx _ = _Column +class QueryOp(QueryOp_): + __slots__ = ["frum", "select", "edges", "groupby", "where", "window", "sort", "limit", "format", "chunk_size", "destination"] -class QueryOp(Expression): - __slots__ = ["frum", "select", "edges", "groupby", "where", "window", "sort", "limit", "having", "format", "isLean"] - - # def __new__(cls, op=None, frum=None, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None): - # output = object.__new__(cls) - # for s in QueryOp.__slots__: - # setattr(output, s, None) - # return output - - def __init__(self, op, frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None): + def __init__(self,frum, select=None, edges=None, groupby=None, window=None, where=None, sort=None, limit=None, format=None, chunk_size=None, destination=None): if isinstance(frum, jx_base.Table): pass else: - Expression.__init__(self, op, frum) + Expression.__init__(self,frum) self.frum = frum self.select = select self.edges = edges @@ -71,10 +65,12 @@ class QueryOp(Expression): self.sort = sort self.limit = limit self.format = format + self.chunk_size = chunk_size + self.destination = destination def __data__(self): def select___data__(): - if isinstance(self.select, list): + if is_list(self.select): return [s.__data__() for s in self.select] else: return self.select.__data__() @@ -103,16 +99,15 @@ class QueryOp(Expression): format=copy(self.format) ) - 
def vars(self, exclude_where=False, exclude_select=False): """ :return: variables in query """ def edges_get_all_vars(e): output = set() - if isinstance(e.value, text_type): + if is_text(e.value): output.add(e.value) - if isinstance(e.value, Expression): + if is_expression(e.value): output |= e.value.vars() if e.domain.key: output.add(e.domain.key) @@ -180,13 +175,12 @@ class QueryOp(Expression): edge.range.max = e.range.max.map(map_) return edge - if isinstance(self.select, list): + if is_list(self.select): select = wrap([map_select(s, map_) for s in self.select]) else: select = map_select(self.select, map_) return QueryOp( - "from", frum=self.frum.map(map_), select=select, edges=wrap([map_edge(e, map_) for e in self.edges]), @@ -206,20 +200,23 @@ class QueryOp(Expression): """ NORMALIZE QUERY SO IT CAN STILL BE JSON """ - if isinstance(query, QueryOp) or query == None: + if is_op(query, QueryOp) or query == None: return query query = wrap(query) table = container.get_table(query['from']) schema = table.schema output = QueryOp( - op="from", frum=table, format=query.format, - limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT)) + chunk_size=query.chunk_size, + destination=query.destination, ) - if query.select or isinstance(query.select, (Mapping, list)): + _import_temper_limit() + output.limit = temper_limit(query.limit, query) + + if query.select or is_many(query.select) or is_data(query.select): output.select = _normalize_selects(query.select, query.frum, schema=schema) else: if query.edges or query.groupby: @@ -239,15 +236,12 @@ class QueryOp(Expression): output.edges = Null output.groupby = Null - output.where = _normalize_where(query.where, schema=schema) + output.where = _normalize_where({"and": listwrap(query.where)}, schema=schema) output.window = [_normalize_window(w) for w in listwrap(query.window)] - output.having = None output.sort = _normalize_sort(query.sort) - if not Math.is_integer(output.limit) or output.limit < 0: + if output.limit != None and (not mo_math.is_integer(output.limit) or output.limit < 0): Log.error("Expecting limit >= 0") - output.isLean = query.isLean - return output @@ -263,7 +257,6 @@ class QueryOp(Expression): def column_names(self): return listwrap(self.select).name + self.edges.name + self.groupby.name - def __getitem__(self, item): if item == "from": return self.frum @@ -280,7 +273,20 @@ class QueryOp(Expression): return output +def temper_limit(limit, query): + return coalesce(query.limit, 10) + + +def _import_temper_limit(): + global temper_limit + try: + temper_limit = import_module("jx_elasticsearch.es52").temper_limit + except Exception as e: + pass + + canonical_aggregates = wrap({ + "cardinality": {"name":"cardinality", "default": 0}, "count": {"name": "count", "default": 0}, "min": {"name": "minimum"}, "max": {"name": "maximum"}, @@ -291,15 +297,15 @@ canonical_aggregates = wrap({ def _normalize_selects(selects, frum, schema=None, ): - if frum == None or isinstance(frum, (list, set, text_type)): - if isinstance(selects, list): + if frum == None or isinstance(frum, (list, set, text)): + if is_list(selects): if len(selects) == 0: return Null else: output = [_normalize_select_no_context(s, schema=schema) for s in selects] else: return _normalize_select_no_context(selects, schema=schema) - elif isinstance(selects, list): + elif is_list(selects): output = [ss for s in selects for ss in _normalize_select(s, frum=frum, schema=schema)] else: output = _normalize_select(selects, frum, schema=schema) @@ -322,7 +328,7 @@ def 
_normalize_select(select, frum, schema=None): if not _Column: _late_import() - if isinstance(select, text_type): + if is_text(select): canonical = select = Data(value=select) else: select = wrap(select) @@ -335,7 +341,10 @@ def _normalize_select(select, frum, schema=None): return frum._normalize_select(canonical) output = [] - if not select.value or select.value == ".": + + if len(select) and not select.value: + Log.error(BAD_SELECT, select=select) + elif not select.value or select.value == ".": output.extend([ set_default( { @@ -346,16 +355,16 @@ ) for c in frum.get_leaves() ]) - elif isinstance(select.value, text_type): + elif is_text(select.value): if select.value.endswith(".*"): canonical.name = coalesce(select.name, ".") value = jx_expression(select.value[:-2], schema=schema) - if not isinstance(value, Variable): + if not is_op(value, Variable): Log.error("`*` over general expression not supported yet") output.append([ set_default( { - "value": LeavesOp("leaves", value, prefix=select.prefix), + "value": LeavesOp(value, prefix=select.prefix), "format": "dict" # MARKUP FOR DECODING }, canonical @@ -383,7 +392,7 @@ def _normalize_select_no_context(select, schema=None): if not _Column: _late_import() - if isinstance(select, text_type): + if is_text(select): select = Data(value=select) else: select = wrap(select) @@ -393,26 +402,28 @@ output.name = coalesce(select.name, select.aggregate) if output.name: output.value = jx_expression(".", schema=schema) + elif len(select): + Log.error(BAD_SELECT, select=select) else: return Null - elif isinstance(select.value, text_type): + elif is_text(select.value): if select.value.endswith(".*"): - name = select.value[:-2] + name = select.value[:-2].lstrip(".") output.name = coalesce(select.name, name) - output.value = LeavesOp("leaves", Variable(name), prefix=coalesce(select.prefix, name)) + output.value = LeavesOp(Variable(name), prefix=coalesce(select.prefix, name)) else: if select.value == ".": output.name = coalesce(select.name, select.aggregate, ".") output.value = jx_expression(select.value, schema=schema) elif select.value == "*": output.name = coalesce(select.name, select.aggregate, ".") - output.value = LeavesOp("leaves", Variable(".")) + output.value = LeavesOp(Variable(".")) else: - output.name = coalesce(select.name, select.value, select.aggregate) + output.name = coalesce(select.name, select.value.lstrip("."), select.aggregate) output.value = jx_expression(select.value, schema=schema) - elif isinstance(select.value, (int, float)): + elif is_number(output.value): if not output.name: - output.name = text_type(select.value) + output.name = text(output.value) output.value = jx_expression(select.value, schema=schema) else: output.value = jx_expression(select.value, schema=schema) @@ -441,18 +452,19 @@ def _normalize_edge(edge, dim_index, limit, schema=None): if not _Column: _late_import() - if edge == None: + if not edge: Log.error("Edge has no value, or expression is empty") - elif isinstance(edge, text_type): + elif is_text(edge): if schema: leaves = unwraplist(list(schema.leaves(edge))) - if not leaves or isinstance(leaves, (list, set)): + if not leaves or is_container(leaves): return [ Data( name=edge, value=jx_expression(edge, schema=schema), allowNulls=True, - dim=dim_index + dim=dim_index, + domain=_normalize_domain(None, limit) ) ] elif isinstance(leaves, _Column): return [Data( name=edge, value=jx_expression(edge, schema=schema), allowNulls=True,
dim=dim_index, domain=_normalize_domain(domain=leaves, limit=limit, schema=schema) )] - elif isinstance(leaves.fields, list) and len(leaves.fields) == 1: + elif is_list(leaves.fields) and len(leaves.fields) == 1: return [Data( name=leaves.name, value=jx_expression(leaves.fields[0], schema=schema), @@ -490,10 +502,10 @@ ] else: edge = wrap(edge) - if not edge.name and not isinstance(edge.value, text_type): + if not edge.name and not is_text(edge.value): Log.error("You must name compound and complex edges: {{edge}}", edge=edge) - if isinstance(edge.value, (list, set)) and not edge.domain: + if is_container(edge.value) and not edge.domain: # COMPLEX EDGE IS SHORT HAND domain = _normalize_domain(schema=schema) domain.dimension = Data(fields=edge.value) @@ -521,8 +533,10 @@ def _normalize_groupby(groupby, limit, schema=None): if groupby == None: return None - output = wrap([n for ie, e in enumerate(listwrap(groupby)) for n in _normalize_group(e, ie, limit, schema=schema) ]) - if any(o==None for o in output): + output = wrap([n for e in listwrap(groupby) for n in _normalize_group(e, None, limit, schema=schema)]) + for i, o in enumerate(output): + o.dim = i + if any(o == None for o in output): Log.error("not expected") return output @@ -534,14 +548,14 @@ :param schema: for context :return: a normalized groupby """ - if isinstance(edge, text_type): + if is_text(edge): if edge.endswith(".*"): prefix = edge[:-2] if schema: output = wrap([ - { - "name": concat_field(prefix, literal_field(relative_field(untype_path(c.names["."]), prefix))), - "put": {"name": literal_field(untype_path(c.names["."]))}, + { # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE + "name": concat_field(prefix, literal_field(relative_field(untype_path(c.name), prefix))), + "put": {"name": literal_field(untype_path(c.name))}, "value": jx_expression(c.es_column, schema=schema), "allowNulls": True, "domain": {"type": "default"} @@ -553,9 +567,9 @@ return wrap([{ "name": untype_path(prefix), "put": {"name": literal_field(untype_path(prefix))}, - "value": jx_expression(prefix, schema=schema), + "value": LeavesOp(Variable(prefix)), "allowNulls": True, - "dim":dim_index, + "dim": dim_index, "domain": {"type": "default"} }]) @@ -568,10 +582,10 @@ }]) else: edge = wrap(edge) - if (edge.domain and edge.domain.type != "default") or edge.allowNulls != None: + if (edge.domain and edge.domain.type != "default"): Log.error("groupby does not accept complicated domains") - if not edge.name and not isinstance(edge.value, text_type): + if not edge.name and not is_text(edge.value): Log.error("You must name compound edges: {{edge}}", edge= edge) return wrap([{ @@ -587,13 +601,13 @@ def _normalize_domain(domain=None, limit=None, schema=None): if not domain: return Domain(type="default", limit=limit) elif isinstance(domain, _Column): - if domain.partitions: + if domain.partitions and domain.multi <= 1: # MULTI FIELDS ARE TUPLES, AND THERE ARE TOO MANY POSSIBLE COMBOS AT THIS TIME return SetDomain(partitions=domain.partitions.left(limit)) else: return DefaultDomain(type="default", limit=limit) elif isinstance(domain, Dimension): return domain.getDomain() - elif schema and isinstance(domain, text_type) and schema[domain]: + elif schema and
is_text(domain) and schema[domain]: return schema[domain].getDomain() elif isinstance(domain, Domain): return domain @@ -613,7 +627,7 @@ def _normalize_window(window, schema=None): if hasattr(v, "__call__"): expr = v else: - expr = ScriptOp("script", v) + expr = ScriptOp(v) return Data( name=coalesce(window.name, window.value), @@ -638,8 +652,6 @@ def _normalize_range(range): def _normalize_where(where, schema=None): - if where == None: - return TRUE return jx_expression(where, schema=schema) @@ -653,7 +665,7 @@ def _map_term_using_schema(master, path, term, schema_edges): if isinstance(dimension, Dimension): domain = dimension.getDomain() if dimension.fields: - if isinstance(dimension.fields, Mapping): + if is_data(dimension.fields): # EXPECTING A TUPLE for local_field, es_field in dimension.fields.items(): local_value = v[local_field] @@ -696,7 +708,7 @@ def _map_term_using_schema(master, path, term, schema_edges): continue else: Log.error("not expected") - elif isinstance(v, Mapping): + elif is_data(v): sub = _map_term_using_schema(master, path + [k], v, schema_edges[k]) output.append(sub) continue @@ -710,7 +722,7 @@ def _where_terms(master, where, schema): USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS) """ - if isinstance(where, Mapping): + if is_data(where): if where.term: # MAP TERM try: @@ -722,13 +734,13 @@ def _where_terms(master, where, schema): # MAP TERM output = FlatList() for k, v in where.terms.items(): - if not isinstance(v, (list, set)): + if not is_container(v): Log.error("terms filter expects list of values") edge = schema.edges[k] if not edge: output.append({"terms": {k: v}}) else: - if isinstance(edge, text_type): + if is_text(edge): # DIRECT FIELD REFERENCE return {"terms": {edge: v}} try: @@ -736,7 +748,7 @@ def _where_terms(master, where, schema): except Exception as e: Log.error("programmer error", e) fields = domain.dimension.fields - if isinstance(fields, Mapping): + if is_data(fields): or_agg = [] for vv in v: and_agg = [] @@ -746,7 +758,7 @@ def _where_terms(master, where, schema): and_agg.append({"term": {es_field: vvv}}) or_agg.append({"and": and_agg}) output.append({"or": or_agg}) - elif isinstance(fields, list) and len(fields) == 1 and is_variable_name(fields[0]): + elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]): output.append({"terms": {fields[0]: v}}) elif domain.partitions: output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]}) @@ -770,19 +782,19 @@ def _normalize_sort(sort=None): output = FlatList() for s in listwrap(sort): - if isinstance(s, text_type): + if is_text(s): output.append({"value": jx_expression(s), "sort": 1}) - elif isinstance(s, Expression): + elif is_expression(s): output.append({"value": s, "sort": 1}) - elif Math.is_integer(s): - output.append({"value": OffsetOp("offset", s), "sort": 1}) + elif mo_math.is_integer(s): + output.append({"value": jx_expression({"offset": s}), "sort": 1}) elif not s.sort and not s.value and all(d in sort_direction for d in s.values()): for v, d in s.items(): output.append({"value": jx_expression(v), "sort": sort_direction[d]}) elif not s.sort and not s.value: Log.error("`sort` clause must have a `value` property") else: - output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": coalesce(sort_direction[s.sort], 1)}) + output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": sort_direction[s.sort]}) return output @@ -795,8 +807,7 @@ sort_direction = { 1: 1, 0: 0, -1: 
-1, - None: 1, - Null: 1 + None: 1 } diff --git a/vendor/jx_base/schema.py b/vendor/jx_base/schema.py index fbea567..5eef8e7 100644 --- a/vendor/jx_base/schema.py +++ b/vendor/jx_base/schema.py @@ -5,16 +5,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from copy import copy -from mo_dots import Null, startswith_field, set_default, wrap -from mo_json.typed_encoder import unnest_path, untype_path, STRUCT, EXISTS, OBJECT, NESTED +from mo_dots import Null, relative_field, set_default, startswith_field, wrap +from mo_json import EXISTS, NESTED, OBJECT, STRUCT +from mo_json.typed_encoder import unnest_path, untype_path from mo_logs import Log @@ -56,7 +55,7 @@ class Schema(object): :param column: :return: NAME OF column """ - return column.names[self.query_path] + return relative_field(column.name, self.query_path) def values(self, name): """ @@ -86,13 +85,13 @@ full_name = self.query_path return set_default( { - c.names[full_name]: c.es_column + relative_field(c.name, full_name): c.es_column for k, cs in self.lookup.items() # if startswith_field(k, full_name) for c in cs if c.jx_type not in STRUCT }, { - c.names["."]: c.es_column + c.name: c.es_column for k, cs in self.lookup.items() # if startswith_field(k, full_name) for c in cs if c.jx_type not in STRUCT @@ -104,14 +103,13 @@ return copy(self._columns) - def _indexer(columns, query_path): - all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."} + all_names = set(unnest_path(c.name) for c in columns) | {"."} lookup_leaves = {} # ALL LEAF VARIABLES for full_name in all_names: for c in columns: - cname = c.names[query_path] + cname = relative_field(c.name, query_path) nfp = unnest_path(cname) if ( startswith_field(nfp, full_name) and @@ -126,7 +124,7 @@ lookup_variables = {} # ALL NOT-NESTED VARIABLES for full_name in all_names: for c in columns: - cname = c.names[query_path] + cname = relative_field(c.name, query_path) nfp = unnest_path(cname) if ( startswith_field(nfp, full_name) and @@ -142,7 +140,7 @@ relative_lookup = {} for c in columns: try: - cname = c.names[query_path] + cname = relative_field(c.name, query_path) cs = relative_lookup.setdefault(cname, set()) cs.add(c) diff --git a/vendor/jx_base/snowflake.py b/vendor/jx_base/snowflake.py index 4b19a79..3197453 100644 --- a/vendor/jx_base/snowflake.py +++ b/vendor/jx_base/snowflake.py @@ -5,11 +5,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals class Snowflake(object): diff --git a/vendor/jx_base/table.py b/vendor/jx_base/table.py index 2605170..a1697b4 100644 --- a/vendor/jx_base/table.py +++ b/vendor/jx_base/table.py @@ -5,11 +5,9 @@ # License, v. 2.0.
If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals class Table(object): @@ -20,3 +18,5 @@ def map(self, mapping): return self + def __data__(self): + return self.name diff --git a/vendor/jx_base/utils.py b/vendor/jx_base/utils.py new file mode 100644 index 0000000..026509c --- /dev/null +++ b/vendor/jx_base/utils.py @@ -0,0 +1,56 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# +from __future__ import absolute_import, division, unicode_literals + +import re + +from mo_future import is_text +from mo_logs import Log + +keyword_pattern = re.compile(r"(\w|[\\.])(\w|[\\.$-])*(?:\.(\w|[\\.$-])+)*") + + +def is_variable_name(value): + if value.__class__.__name__ == "Variable": + Log.warning("not expected") + return True + + if not value or not is_text(value): + return False # _a._b + value = value.lstrip(".") + if not value: + return True + match = keyword_pattern.match(value) + if not match: + return False + return match.group(0) == value + + +def dequote(s): + """ + If a string has single or double quotes around it, remove them. + Make sure the pair of quotes match. + If a matching pair of quotes is not found, return the string unchanged. + """ + if (s[0] == s[-1]) and s.startswith(("'", '"')): + return s[1:-1] + return s + + +def is_column_name(col): + if re.match(r"(\$|\w|\\\.)+(?:\.(\$|\w|\\\.)+)*\.\$\w{6}$", col): + return True + else: + return False + + +def get_property_name(s): + if s == ".": + return s + else: + return s.lstrip(".") diff --git a/vendor/jx_elasticsearch/README.md b/vendor/jx_elasticsearch/README.md new file mode 100644 index 0000000..1251462 --- /dev/null +++ b/vendor/jx_elasticsearch/README.md @@ -0,0 +1,42 @@ +# `jx_elasticsearch` + +This library implements [JSON Query Expressions]() atop an Elasticsearch instance. + + +## Contribution + +New or old versions of Elasticsearch should be added by copying the `es52` subdirectory and altering the implementation to deal with the differences. + +There are two directories in the git history that may help for old versions. + +1. `es09` for Elasticsearch version 0.9.x (with MVEL scripting) +2. `es14` for any 1.x version of Elasticsearch (with Groovy scripting) + +Both of these directories are too old to be used directly, but they do have code templates for their respective scripting language, and they do have other hints about how to construct queries with the limitations of the older versions. + + + +## elasticsearch.py + +This module handles the lifecycle of an Elasticsearch index in the context of +ETL. You only need this module if you are creating and retiring indexes. You +do not need this module for simply searching; for that I suggest using the +REST API directly. + +### Settings + +Both ```Cluster``` and ```Index``` objects accept the same settings dict; +each selects only the properties it requires.
+ + { + "host" : "http://192.168.0.98", + "port" : 9200, + "index" : "b2g_tests", + "type" : "test_result", + "debug" : true, + "limit_replicas" : true, + "schema_file" : "resources/schema/test_schema.json" + }, + + + diff --git a/vendor/jx_elasticsearch/__init__.py b/vendor/jx_elasticsearch/__init__.py index 7335061..786f8a8 100644 --- a/vendor/jx_elasticsearch/__init__.py +++ b/vendor/jx_elasticsearch/__init__.py @@ -5,17 +5,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from jx_base.container import type2container from mo_files.url import URL from mo_kwargs import override from mo_logs import Log -from pyLibrary.env import http +from mo_http import http DEBUG = False @@ -44,13 +42,7 @@ def new_instance( url.port = port status = http.get_json(url, stream=False) version = status.version.number - if version.startswith("1."): - from jx_elasticsearch.es14 import ES14 - type2container.setdefault("elasticsearch", ES14) - known_hosts[(host, port)] = ES14 - output = ES14(kwargs=kwargs) - return output - elif version.startswith(("5.", "6.")): + if version.startswith(("5.", "6.")): from jx_elasticsearch.es52 import ES52 type2container.setdefault("elasticsearch", ES52) known_hosts[(host, port)] = ES52 @@ -62,25 +54,3 @@ def new_instance( Log.error("Can not make an interpreter for Elasticsearch", cause=e) -# SCRUB THE QUERY SO IT IS VALID -# REPORT ERROR IF OUTPUT APEARS TO HAVE HIT GIVEN limit -def post(es, es_query, limit): - post_result = None - try: - if not es_query.sort: - es_query.sort = None - post_result = es.search(es_query) - - for facetName, f in post_result.facets.items(): - if f._type == "statistical": - continue - if not f.terms: - continue - - if not DEBUG and not limit and len(f.terms) == limit: - Log.error("Not all data delivered (" + str(len(f.terms)) + "/" + str(f.total) + ") try smaller range") - except Exception as e: - Log.error("Error with FromES", e) - - return post_result - diff --git a/vendor/pyLibrary/env/elasticsearch.py b/vendor/jx_elasticsearch/elasticsearch.py similarity index 69% rename from vendor/pyLibrary/env/elasticsearch.py rename to vendor/jx_elasticsearch/elasticsearch.py index 4fd13f5..c67dc33 100644 --- a/vendor/pyLibrary/env/elasticsearch.py +++ b/vendor/jx_elasticsearch/elasticsearch.py @@ -4,53 +4,52 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
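A hedged sketch of feeding the README's settings dict to these wrappers; both constructors select only the keys they need, but treat the exact call shapes here as assumptions based on the `@override` pattern in `elasticsearch.py`, not a guaranteed API:

```python
# Hypothetical construction of a Cluster and an Index from one settings dict.
from jx_elasticsearch.elasticsearch import Cluster

settings = {
    "host": "http://192.168.0.98",   # values from the README example above
    "port": 9200,
    "index": "b2g_tests",
    "type": "test_result",
    "debug": True,
    "limit_replicas": True,
}

cluster = Cluster(kwargs=settings)                    # uses host/port
index = cluster.get_or_create_index(kwargs=settings)  # uses the rest
```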
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +import ast import re -from collections import Mapping +from collections import namedtuple from copy import deepcopy +from jx_base import Column from jx_python import jx -from jx_python.expressions import jx_expression_to_function -from jx_python.meta import Column -from mo_dots import wrap, FlatList, coalesce, Null, Data, set_default, listwrap, literal_field, ROOT_PATH, concat_field, split_field, SLOT +from mo_dots import Data, FlatList, Null, ROOT_PATH, SLOT, coalesce, concat_field, is_data, is_list, listwrap, \ + literal_field, set_default, split_field, wrap, lists +from mo_files import File, mimetype from mo_files.url import URL -from mo_future import text_type, binary_type, items -from mo_json import value2json, json2value -from mo_json.typed_encoder import EXISTS_TYPE, BOOLEAN_TYPE, STRING_TYPE, NUMBER_TYPE, NESTED_TYPE, TYPE_PREFIX, json_type_to_inserter_type +from mo_future import binary_type, generator_types, is_binary, is_text, items, text +from mo_json import BOOLEAN, EXISTS, NESTED, NUMBER, OBJECT, STRING, json2value, value2json +from mo_json.typed_encoder import BOOLEAN_TYPE, EXISTS_TYPE, NESTED_TYPE, NUMBER_TYPE, STRING_TYPE, TYPE_PREFIX, \ + json_type_to_inserter_type from mo_kwargs import override from mo_logs import Log, strings -from mo_logs.exceptions import Except -from mo_logs.strings import utf82unicode, unicode2utf8 -from mo_math import Math +from mo_logs.exceptions import Except, suppress_exception +from mo_math import is_integer, is_number from mo_math.randoms import Random -from mo_threads import Lock, ThreadedQueue, Till -from mo_times import Date, Timer, MINUTE -from pyLibrary import convert -from pyLibrary.env import http +from mo_threads import Lock, ThreadedQueue, Till, THREAD_STOP, Thread, MAIN_THREAD +from mo_times import Date, Timer, HOUR, dates, Duration +from mo_http import http +DEBUG = True DEBUG_METADATA_UPDATE = False ES_STRUCT = ["object", "nested"] ES_NUMERIC_TYPES = ["long", "integer", "double", "float"] -ES_PRIMITIVE_TYPES = ["string", "boolean", "integer", "date", "long", "double"] +ES_PRIMITIVE_TYPES = ("boolean", "float", "integer", "date", "long", "double", "string", "keyword") + INDEX_DATE_FORMAT = "%Y%m%d_%H%M%S" +SUFFIX_PATTERN = r'\d{8}_\d{6}' +ID = Data(field='_id') +LF = "\n".encode('utf8') -STALE_METADATA = 10 * MINUTE - -DATA_KEY = text_type("data") +STALE_METADATA = HOUR +DATA_KEY = text("data") -class Features(object): - pass - - -class Index(Features): +class Index(object): """ AN ElasticSearch INDEX LIFETIME MANAGEMENT TOOL @@ -65,11 +64,12 @@ class Index(Features): IF ANY YET. 
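The `INDEX_DATE_FORMAT` and `SUFFIX_PATTERN` constants above encode the naming convention the lifecycle code relies on: the physical index is the alias plus a creation timestamp, and cleanup matches old generations with the same pattern. A small self-contained illustration (the alias is hypothetical):

```python
# Sketch of the index-naming convention implied by the constants above.
import re
from datetime import datetime

INDEX_DATE_FORMAT = "%Y%m%d_%H%M%S"
SUFFIX_PATTERN = r'\d{8}_\d{6}'

alias = "b2g_tests"  # hypothetical alias
physical = alias + datetime(2018, 10, 7, 12, 0, 0).strftime(INDEX_DATE_FORMAT)
# physical == "b2g_tests20181007_120000"
assert re.match(re.escape(alias) + SUFFIX_PATTERN, physical)
```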
""" + @override def __init__( self, index, # NAME OF THE INDEX, EITHER ALIAS NAME OR FULL VERSION NAME - id_column="_id", + id=ID, # CUSTOM FIELD FOR _id AND version type=None, # SCHEMA NAME, (DEFAULT TO TYPE IN INDEX, IF ONLY ONE) alias=None, explore_metadata=True, # PROBING THE CLUSTER FOR METADATA IS ALLOWED @@ -83,7 +83,7 @@ class Index(Features): ): if kwargs.tjson != None: Log.error("used `typed` parameter, not `tjson`") - if index==None: + if index == None: Log.error("not allowed") self.info = None @@ -93,14 +93,14 @@ class Index(Features): try: full_index = self.cluster.get_canonical_index(index) - if full_index and alias==None: + if full_index and alias == None: kwargs.alias = kwargs.index kwargs.index = full_index - if full_index==None: + if full_index == None: Log.error("not allowed") if type == None: # NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT? - about = self.cluster.get_metadata().indices[self.settings.index] + about = self.cluster.get_metadata().indices[literal_field(self.settings.index)] type = self.settings.type = _get_best_type_from_mapping(about.mappings)[0] if type == "_default_": Log.error("not allowed") @@ -129,12 +129,21 @@ class Index(Features): typed = kwargs.typed = False if not read_only: - if typed: - from pyLibrary.env.typed_inserter import TypedInserter - - self.encode = TypedInserter(self, id_column).typed_encode + if is_text(id): + id_info = set_default({"field": id}) + elif is_data(id): + if not id.field: + id.field = ID.field + id_info = id else: - self.encode = get_encoder(id_column) + Log.error("do not know how to handle id={{id}}", id=id) + + if typed: + from jx_elasticsearch.typed_inserter import TypedInserter + + self.encode = TypedInserter(self, id_info).typed_encode + else: + self.encode = get_encoder(id_info) @property def url(self): @@ -143,14 +152,14 @@ class Index(Features): def get_properties(self, retry=True): if self.settings.explore_metadata: metadata = self.cluster.get_metadata() - index = metadata.indices[self.settings.index] + index = metadata.indices[literal_field(self.settings.index)] if index == None and retry: - #TRY AGAIN, JUST IN CASE + # TRY AGAIN, JUST IN CASE self.cluster.info = None return self.get_properties(retry=False) - if not index.mappings[self.settings.type] and (index.mappings.keys()-{"_default_"}): + if not index.mappings[self.settings.type] and (index.mappings.keys() - {"_default_"}): Log.warning( "ElasticSearch index {{index|quote}} does not have type {{type|quote}} in {{metadata|json}}", index=self.settings.index, @@ -177,7 +186,7 @@ class Index(Features): name = self.settings.index if prefix == name: - Log.note("{{index_name}} will not be deleted", index_name= prefix) + Log.note("{{index_name}} will not be deleted", index_name=prefix) for a in self.cluster.get_aliases(): # MATCH YYMMDD_HHMMSS FORMAT if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name: @@ -200,8 +209,8 @@ class Index(Features): # WAIT FOR ALIAS TO APPEAR while True: - metadata = self.cluster.get_metadata(force=True) - if alias in metadata.indices[self.settings.index].aliases: + metadata = self.cluster.get_metadata(after=Date.now()) + if alias in metadata.indices[literal_field(self.settings.index)].aliases: return Log.note("Waiting for alias {{alias}} to appear", alias=alias) Till(seconds=1).wait() @@ -232,54 +241,20 @@ class Index(Features): if self.settings.read_only: Log.error("Index opened in read only mode, no changes allowed") - self.cluster.get_metadata() self.debug and Log.note("Delete bugs:\n{{query}}", 
query=filter) - if self.cluster.info.version.number.startswith("0.90"): - query = {"filtered": { - "query": {"match_all": {}}, - "filter": filter - }} - - result = self.cluster.delete( - self.path + "/_query", - data=value2json(query), - timeout=600, - params={"consistency": self.settings.consistency} - ) - for name, status in result._indices.items(): - if status._shards.failed > 0: - Log.error("Failure to delete from {{index}}", index=name) - - elif self.cluster.info.version.number.startswith("1."): - query = {"query": {"filtered": { - "query": {"match_all": {}}, - "filter": filter - }}} - - result = self.cluster.delete( - self.path + "/_query", - data=value2json(query), - timeout=600, - params={"consistency": self.settings.consistency} - ) - for name, status in result._indices.items(): - if status._shards.failed > 0: - Log.error("Failure to delete from {{index}}", index=name) - - elif self.cluster.info.version.number.startswith(("5.", "6.")): + if self.cluster.info.version.number.startswith(("5.", "6.")): query = {"query": filter} - if filter.terms.bug_id['~n~'] != None: - Log.warning("filter is not typed") wait_for_active_shards = coalesce( # EARLIER VERSIONS USED "consistency" AS A PARAMETER self.settings.wait_for_active_shards, {"one": 1, None: None}[self.settings.consistency] ) - + path = self.path + "/_delete_by_query" + DEBUG and Log.note("Delete: {{path}}\n{{query}}", path=path, query=query) result = self.cluster.post( - self.path + "/_delete_by_query", + path, json=query, timeout=600, params={"wait_for_active_shards": wait_for_active_shards} @@ -291,6 +266,15 @@ else: raise NotImplementedError + def delete_id(self, id): + result = self.cluster.delete( + path=self.path + "/" + id, + timeout=600, + # params={"wait_for_active_shards": wait_for_active_shards} + ) + if result.failures: + Log.error("Failure to delete from {{index}}:\n{{data|pretty}}", index=self.settings.index, data=result) + def extend(self, records): """ records - MUST HAVE FORM OF @@ -300,25 +284,17 @@ """ if self.settings.read_only: Log.error("Index opened in read only mode, no changes allowed") - lines = [] + if not records: + return + if isinstance(records, generator_types): + Log.error("single use generators no longer accepted") + if is_text(records): + Log.error("records must have __iter__") + if not hasattr(records, "__iter__"): + Log.error("records must have __iter__") + try: - for r in records: - rec = self.encode(r) - json_bytes = rec['json'] - lines.append('{"index":{"_id": ' + convert.value2json(rec['id']) + '}}') - lines.append(json_bytes) - - del records - - if not lines: - return - - with Timer("Add {{num}} documents to {{index}}", {"num": int(len(lines) / 2), "index": self.settings.index}, silent=not self.debug): - try: - data_string = "\n".join(l for l in lines) + "\n" - except Exception as e: - raise Log.error("can not make request body from\n{{lines|indent}}", lines=lines, cause=e) - + with Timer("Add document(s) to {{index}}", {"index": self.settings.index}, verbose=self.debug): wait_for_active_shards = coalesce( self.settings.wait_for_active_shards, {"one": 1, None: None}[self.settings.consistency] @@ -326,7 +302,8 @@ response = self.cluster.post( self.path + "/_bulk", - data=data_string, + data=IterableBytes(self.encode, records), + zip=True, headers={"Content-Type": "application/x-ndjson"}, timeout=self.settings.timeout, retry=self.settings.retry, @@ -335,50 +312,40 @@ items = response["items"] fails = []
@@ -335,50 +312,40 @@
                 items = response["items"]
                 fails = []
-                if self.cluster.version.startswith("0.90."):
-                    for i, item in enumerate(items):
-                        if not item.index.ok:
-                            fails.append(i)
-                elif self.cluster.version.startswith(("1.4.", "1.5.", "1.6.", "1.7.", "5.", "6.")):
+                if self.cluster.version.startswith(("1.4.", "1.5.", "1.6.", "1.7.", "5.", "6.")):
                     for i, item in enumerate(items):
-                        if item.index.status not in [200, 201]:
+                        if item.index.status == 409:  # 409 ARE VERSION CONFLICTS
+                            if "version conflict" not in item.index.error.reason:
+                                fails.append(i)  # IF NOT A VERSION CONFLICT, REPORT AS FAILURE
+                        elif item.index.status not in [200, 201]:
                             fails.append(i)
                 else:
                     Log.error("version not supported {{version}}", version=self.cluster.version)

                 if fails:
-                    if len(fails) <= 3:
-                        cause = [
-                            Except(
-                                template="{{status}} {{error}} (and {{some}} others) while loading line id={{id}} into index {{index|quote}} (typed={{typed}}):\n{{line}}",
-                                status=items[i].index.status,
-                                error=items[i].index.error,
-                                some=len(fails) - 1,
-                                line=strings.limit(lines[i * 2 + 1], 500 if not self.debug else 100000),
-                                index=self.settings.index,
-                                typed=self.settings.typed,
-                                id=items[i].index._id
-                            )
-                            for i in fails
-                        ]
-                    else:
-                        i=fails[0]
-                        cause = Except(
+                    lines = list(IterableBytes(self.encode, records))
+                    cause = [
+                        Except(
                             template="{{status}} {{error}} (and {{some}} others) while loading line id={{id}} into index {{index|quote}} (typed={{typed}}):\n{{line}}",
-                            status=items[i].index.status,
-                            error=items[i].index.error,
-                            some=len(fails) - 1,
-                            line=strings.limit(lines[i * 2 + 1], 500 if not self.debug else 100000),
-                            index=self.settings.index,
-                            typed=self.settings.typed,
-                            id=items[i].index._id
+                            params={
+                                "status": items[i].index.status,
+                                "error": items[i].index.error,
+                                "some": len(fails) - 1,
+                                "line": strings.limit(lines[i * 4 + 2], 500 if not self.debug else 100000),  # IterableBytes YIELDS 4 PARTS PER RECORD (ACTION, LF, JSON, LF)
+                                "index": self.settings.index,
+                                "typed": self.settings.typed,
+                                "id": items[i].index._id
+                            }
                         )
+                        for i in fails[:3]
+                    ]
                     Log.error("Problems with insert", cause=cause)
                 pass
         except Exception as e:
             e = Except.wrap(e)
+            lines = list(IterableBytes(self.encode, records))
             if e.message.startswith("sequence item "):
-                Log.error("problem with {{data}}", data=text_type(repr(lines[int(e.message[14:16].strip())])), cause=e)
+                Log.error("problem with {{data}}", data=text(repr(lines[int(e.message[14:16].strip())])), cause=e)
             Log.error("problem sending to ES", cause=e)

     # RECORDS MUST HAVE id AND json AS A STRING OR
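# NOTE: a hedged sketch of why 409 responses are tolerated above. When a record
# carries a version, the action line (see IterableBytes later in this file)
# uses version_type=external_gte, so Elasticsearch keeps the highest version it
# has seen; re-sending an older copy answers 409 "version conflict", which
# extend() treats as "already up to date" rather than a failure. `bulk_action`
# is an illustrative name, not part of this module.
import json

def bulk_action(doc_id, version=None):
    if version is None:
        return json.dumps({"index": {"_id": doc_id}})
    return json.dumps({"index": {
        "_id": doc_id,
        "version": int(version),
        "version_type": "external_gte"
    }})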
@@ -386,8 +353,10 @@ class Index(Features):
     def add(self, record):
         if self.settings.read_only:
             Log.error("Index opened in read only mode, no changes allowed")
-        if isinstance(record, list):
+        if is_list(record):
             Log.error("add() has changed to only accept one record, no lists")
+        if record is THREAD_STOP:
+            return
         self.extend([record])

     def add_property(self, name, details):
@@ -395,7 +364,7 @@ class Index(Features):
         for n in jx.reverse(split_field(name)):
             if n == NESTED_TYPE:
                 details = {"properties": {n: set_default(details, {"type": "nested", "dynamic": True})}}
-            elif n.startswith(TYPE_PREFIX):
+            elif n.startswith(TYPE_PREFIX) or details.get('type', 'object') in ES_PRIMITIVE_TYPES:
                 details = {"properties": {n: details}}
             else:
                 details = {"properties": {n: set_default(details, {"type": "object", "dynamic": True})}}
@@ -417,21 +386,9 @@ class Index(Features):
         if seconds <= 0:
             interval = -1
         else:
-            interval = text_type(seconds) + "s"
+            interval = text(int(seconds)) + "s"

-        if self.cluster.version.startswith("0.90."):
-            response = self.cluster.put(
-                "/" + self.settings.index + "/_settings",
-                data='{"index":{"refresh_interval":' + value2json(interval) + '}}',
-                **kwargs
-            )
-
-            result = json2value(utf82unicode(response.all_content))
-            if not result.ok:
-                Log.error("Can not set refresh interval ({{error}})", {
-                    "error": utf82unicode(response.all_content)
-                })
-        elif self.cluster.version.startswith(("1.4.", "1.5.", "1.6.", "1.7.", "5.", "6.")):
+        if self.cluster.version.startswith(("1.4.", "1.5.", "1.6.", "1.7.", "5.", "6.")):
             result = self.cluster.put(
                 "/" + self.settings.index + "/_settings",
                 data={"index": {"refresh_interval": interval}},
@@ -445,18 +402,15 @@ class Index(Features):
         else:
             Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)

-    def search(self, query, timeout=None, retry=None):
+    def search(self, query, timeout=None, retry=None, scroll=None):
         query = wrap(query)
         try:
-            if self.debug:
-                if len(query.facets.keys()) > 20:
-                    show_query = query.copy()
-                    show_query.facets = {k: "..." for k in query.facets.keys()}
-                else:
-                    show_query = query
-                Log.note("Query:\n{{query|indent}}", query=show_query)
+            suffix = "/_search?scroll=" + scroll if scroll else "/_search"
+            url = self.path + suffix
+
+            self.debug and Log.note("Query: {{url}}\n{{query|indent}}", url=url, query=query)
             return self.cluster.post(
-                self.path + "/_search",
+                url,
                 data=query,
                 timeout=coalesce(timeout, self.settings.timeout),
                 retry=retry
@@ -469,7 +423,11 @@ class Index(Features):
                 cause=e
             )

+    def threaded_queue(self, batch_size=None, max_size=None, period=None, silent=False):
+        """
+        USE THIS TO AVOID WAITING
+        """
         def errors(e, _buffer):  # HANDLE ERRORS FROM extend()
             if e.cause.cause:
@@ -507,7 +465,8 @@ HOPELESS = [
     "400 MapperParsingException",
     "400 RoutingMissingException",
     "500 IllegalArgumentException[cannot change DocValues type",
-    "JsonParseException"
+    "JsonParseException",
+    " as object, but found a concrete value"
 ]

known_clusters = {}  # MAP FROM (host, port) PAIR TO CLUSTER INSTANCE

@@ -517,7 +476,7 @@ class Cluster(object):
     @override
     def __new__(cls, host, port=9200, kwargs=None):
-        if not Math.is_integer(port):
+        if not is_integer(port):
             Log.error("port must be integer")
         cluster = known_clusters.get((host, int(port)))
         if cluster:
@@ -545,6 +504,13 @@
         self.debug = debug
         self._version = None
         self.url = URL(host, port=port)
+        self.lang = None
+        self.known_indices = {}
+        if self.version.startswith("6."):
+            from jx_elasticsearch.es52.expressions import ES52
+            self.lang = ES52
+        else:
+            Log.error("Not a known version: {{version}}", version=self.version)

     @override
     def get_or_create_index(
         self,
@@ -555,6 +521,7 @@
         limit_replicas=None,
         read_only=False,
         typed=None,
+        refresh_interval=None,
         kwargs=None
     ):
         if kwargs.tjson != None:
             Log.error("used `typed` parameter, not `tjson`")
@@ -572,23 +539,42 @@
             index = kwargs.index
             meta = self.get_metadata()
-            type, about = _get_best_type_from_mapping(meta.indices[index].mappings)
+            type, about = _get_best_type_from_mapping(meta.indices[literal_field(index)].mappings)

         if typed == None:
             typed = True
-            columns = parse_properties(index, ".", about.properties)
+            columns = parse_properties(index, ".", ROOT_PATH, about.properties)
             if len(columns) > 0:
                 typed = any(
-                    c.names["."].startswith(TYPE_PREFIX) or
-                    c.names["."].find("." + TYPE_PREFIX) != -1
+                    c.name.startswith(TYPE_PREFIX) or
+                    c.name.find("." + TYPE_PREFIX) != -1
                     for c in columns
                 )
             kwargs.typed = typed

-        return Index(kwargs=kwargs, cluster=self)
+        return self._new_handle_to_index(kwargs)
+
+    def _new_handle_to_index(self, kwargs):
+        key = (kwargs.index, kwargs.typed, kwargs.read_only)
+        known_index = self.known_indices.get(key)
+        if not known_index:
+            known_index = Index(kwargs=kwargs, cluster=self)
+            self.known_indices[key] = known_index
+
+        def set_refresh(please_stop):
+            try:
+                known_index.set_refresh_interval(seconds=Duration(kwargs.refresh_interval).seconds)
+            except Exception as e:
+                Log.warning("could not set refresh interval for {{index}}", index=known_index.settings.index, cause=e)
+        if kwargs.refresh_interval:
+            Thread.run("setting refresh interval", set_refresh, parent_thread=MAIN_THREAD).release()
+        else:
+            pass
+        return known_index
+
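# NOTE: an illustrative reduction of the handle cache _new_handle_to_index
# implements above: one Index handle per (index, typed, read_only) key, so
# repeated get_index()/get_or_create_index() calls share a handle instead of
# minting new ones. `known` and `handle` are hypothetical names.
known = {}

def handle(key, make):
    existing = known.get(key)
    if existing is None:
        existing = known[key] = make()  # factory runs only on the first request
    return existing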
     @override
-    def get_index(self, index, type, alias=None, typed=None, read_only=True, kwargs=None):
+    def get_index(self, index, alias=None, typed=None, read_only=True, kwargs=None):
         """
         TESTS THAT THE INDEX EXISTS BEFORE RETURNING A HANDLE
         """
@@ -605,7 +591,7 @@
                 kwargs.index = match.index
             else:
                 Log.error("Can not find index {{index_name}}", index_name=kwargs.index)
-            return Index(kwargs=kwargs, cluster=self)
+            return self._new_handle_to_index(kwargs)
         else:
             # GET BEST MATCH, INCLUDING PROTOTYPE
             best = self.get_best_matching_index(index, alias)
@@ -619,7 +605,7 @@
                 kwargs.alias = kwargs.index
                 kwargs.index = best.index
-            return Index(kwargs=kwargs, cluster=self)
+            return self._new_handle_to_index(kwargs)

     def get_alias(self, alias):
         """
@@ -631,8 +617,8 @@
                 settings = self.settings.copy()
                 settings.alias = alias
                 settings.index = alias
-                return Index(read_only=True, kwargs=settings, cluster=self)
-        Log.error("Can not find any index with alias {{alias_name}}", alias_name= alias)
+                return self._new_handle_to_index(set_default({"read_only": True}, settings))
+        Log.error("Can not find any index with alias {{alias_name}}", alias_name=alias)

     def get_canonical_index(self, alias):
         """
@@ -658,10 +644,10 @@
     def get_best_matching_index(self, index, alias=None):
         indexes = jx.sort(
             [
-                ai_pair
-                for pattern in [re.escape(index) + r'\d{8}_\d{6}']
-                for ai_pair in self.get_aliases()
-                for a, i in [(ai_pair.alias, ai_pair.index)]
+                p
+                for pattern in [re.escape(index) + SUFFIX_PATTERN]
+                for p in self.get_aliases()
+                for i, a in [(p.index, p.alias)]
                 if (a == index and alias == None) or
                 (re.match(pattern, i) and alias == None) or
                 (i == index and (alias == None or a == None or a == alias))
@@ -676,9 +662,9 @@
        ALIAS YET BECAUSE INCOMPLETE
        """
        output = sort([
-            a.index
-            for a in self.get_aliases()
-            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", a.index) and not a.alias
+            p.index
+            for p in self.get_aliases()
+            if re.match(re.escape(alias) + "\\d{8}_\\d{6}", p.index) and not p.alias
        ])
        return output
@@ -695,7 +681,6 @@
             if re.match(re.escape(prefix) + "\\d{8}_\\d{6}", a.index) and a.index != name:
                 self.delete_index(a.index)

-
     @override
     def create_index(
         self,
@@ -712,23 +697,28 @@
         if kwargs.tjson != None:
             Log.error("used `typed` parameter, not `tjson`")
         if not alias:
-            alias = kwargs.alias = kwargs.index
-            index = kwargs.index = proto_name(alias, create_timestamp)
+            requested_name = kwargs.index

-        if kwargs.alias == index:
-            Log.error("Expecting index name to conform to pattern")
+            index = kwargs.index = proto_name(requested_name, create_timestamp)
+            if requested_name == index:
+                kwargs.alias = None
+            else:
+                kwargs.alias = requested_name
+
+        if not re.match('.*' + SUFFIX_PATTERN, index):
+            Log.error("Expecting index name to conform to pattern {{pattern}}", pattern=INDEX_DATE_FORMAT)

         if kwargs.schema_file:
             Log.error('schema_file attribute not supported. Use {"$ref":} instead')
         if schema == None:
             Log.error("Expecting a schema")
-        elif isinstance(schema, text_type):
+        elif is_text(schema):
             Log.error("Expecting a JSON schema")
+        else:
+            schema = wrap(schema)

         for k, m in items(schema.mappings):
-            m.date_detection = False  # DISABLE DATE DETECTION
-
             if typed:
                 m = schema.mappings[k] = wrap(add_typed_annotations(m))
@@ -740,6 +730,7 @@
             if self.version.startswith("6."):
                 m.dynamic_templates = [t for t in m.dynamic_templates if "default_integer" not in t]
         if self.version.startswith("5."):
+            schema.settings.index.max_result_window = None  # NOT ACCEPTED BY ES5
             schema.settings.index.max_inner_result_window = None  # NOT ACCEPTED BY ES5
             schema = json2value(value2json(schema), leaves=True)
         elif self.version.startswith("6."):
@@ -747,31 +738,31 @@
         else:
             schema = retro_schema(json2value(value2json(schema), leaves=True))
-
         if limit_replicas:
             # DO NOT ASK FOR TOO MANY REPLICAS
             health = self.get("/_cluster/health", stream=False)
             if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                 if limit_replicas_warning:
                     Log.warning(
-                        "Reduced number of replicas: {{from}} requested, {{to}} realized",
+                        "Reduced number of replicas for {{index}}: {{from}} requested, {{to}} realized",
                         {"from": schema.settings.index.number_of_replicas},
-                        to=health.number_of_nodes - 1
+                        to=health.number_of_nodes - 1,
+                        index=index
                     )
                 schema.settings.index.number_of_replicas = health.number_of_nodes - 1

         self.put(
             "/" + index,
             data=schema,
-            headers={text_type("Content-Type"): text_type("application/json")},
+            headers={"Content-Type": mimetype.JSON},
             stream=False
         )

         # CONFIRM INDEX EXISTS
         while not Till(seconds=30):
             try:
-                metadata = self.get_metadata(force=True)
-                if index in metadata.indices:
+                metadata = self.get_metadata(after=Date.now())
+                if index in metadata.indices.keys():
                     break
                 Log.note("Waiting for index {{index}} to appear", index=index)
             except Exception as e:
@@ -779,11 +770,10 @@
             Till(seconds=1).wait()
         Log.alert("Made new index {{index|quote}}", index=index)

-        es = Index(kwargs=kwargs, cluster=self)
-        return es
+        return self._new_handle_to_index(kwargs)

     def delete_index(self, index_name):
-        if not isinstance(index_name, text_type):
+        if not is_text(index_name):
             Log.error("expecting an index name")
         self.debug and Log.note("Deleting index {{index}}", index=index_name)

@@ -796,44 +786,56 @@
             data={"actions": [{"remove": a} for a in aliases]}
         )

-        url = self.settings.host + ":" + text_type(self.settings.port) + "/" + index_name
+        url = self.settings.host + ":" + text(self.settings.port) + "/" + index_name
         try:
             response = http.delete(url)
             if response.status_code != 200:
                 Log.error("Expecting a 200, got {{code}}", code=response.status_code)
-            details = json2value(utf82unicode(response.content))
+            else:
+                # making the metadata stale after deletion of the index
+                self.metatdata_last_updated = self.metatdata_last_updated - STALE_METADATA
+
+            details = json2value(response.content.decode('utf8'))
             self.debug and Log.note("delete response {{response}}", response=details)
             return response
         except Exception as e:
             Log.error("Problem with call to {{url}}", url=url, cause=e)

-    def get_aliases(self):
+    def get_aliases(self, after=None):
         """
         RETURN LIST OF {"alias":a, "index":i} PAIRS
         ALL INDEXES INCLUDED, EVEN IF NO ALIAS {"alias":Null}
         """
-        for index, desc in self.get_metadata().indices.items():
+        for index, desc in self.get_metadata(after=after).indices.items():
             if not desc["aliases"]:
-                yield wrap({"index": index})
+                yield Data(index=index)
             elif desc['aliases'][0] == index:
                 Log.error("should not happen")
             else:
-                for a in desc["aliases"]:
-                    yield wrap({"index": index, "alias": a})
+                for alias in desc["aliases"]:
+                    yield Data(index=index, alias=alias)
+
+    def get_metadata(self, after=None):
+        now = Date.now()

-    def get_metadata(self, force=False):
         if not self.settings.explore_metadata:
             Log.error("Metadata exploration has been disabled")
-        if not force and self._metadata and Date.now() < self.metatdata_last_updated + STALE_METADATA:
+        if not after and self._metadata and now < self.metatdata_last_updated + STALE_METADATA:
+            return self._metadata
+        if after != None and after <= self.metatdata_last_updated:
             return self._metadata

         old_indices = self._metadata.indices
         response = self.get("/_cluster/state", retry={"times": 3}, timeout=30, stream=False)
-        now = self.metatdata_last_updated = Date.now()
+
+        self.debug and Log.alert("Got metadata for {{cluster}}", cluster=self.url)
+
+        self.metatdata_last_updated = now  # ONLY UPDATE AFTER WE GET A RESPONSE
+
         with self.metadata_locker:
             self._metadata = wrap(response.metadata)
             for new_index_name, new_meta in self._metadata.indices.items():
-                old_index = old_indices[new_index_name]
+                old_index = old_indices[literal_field(new_index_name)]
                 if not old_index:
                     DEBUG_METADATA_UPDATE and Log.note("New index found {{index}} at {{time}}", index=new_index_name, time=now)
                     self.index_last_updated[new_index_name] = now
@@ -845,9 +847,9 @@
                     DEBUG_METADATA_UPDATE and Log.note("More columns found in {{index}} at {{time}}", index=new_index_name, time=now)
                     self.index_last_updated[new_index_name] = now
             for old_index_name, old_meta in old_indices.items():
-                new_index = self._metadata.indices[old_index_name]
+                new_index = self._metadata.indices[literal_field(old_index_name)]
                 if not new_index:
-                    DEBUG_METADATA_UPDATE and Log.note("Old index lost: {{index}} at {{time}}", index=new_index_name, time=now)
+                    DEBUG_METADATA_UPDATE and Log.note("Old index lost: {{index}} at {{time}}", index=old_index_name, time=now)
                     self.index_last_updated[old_index_name] = now
         self.info = wrap(self.get("/", stream=False))
         self._version = self.info.version.number
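# NOTE: a sketch, with hypothetical names, of the time-based invalidation
# get_metadata() implements above: a cached copy serves unqualified reads until
# it is STALE_METADATA old, and a caller can demand a copy fetched after a
# given time (delete_index() exploits this by moving metatdata_last_updated
# backwards, forcing the next read to refresh).
import time

STALE = 10 * 60  # seconds, standing in for STALE_METADATA

class MetadataCache(object):
    def __init__(self, fetch):
        self.fetch = fetch  # zero-argument function that pulls fresh metadata
        self.value = None
        self.last_updated = float("-inf")

    def get(self, after=None):
        now = time.time()
        if after is None and self.value is not None and now < self.last_updated + STALE:
            return self.value  # fresh enough for an unqualified read
        if after is not None and after <= self.last_updated:
            return self.value  # already newer than the requested time
        self.value = self.fetch()
        self.last_updated = now
        return self.value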
@@ -860,27 +862,27 @@
         return self._version

     def post(self, path, **kwargs):
-        url = self.url / path  # self.settings.host + ":" + text_type(self.settings.port) + path
+        url = self.url / path  # self.settings.host + ":" + text(self.settings.port) + path
+
+        data = kwargs.get(DATA_KEY)
+        if data == None:
+            pass
+        elif is_data(data):
+            data = kwargs[DATA_KEY] = value2json(data).encode('utf8')
+        elif is_text(data):
+            data = kwargs[DATA_KEY] = data.encode('utf8')
+        elif hasattr(data, str("__iter__")):
+            pass  # ASSUME THIS IS AN ITERATOR OVER BYTES
+        else:
+            Log.error("data must be utf8 encoded string")

         try:
             heads = wrap(kwargs).headers
             heads["Accept-Encoding"] = "gzip,deflate"
-            heads["Content-Type"] = "application/json"
-
-            data = kwargs.get(DATA_KEY)
-            if data == None:
-                pass
-            elif isinstance(data, Mapping):
-                data = kwargs[DATA_KEY] = unicode2utf8(value2json(data))
-            elif isinstance(data, text_type):
-                data = kwargs[DATA_KEY] = unicode2utf8(data)
-            elif hasattr(data, str("__iter__")):
-                pass  # ASSUME THIS IS AN ITERATOR OVER BYTES
-            else:
-                Log.error("data must be utf8 encoded string")
+            heads["Content-Type"] = mimetype.JSON

             if self.debug:
-                if isinstance(data, binary_type):
+                if is_binary(data):
                     sample = kwargs.get(DATA_KEY, b"")[:300]
                     Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
                 else:
@@ -889,15 +891,17 @@
             self.debug and Log.note("POST {{url}}", url=url)
             response = http.post(url, **kwargs)
             if response.status_code not in [200, 201]:
-                Log.error(text_type(response.reason) + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
-            self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
-            details = json2value(utf82unicode(response.content))
+                Log.error(text(response.reason) + ": " + strings.limit(response.content.decode("latin1"), 1000 if self.debug else 10000))
+            self.debug and Log.note("response: {{response}}", response=(response.content.decode('utf8'))[:130])
+            details = json2value(response.content.decode('utf8'))
             if details.error:
-                Log.error(convert.quote2string(details.error))
+                Log.error(quote2string(details.error))
             if details._shards.failed > 0:
                 Log.error(
-                    "Shard failures {{failures|indent}}",
-                    failures=details._shards.failures.reason
+                    "{{num}} of {{total}} shard failures {{failures|indent}}",
+                    failures=details._shards.failures.reason,
+                    num=details._shards.failed,
+                    total=details._shards.total
                 )
             return details
         except Exception as e:
@@ -907,24 +911,24 @@
             else:
                 suggestion = ""

-            if kwargs.get(DATA_KEY):
+            if is_binary(data):
                 Log.error(
                     "Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
                     url=url,
-                    body=strings.limit(utf82unicode(kwargs[DATA_KEY]), 100 if self.debug else 10000),
+                    body=strings.limit(kwargs[DATA_KEY].decode('utf8'), 500 if self.debug else 10000),
                     cause=e
                 )
             else:
                 Log.error("Problem with call to {{url}}" + suggestion, url=url, cause=e)

     def delete(self, path, **kwargs):
-        url = self.settings.host + ":" + text_type(self.settings.port) + path
+        url = self.settings.host + ":" + text(self.settings.port) + path
         try:
             response = http.delete(url, **kwargs)
             if response.status_code not in [200]:
-                Log.error(response.reason+": "+response.all_content)
-            self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
-            details = wrap(json2value(utf82unicode(response.all_content)))
+                Log.error(response.reason + ": " + response.all_content)
+            self.debug and Log.note("response: {{response}}", response=strings.limit(response.all_content.decode('utf8'), 500))
+            details = json2value(response.all_content.decode('utf8'))
             if details.error:
                 Log.error(details.error)
             return details
@@ -932,14 +936,14 @@
             Log.error("Problem with call to {{url}}", url=url, cause=e)

     def get(self, path, **kwargs):
-        url = self.settings.host + ":" + text_type(self.settings.port) + path
+        url = self.settings.host + ":" + text(self.settings.port) + path
         try:
             self.debug and Log.note("GET {{url}}", url=url)
             response = http.get(url, **kwargs)
             if response.status_code not in [200]:
                 Log.error(response.reason + ": " + response.all_content)
-            self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
-            details = wrap(json2value(utf82unicode(response.all_content)))
+            self.debug and Log.note("response: {{response}}", response=strings.limit(response.all_content.decode('utf8'), 500))
+            details = json2value(response.all_content.decode('utf8'))
             if details.error:
                 Log.error(details.error)
             return details
@@ -947,35 +951,35 @@
             Log.error("Problem with call to {{url}}", url=url, cause=e)

     def head(self, path, **kwargs):
-        url = self.settings.host + ":" + text_type(self.settings.port) + path
+        url = self.settings.host + ":" + text(self.settings.port) + path
         try:
             response = http.head(url, **kwargs)
             if response.status_code not in [200]:
-                Log.error(response.reason+": "+response.all_content)
-            self.debug and Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
+                Log.error(response.reason + ": " + response.all_content)
+            self.debug and Log.note("response: {{response}}", response=strings.limit(response.all_content.decode('utf8'), 500))
             if response.all_content:
-                details = wrap(json2value(utf82unicode(response.all_content)))
+                details = json2value(response.all_content.decode('utf8'))
                 if details.error:
                     Log.error(details.error)
                 return details
             else:
                 return None  # WE DO NOT EXPECT content WITH HEAD REQUEST
         except Exception as e:
-            Log.error("Problem with call to {{url}}", url= url, cause=e)
+            Log.error("Problem with call to {{url}}", url=url, cause=e)

     def put(self, path, **kwargs):
-        url = self.settings.host + ":" + text_type(self.settings.port) + path
+        url = self.settings.host + ":" + text(self.settings.port) + path

         heads = wrap(kwargs).headers
-        heads[text_type("Accept-Encoding")] = text_type("gzip,deflate")
-        heads[text_type("Content-Type")] = text_type("application/json")
+        heads[text("Accept-Encoding")] = text("gzip,deflate")
+        heads[text("Content-Type")] = mimetype.JSON

         data = kwargs.get(DATA_KEY)
         if data == None:
             pass
-        elif isinstance(data, Mapping):
-            kwargs[DATA_KEY] = unicode2utf8(convert.value2json(data))
-        elif isinstance(kwargs[DATA_KEY], text_type):
+        elif is_data(data):
+            kwargs[DATA_KEY] = value2json(data).encode('utf8')
+        elif is_text(kwargs[DATA_KEY]):
             pass
         else:
             Log.error("data must be utf8 encoded string")
@@ -986,12 +990,15 @@
         try:
             response = http.put(url, **kwargs)
             if response.status_code not in [200]:
-                Log.error(response.reason + ": " + utf82unicode(response.content))
-            self.debug and Log.note("response: {{response}}", response=utf82unicode(response.content)[0:300:])
+                Log.error("{{reason}}: {{content|limit(3000)}}", reason=response.reason, content=response.content)
+            if not response.content:
+                return Null

-            details = json2value(utf82unicode(response.content))
+            self.debug and Log.note("response: {{response}}", response=(response.content.decode('utf8'))[0:300:])
+
+            details = json2value(response.content.decode('utf8'))
             if details.error:
-                Log.error(convert.quote2string(details.error))
+                Log.error(quote2string(details.error))
             if details._shards.failed > 0:
                 Log.error(
                     "Shard failures {{failures|indent}}",
@@ -1002,7 +1009,26 @@
             Log.error("Problem with call to {{url}}", url=url, cause=e)


+def export_schema(cluster, metadata):
+    aliases = set(a for i, settings in metadata.indices.items() for a in settings.aliases)
+    output = []
+
+    for a in aliases:
+        i = cluster.get_best_matching_index(a).index
+        output.append("## "+a+"\n")
+        output.append(strings.indent(value2json(list(metadata.indices[i].mappings.values())[0].properties, pretty=True), "    "))  # list() SO PY3 dict_values CAN BE INDEXED
+        output.append("\n")
+
+    File("temp" + text(cluster.url.port) + ".md").write(output)
+
+
 def proto_name(prefix, timestamp=None):
+    suffix = re.search(SUFFIX_PATTERN, prefix)
+    if suffix:
+        start, stop = suffix.regs[0]
+        if stop == len(prefix):
+            return prefix
+
     if not timestamp:
         timestamp = Date.now()
     else:
@@ -1027,14 +1053,14 @@ def _scrub(r):
     try:
         if r == None:
             return None
-        elif isinstance(r, (text_type, binary_type)):
+        elif r.__class__ in (text, binary_type):
             if r == "":
                 return None
             return r
-        elif Math.is_number(r):
-            return convert.value2number(r)
-        elif isinstance(r, Mapping):
-            if isinstance(r, Data):
+        elif is_number(r):
+            return value2number(r)
+        elif is_data(r):
+            if r.__class__ is Data:
                 r = object.__getattribute__(r, SLOT)
             output = {}
             for k, v in r.items():
@@ -1064,21 +1090,27 @@ def _scrub(r):
         Log.warning("Can not scrub: {{json}}", json=r, cause=e)


-class Alias(Features):
+class Alias(object):
+    """
+    REPRESENT MULTIPLE INDICES, ALL WITH THE SAME ALIAS
+    """
+
     @override
     def __init__(
         self,
         alias,  # NAME OF THE ALIAS
+        index=None,  # NO LONGER USED
         type=None,  # SCHEMA NAME, WILL HUNT FOR ONE IF None
         explore_metadata=True,  # IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
         debug=False,
         timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
         kwargs=None
     ):
-        self.debug = debug
-        self.debug and Log.alert("Elasticsearch debugging on {{index|quote}} is on", index= kwargs.index)
         if alias == None:
-            Log.error("Alias can not be None")
+            Log.error("alias can not be None")
+        if index != None:
+            Log.error("index is no longer accepted")
+        self.debug = debug
         self.settings = kwargs
         self.cluster = Cluster(kwargs)
@@ -1086,24 +1118,26 @@ class Alias(Features):
             if not explore_metadata:
                 Log.error("Alias() was given no `type` (aka schema) and not allowed to explore metadata. Do not know what to do now.")

-            if not self.settings.alias or self.settings.alias==self.settings.index:
+            if not self.settings.alias or self.settings.alias == self.settings.index:
                 alias_list = self.cluster.get("/_alias")
                 candidates = (
                     [(name, i) for name, i in alias_list.items() if self.settings.index in i.aliases.keys()] +
-                    [(name, Null) for name, i in alias_list.items() if self.settings.index==name]
+                    [(name, Null) for name, i in alias_list.items() if self.settings.index == name]
                 )
                 full_name = jx.sort(candidates, 0).last()[0]
                 if not full_name:
                     Log.error("No index by name of {{name}}", name=self.settings.index)
-                mappings = self.cluster.get("/" + full_name + "/_mapping")[full_name]
+                settings = self.cluster.get("/" + full_name + "/_mapping")[full_name]
             else:
-                mappings = self.cluster.get("/"+self.settings.index+"/_mapping")[self.settings.index]
+                index = self.cluster.get_best_matching_index(alias).index
+                settings = self.cluster.get_metadata().indices[literal_field(index)]

             # FIND MAPPING WITH MOST PROPERTIES (AND ASSUME THAT IS THE CANONICAL TYPE)
-            type, props = _get_best_type_from_mapping(mappings.mappings)
+            type, props = _get_best_type_from_mapping(settings.mappings)
             if type == None:
                 Log.error("Can not find schema type for index {{index}}", index=coalesce(self.settings.alias, self.settings.index))

+        self.debug and Log.alert("Elasticsearch debugging on {{alias|quote}} is on", alias=alias)
         self.path = "/" + alias + "/" + type

     @property
@@ -1113,28 +1147,27 @@ class Alias(Features):
     def get_snowflake(self, retry=True):
         if self.settings.explore_metadata:
             indices = self.cluster.get_metadata().indices
-            if not self.settings.alias or self.settings.alias==self.settings.index:
-                #PARTIALLY DEFINED settings
+            if not self.settings.alias or self.settings.alias == self.settings.index:
+                # PARTIALLY DEFINED settings
                 candidates = [(name, i) for name, i in indices.items() if self.settings.index in i.aliases]
                 # TODO: MERGE THE mappings OF ALL candidates, DO
NOT JUST PICK THE LAST ONE index = "dummy value" - schema = wrap({"_routing": {}, "properties": {}}) + schema = wrap({"properties": {}}) for _, ind in jx.sort(candidates, {"value": 0, "sort": -1}): mapping = ind.mappings[self.settings.type] - set_default(schema._routing, mapping._routing) schema.properties = _merge_mapping(schema.properties, mapping.properties) else: - #FULLY DEFINED settings + # FULLY DEFINED settings index = indices[self.settings.index] schema = index.mappings[self.settings.type] if index == None and retry: - #TRY AGAIN, JUST IN CASE + # TRY AGAIN, JUST IN CASE self.cluster.info = None return self.get_schema(retry=False) - #TODO: REMOVE THIS BUG CORRECTION + # TODO: REMOVE THIS BUG CORRECTION if not schema and self.settings.type == "test_result": schema = index.mappings["test_results"] # DONE BUG CORRECTION @@ -1155,12 +1188,7 @@ class Alias(Features): def delete(self, filter): self.cluster.get_metadata() - if self.cluster.info.version.number.startswith("0.90"): - query = {"filtered": { - "query": {"match_all": {}}, - "filter": filter - }} - elif self.cluster.info.version.number.startswith("1."): + if self.cluster.info.version.number.startswith("1."): query = {"query": {"filtered": { "query": {"match_all": {}}, "filter": filter @@ -1195,18 +1223,15 @@ class Alias(Features): message=status._shards.failures[0].reason ) - def search(self, query, timeout=None): + def search(self, query, timeout=None, scroll=None): query = wrap(query) try: - if self.debug: - if len(query.facets.keys()) > 20: - show_query = query.copy() - show_query.facets = {k: "..." for k in query.facets.keys()} - else: - show_query = query - Log.note("Query {{path}}\n{{query|indent}}", path=self.path + "/_search", query=show_query) + suffix = "/_search?scroll=" + scroll if scroll else "/_search" + path = self.path + suffix + self.debug and Log.note("Query {{path}}\n{{query|indent}}", path=path, query=query) + return self.cluster.post( - self.path + "/_search", + path, data=query, timeout=coalesce(timeout, self.settings.timeout) ) @@ -1218,15 +1243,47 @@ class Alias(Features): cause=e ) + def scroll(self, scroll_id): + try: + # POST /_search/scroll + # { + # "scroll" : "1m", + # "scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAD4WYm9laVYtZndUQlNsdDcwakFMNjU1QQ==" + # } + return self.cluster.post( + "_search/scroll", + data={"scroll": "5m", "scroll_id": scroll_id} + ) + except Exception as e: + Log.error( + "Problem with scroll (scroll_id={{scroll_id}})", + path= "_search/scroll", + scroll_id=scroll_id, + cause=e + ) + def refresh(self): self.cluster.post("/" + self.settings.alias + "/_refresh") -def parse_properties(parent_index_name, parent_name, esProperties): +def parse_properties(parent_index_name, parent_name, nested_path, esProperties): """ RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT """ columns = FlatList() + + if parent_name == '.': + # ROOT PROPERTY IS THE ELASTICSEARCH DOCUMENT (AN OBJECT) + columns.append(Column( + name='.', + es_index=parent_index_name, + es_column='.', + es_type="object", + jx_type=OBJECT, + last_updated=Date.now(), + nested_path=nested_path + )) + for name, property in esProperties.items(): index_name = parent_index_name column_name = concat_field(parent_name, name) @@ -1235,29 +1292,32 @@ def parse_properties(parent_index_name, parent_name, esProperties): if property.type == "nested" and property.properties: # NESTED TYPE IS A NEW TYPE DEFINITION # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH - self_columns = parse_properties(index_name, column_name, 
property.properties) - for c in self_columns: - c.nested_path = [column_name] + c.nested_path + self_columns = parse_properties(index_name, column_name, [column_name] + nested_path, property.properties) columns.extend(self_columns) columns.append(Column( + name=jx_name, es_index=index_name, es_column=column_name, - names={".": jx_name}, es_type="nested", - nested_path=ROOT_PATH + jx_type=NESTED, + multi=1001, + last_updated=Date.now(), + nested_path=nested_path )) continue if property.properties: - child_columns = parse_properties(index_name, column_name, property.properties) + child_columns = parse_properties(index_name, column_name, nested_path, property.properties) columns.extend(child_columns) columns.append(Column( - names={".": jx_name}, + name=jx_name, es_index=index_name, es_column=column_name, - nested_path=ROOT_PATH, - es_type="source" if property.enabled == False else "object" + es_type="source" if property.enabled == False else "object", + jx_type=OBJECT, + last_updated=Date.now(), + nested_path=nested_path )) if property.dynamic: @@ -1265,11 +1325,10 @@ def parse_properties(parent_index_name, parent_name, esProperties): if not property.type: continue - - cardinality = 0 if not property.store and not name != '_id' else None + cardinality = 0 if not (property.store or property.enabled) and name != '_id' else None if property.fields: - child_columns = parse_properties(index_name, column_name, property.fields) + child_columns = parse_properties(index_name, column_name, nested_path, property.fields) if cardinality is None: for cc in child_columns: cc.cardinality = None @@ -1277,30 +1336,36 @@ def parse_properties(parent_index_name, parent_name, esProperties): if property.type in es_type_to_json_type.keys(): columns.append(Column( + name=jx_name, es_index=index_name, es_column=column_name, - names={".": jx_name}, - nested_path=ROOT_PATH, + es_type=property.type, + jx_type=es_type_to_json_type[property.type], cardinality=cardinality, - es_type=property.type + last_updated=Date.now(), + nested_path=nested_path )) if property.index_name and name != property.index_name: columns.append(Column( + name=jx_name, es_index=index_name, es_column=column_name, - names={".": jx_name}, - nested_path=ROOT_PATH, + es_type=property.type, + jx_type=es_type_to_json_type[property.type], cardinality=0 if property.store else None, - es_type=property.type + last_updated=Date.now(), + nested_path=nested_path )) elif property.enabled == None or property.enabled == False: columns.append(Column( + name=jx_name, es_index=index_name, es_column=column_name, - names={".": jx_name}, - nested_path=ROOT_PATH, + es_type="source" if property.enabled == False else "object", + jx_type=OBJECT, cardinality=0 if property.store else None, - es_type="source" if property.enabled == False else "object" + last_updated=Date.now(), + nested_path=nested_path )) else: Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=parent_name) @@ -1327,13 +1392,14 @@ def _get_best_type_from_mapping(mapping): return best_type_name, best_mapping -def get_encoder(id_expression="_id"): - get_id = jx_expression_to_function(id_expression) +def get_encoder(id_info): + get_id = jx.get(id_info.field) + get_version = jx.get(id_info.version) def _encoder(r): id = r.get("id") r_value = r.get('value') - if isinstance(r_value, Mapping): + if is_data(r_value): r_id = get_id(r_value) r_value.pop('_id', None) if id == None: @@ -1343,21 +1409,21 @@ def get_encoder(id_expression="_id"): if id == None: id = random_id() + version = 
get_version(r_value) + if "json" in r: Log.error("can not handle pure json inserts anymore") json = r["json"] - elif r_value or isinstance(r_value, (dict, Data)): - json = convert.value2json(r_value) + elif r_value or is_data(r_value): + json = value2json(r_value) else: raise Log.error("Expecting every record given to have \"value\" or \"json\" property") - return {"id": id, "json": json} + return id, version, json return _encoder - - def random_id(): return Random.hex(40) @@ -1391,7 +1457,7 @@ def retro_schema(schema): :return: """ output = wrap({ - "mappings":{ + "mappings": { typename: { "dynamic_templates": [ retro_dynamic_template(*(t.items()[0])) @@ -1453,7 +1519,7 @@ def retro_properties(properties): def add_typed_annotations(meta): - if meta.type in ["text", "keyword", "string", "float", "double", "integer", "nested", "boolean"]: + if meta.type in ["text", "keyword", "string", "float", "double", "integer", "boolean"]: return { "type": "object", "dynamic": True, @@ -1484,7 +1550,8 @@ def diff_schema(A, B): :param B: elasticsearch properties :return: (name, properties) PAIRS WHERE name IS DOT-DELIMITED PATH """ - output =[] + output = [] + def _diff_schema(path, A, B): for k, av in A.items(): if k == "_id" and path == ".": @@ -1508,37 +1575,61 @@ def diff_schema(A, B): DEFAULT_DYNAMIC_TEMPLATES = wrap([ { "default_typed_boolean": { - "mapping": {"type": "boolean", "store": True}, + "mapping": { + "type": "boolean", + "store": True, + "norms": False + }, "match": BOOLEAN_TYPE } }, { "default_typed_number": { - "mapping": {"type": "double", "store": True}, + "mapping": { + "type": "double", + "store": True, + "norms": False + }, "match": NUMBER_TYPE } }, { "default_typed_string": { - "mapping": {"type": "keyword", "store": True}, + "mapping": { + "type": "keyword", + "store": True, + "norms": False + }, "match": STRING_TYPE } }, { "default_typed_exist": { - "mapping": {"type": "long", "store": True}, + "mapping": { + "type": "long", + "store": True, + "norms": False + }, "match": EXISTS_TYPE } }, { "default_typed_nested": { - "mapping": {"type": "nested", "store": True}, + "mapping": { + "type": "nested", + "store": True, + "norms": False + }, "match": NESTED_TYPE } }, { "default_string": { - "mapping": {"type": "keyword", "store": True}, + "mapping": { + "type": "keyword", + "store": True, + "norms": False + }, "match_mapping_type": "string" } }, @@ -1562,23 +1653,21 @@ DEFAULT_DYNAMIC_TEMPLATES = wrap([ } ]) - es_type_to_json_type = { - "text": "string", - "string": "string", - "keyword": "string", - "float": "number", - "double": "number", - "long": "number", - "integer": "number", - "object": "object", - "nested": "nested", + "text": STRING, + "string": STRING, + "keyword": STRING, + "float": NUMBER, + "double": NUMBER, + "long": NUMBER, + "integer": NUMBER, + "object": OBJECT, + "nested": NESTED, "source": "json", - "boolean": "boolean", - "exists": "exists" + "boolean": BOOLEAN, + "exists": EXISTS } - _merge_type = { "boolean": { "boolean": "boolean", @@ -1661,3 +1750,55 @@ _merge_type = { "nested": "nested" } } + + +class IterableBytes(object): + def __init__(self, encode, records): + """ + DO NOT SERIALIZE TO BYTES UNTIL REQUIRED + + :param encode: FUNCTION TO ENCODE INTO JSON TEXT + :param records: EXPECTING OBJECT WITH __iter__() + """ + self.encode = encode + self.records = records + + def __iter__(self): + for r in self.records: + if '_id' in r or 'value' not in r: # I MAKE THIS MISTAKE SO OFTEN, I NEED A CHECK + Log.error('Expecting {"id":id, "value":document} form. 
Not expecting _id') + id, version, json_text = self.encode(r) + + if DEBUG and not json_text.startswith('{'): + self.encode(r) + Log.error("string {{doc}} will not be accepted as a document", doc=json_text) + + if version: + yield value2json({"index": {"_id": id, "version": int(version), "version_type": "external_gte"}}).encode('utf8') + else: + yield ('{"index":{"_id": ' + value2json(id) + '}}').encode('utf8') + yield LF + yield json_text.encode('utf8') + yield LF + + +lists.sequence_types = lists.sequence_types + (IterableBytes,) + + +def quote2string(value): + with suppress_exception: + return ast.literal_eval(value) + + +def value2number(v): + try: + if isinstance(v, float) and round(v, 0) != v: + return v + # IF LOOKS LIKE AN INT, RETURN AN INT + return int(v) + except Exception: + try: + return float(v) + except Exception as e: + Log.error("Not a number ({{value}})", value= v, cause=e) + diff --git a/vendor/jx_elasticsearch/es09/aggop.py b/vendor/jx_elasticsearch/es09/aggop.py deleted file mode 100644 index b5abdb3..0000000 --- a/vendor/jx_elasticsearch/es09/aggop.py +++ /dev/null @@ -1,106 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.expressions import Variable -from jx_base.queries import is_variable_name -from jx_elasticsearch import es09 -from jx_elasticsearch.es09.util import aggregates, fix_es_stats, build_es_query -from jx_elasticsearch import post as es_post -# from jx_elasticsearch.es52.expressions import Variable -from jx_python.containers.cube import Cube -from jx_python.expressions import jx_expression_to_function -from mo_collections.matrix import Matrix -from mo_dots import listwrap, unwrap, literal_field -from mo_math import AND - - -def is_aggop(query): - if not query.edges: - return True - return False - - -def es_aggop(es, mvel, query): - select = listwrap(query.select) - FromES = build_es_query(query) - - isSimple = AND(aggregates[s.aggregate] == "count" for s in select) - if isSimple: - return es_countop(es, query) # SIMPLE, USE TERMS FACET INSTEAD - - - value2facet = dict() # ONLY ONE FACET NEEDED PER - name2facet = dict() # MAP name TO FACET WITH STATS - - for s in select: - if s.value not in value2facet: - if isinstance(s.value, Variable): - unwrap(FromES.facets)[s.name] = { - "statistical": { - "field": s.value.var - }, - "facet_filter": query.where.to_esfilter() - } - else: - unwrap(FromES.facets)[s.name] = { - "statistical": { - "script": jx_expression_to_function(s.value) - }, - "facet_filter": query.where.to_es_filter() - } - value2facet[s.value] = s.name - name2facet[s.name] = value2facet[s.value] - - data = es_post(es, FromES, query.limit) - - matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select} - cube = Cube(query.select, [], matricies) - cube.frum = query - return cube - - - -def es_countop(es, mvel, query): - """ - RETURN SINGLE COUNT - """ - select = listwrap(query.select) - FromES = build_es_query(query) - for s in select: - - if is_variable_name(s.value): - FromES.facets[s.name] = { - "terms": { - "field": s.value, - "size": query.limit, - }, - "facet_filter":{"exists":{"field":s.value}} - } - else: - # 
COMPLICATED value IS PROBABLY A SCRIPT, USE IT - FromES.facets[s.name] = { - "terms": { - "script_field": es09.expressions.compile_expression(s.value, query), - "size": 200000 - } - } - - data = es_post(es, FromES, query.limit) - - matricies = {} - for s in select: - matricies[s.name] = Matrix(value=data.hits.facets[s.name].total) - - cube = Cube(query.select, query.edges, matricies) - cube.frum = query - return cube diff --git a/vendor/jx_elasticsearch/es09/expressions.py b/vendor/jx_elasticsearch/es09/expressions.py deleted file mode 100644 index ba00a16..0000000 --- a/vendor/jx_elasticsearch/es09/expressions.py +++ /dev/null @@ -1,730 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from collections import Mapping - -from datetime import datetime -import re - -from jx_base.queries import keyword_pattern - -from mo_future import text_type -from pyLibrary import convert -from mo_collections import reverse -from mo_logs import Log -from mo_logs.strings import quote -from mo_math import Math -from mo_dots import split_field, Data, Null, join_field, coalesce, listwrap -from mo_times.durations import Duration - - -class _MVEL(object): - def __init__(self, fromData, isLean=False): - self.fromData = fromData - self.isLean = isLean - self.prefixMap = [] - self.functions = {} - - - def code(self, query): - """ - RETURN THE MVEL THAT WILL FILTER USING query.where AND TERM-PACK THE query.select CLAUSE - """ - selectList = listwrap(query.select) - fromPath = query.frum.name # FIRST NAME IS THE INDEX - sourceVar = "__sourcedoc__" - whereClause = query.where - - # PARSE THE fromPath - code = self.frum(fromPath, sourceVar, "__loop") - select = self.select(selectList, fromPath, "output", sourceVar) - - body = "var output = \"\";\n" + \ - code.replace( - "", - "if (" + _where(whereClause, lambda v: self._translate(v)) + "){\n" + - select.body + - "}\n" - ) + \ - "output\n" - - # ADD REFERENCED CONTEXT VARIABLES - context = self.getFrameVariables(body) - - func = UID() - predef = addFunctions(select.head+context+body).head - param = "_source" if body.find(sourceVar) else "" - - output = predef + \ - select.head + \ - context + \ - 'var ' + func + ' = function('+sourceVar+'){\n' + \ - body + \ - '};\n' + \ - func + '('+param+')\n' - - return Compiled(output) - - def frum(self, fromPath, sourceVar, loopVariablePrefix): - """ - indexName NAME USED TO REFER TO HIGH LEVEL DOCUMENT - loopVariablePrefix PREFIX FOR LOOP VARIABLES - """ - loopCode = "if ( != null){ for( : ){\n\n}}\n" - self.prefixMap = [] - code = "" - path = split_field(fromPath) - - # ADD LOCAL VARIABLES - columns = INDEX_CACHE[path[0]].columns - for i, c in enumerate(columns): - if c.name.find("\\.") >= 0: - self.prefixMap.insert(0, { - "path": c.name, - "variable": "get(" + sourceVar + ", \"" + c.name.replace("\\.", ".") + "\")" - }) - else: - self.prefixMap.insert(0, { - "path": c.name, - "variable": sourceVar + ".?" 
+ c.name - }) - - # ADD LOOP VARIABLES - currPath = [] - # self.prefixMap.insert(0, {"path": path[0], "variable": path[0]}) - for i, step in enumerate(path[1::]): - loopVariable = loopVariablePrefix + str(i) - currPath.append(step) - pathi = ".".join(currPath) - shortPath = self._translate(pathi) - self.prefixMap.insert(0, {"path": pathi, "variable": loopVariable}) - - loop = loopCode.replace("", loopVariable).replace("", shortPath) - code = code.replace("", loop) - return code - - def _translate(self, variableName): - shortForm = variableName - for p in self.prefixMap: - prefix = p["path"] - if shortForm == prefix: - shortForm = p["variable"] - else: - shortForm = replacePrefix(shortForm, prefix + ".", p["variable"] + ".?") # ADD NULL CHECK - shortForm = replacePrefix(shortForm, prefix + "[", p["variable"] + "[") - return shortForm - - # CREATE A PIPE DELIMITED RESULT SET - def select(self, selectList, fromPath, varName, sourceVar): - path = split_field(fromPath) - is_deep = len(path) > 1 - heads = [] - list = [] - for s in selectList: - if is_deep: - if s.value and is_variable_name(s.value): - shortForm = self._translate(s.value) - list.append("Value2Pipe(" + shortForm + ")\n") - else: - Log.error("do not know how to handle yet") - else: - if s.value and is_variable_name(s.value): - list.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n") - elif s.value: - shortForm = self._translate(s.value) - list.append("Value2Pipe(" + shortForm + ")\n") - else: - code, decode = self.Parts2Term(s.domain) - heads.append(code.head) - list.append("Value2Pipe(" + code.body + ")\n") - - - if len(split_field(fromPath)) > 1: - output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n' - else: - output = varName + ' = ' + '+"|"+'.join(["Value2Pipe("+v+")\n" for v in list]) + ';\n' - - return Data( - head="".join(heads), - body=output - ) - def Parts2Term(self, domain): - """ - TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|) - - CONVERT AN ARRAY OF PARTS{name, esfilter} TO AN MVEL EXPRESSION - RETURN expression, function PAIR, WHERE - expression - MVEL EXPRESSION - function - TAKES RESULT OF expression AND RETURNS PART - """ - fields = domain.dimension.fields - - term = [] - if len(split_field(self.fromData.name)) == 1 and fields: - if isinstance(fields, Mapping): - # CONVERT UNORDERED FIELD DEFS - jx_fields, es_fields = transpose(*[(k, fields[k]) for k in sorted(fields.keys())]) - else: - jx_fields, es_fields = transpose(*[(i, e) for i, e in enumerate(fields)]) - - # NO LOOPS BECAUSE QUERY IS SHALLOW - # DOMAIN IS FROM A DIMENSION, USE IT'S FIELD DEFS TO PULL - if len(es_fields) == 1: - def fromTerm(term): - return domain.getPartByKey(term) - - return Data( - head="", - body='getDocValue('+quote(domain.dimension.fields[0])+')' - ), fromTerm - else: - def fromTerm(term): - terms = [convert.pipe2value(t) for t in convert.pipe2value(term).split("|")] - - candidate = dict(zip(jx_fields, terms)) - for p in domain.partitions: - for k, t in candidate.items(): - if p.value[k] != t: - break - else: - return p - if domain.type in ["uid", "default"]: - part = {"value": candidate} - domain.partitions.append(part) - return part - else: - return Null - - for f in es_fields: - term.append('Value2Pipe(getDocValue('+quote(f)+'))') - - return Data( - head="", - body='Value2Pipe('+('+"|"+'.join(term))+')' - ), fromTerm - else: - for v in domain.partitions: - term.append("if (" + _where(v.esfilter, lambda x: 
self._translate(x)) + ") " + value2MVEL(domain.getKey(v)) + "; else ") - term.append(value2MVEL(domain.getKey(domain.NULL))) - - func_name = "_temp"+UID() - return self.register_function("+\"|\"+".join(term)) - - def Parts2TermScript(self, domain): - code, decode = self.Parts2Term(domain) - func = addFunctions(code.head + code.body) - return func.head + code.head + code.body, decode - - def getFrameVariables(self, body): - contextVariables = [] - columns = self.fromData.columns - - parentVarNames = set() # ALL PARENTS OF VARIABLES WITH "." IN NAME - body = body.replace(".?", ".") - - for i, c in enumerate(columns): - j = body.find(c.name, 0) - while j >= 0: - s = j - j = body.find(c.name, s + 1) - - test0 = body[s - 1: s + len(c.name) + 1:] - test3 = body[s - 8: s + len(c.name):] - - if test0[:-1] == "\"" + c.name: - continue - if test3 == "_source." + c.name: - continue - - def defParent(name): - # DO NOT MAKE THE SAME PARENT TWICE - if name in parentVarNames: - return - parentVarNames.add(name) - - if len(split_field(name)) == 1: - contextVariables.append("Map " + name + " = new HashMap();\n") - else: - defParent(join_field(split_field(name)[0:-1])) - contextVariables.append(name + " = new HashMap();\n") - - body = body.replace(c.name, "-"*len(c.name)) - - if self.isLean or c.useSource: - if len(split_field(c.name)) > 1: - defParent(join_field(split_field(c.name)[0:-1])) - contextVariables.append(c.name + " = getSourceValue(\"" + c.name + "\");\n") - else: - contextVariables.append(c.name + " = _source[\"" + c.name + "\"];\n") - else: - if len(split_field(c.name)) > 1: - defParent(join_field(split_field(c.name)[0:-1])) - contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n") - else: - contextVariables.append(c.name + " = getDocValue(\"" + c.name + "\");\n") - break - - return "".join(contextVariables) - - def compile_expression(self, expression, constants=None): - # EXPAND EXPRESSION WITH ANY CONSTANTS - expression = setValues(expression, constants) - - fromPath = self.fromData.name # FIRST NAME IS THE INDEX - indexName = join_field(split_field(fromPath)[:1:]) - - context = self.getFrameVariables(expression) - if context == "": - return addFunctions(expression).head+expression - - func = UID() - code = addFunctions(context+expression) - output = code.head + \ - 'var ' + func + ' = function(' + indexName + '){\n' + \ - context + \ - expression + ";\n" + \ - '};\n' + \ - func + '(_source)\n' - - return Compiled(output) - - def register_function(self, code): - for n, c in self.functions.items(): - if c == code: - break - else: - n = "_temp" + UID() - self.functions[n] = code - - return Data( - head='var ' + n + ' = function(){\n' + code + '\n};\n', - body=n + '()\n' - ) - - -class Compiled(object): - def __init__(self, code): - self.code=code - - def __str__(self): - return self.code - - def __data__(self): - return self.code - - - - -__UID__ = 1000 - - -def UID(): - output = "_" + str(__UID__) - globals()["__UID__"] += 1 - return output - - -def setValues(expression, constants): - if not constants: - return expression - - constants = constants.copy() - - # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES) - for c in constants: - value = c.value - n = c.name - if len(split_field(n)) >= 3: - continue # DO NOT GO TOO DEEP - if isinstance(value, list): - continue # DO NOT MESS WITH ARRAYS - - if isinstance(value, Mapping): - for k, v in value.items(): - constants.append({"name": n + "." 
+ k, "value": v}) - - for c in reverse(constants):# REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST - s = 0 - while True: - s = expression.find(c.name, s) - if s == -1: - break - if re.match(r"\w", expression[s - 1]): - break - if re.match(r"\w", expression[s + len(c.name)]): - break - - v = value2MVEL(c.value) - expression = expression[:s:] + "" + v + expression[:s + len(c.name):] - - return expression - - -def unpack_terms(facet, selects): - # INTERPRET THE TERM-PACKED ES RESULTS AND RETURN DATA CUBE - # ASSUME THE .term IS JSON OBJECT WITH ARRAY OF RESULT OBJECTS - mod = len(selects) - output = [] - for t in facet.terms: - if t.term == "": - continue # NO DATA - value = [] - for i, v in enumerate(t.term.split("|")): - value.append(convert.pipe2value(v)) - if ((i + 1) % mod) == 0: - value.append(t.count) - output.append(value) - value = [] - - return output - - -# PASS esFilter SIMPLIFIED ElasticSearch FILTER OBJECT -# RETURN MVEL EXPRESSION -def _where(esFilter, _translate): - if not esFilter or esFilter is True: - return "true" - - keys = esFilter.keys() - if len(keys) != 1: - Log.error("Expecting only one filter aggregate") - - op = keys[0] - if op == "and": - list = esFilter[op] - if not (list): - return "true" - if len(list) == 1: - return _where(list[0], _translate) - output = "(" + " && ".join(_where(l, _translate) for l in list) + ")" - return output - elif op == "or": - list = esFilter[op] - if not list: - return "false" - if len(list) == 1: - return _where(list[0], _translate) - output = "(" + " || ".join(_where(l, _translate) for l in list) + ")" - return output - elif op == "not": - return "!(" + _where(esFilter[op, _translate]) + ")" - elif op == "term": - pair = esFilter[op] - if len(pair.keys()) == 1: - return [_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()][0] - else: - return "(" + " && ".join(_translate(k) + "==" + value2MVEL(v) for k, v in pair.items()) + ")" - elif op == "terms": - output = [] - for variableName, valueList in esFilter[op].items(): - if not valueList: - Log.error("Expecting something in 'terms' array") - if len(valueList) == 1: - output.append(_translate(variableName) + "==" + value2MVEL(valueList[0])) - else: - output.append("(" + " || ".join(_translate(variableName) + "==" + value2MVEL(v) for v in valueList) + ")") - return " && ".join(output) - elif op == "exists": - # "exists":{"field":"myField"} - pair = esFilter[op] - variableName = pair.field - return "(" + _translate(variableName) + "!=null)" - elif op == "missing": - fieldName = _translate(esFilter[op].field) - testExistence = coalesce(esFilter[op].existence, True) - testNull = coalesce(esFilter[op].null_value, True) - - output = [] - if testExistence and not testNull: - output.append("(" + fieldName.replace(".?", ".") + " == empty)") # REMOVE THE .? 
SO WE REFER TO THE FIELD, NOT GET THE VALUE - if testNull: - output.append("(" + fieldName + "==null)") - return " || ".join(output) - elif op == "range": - pair = esFilter[op] - ranges = [] - - for variableName, r in pair.items(): - if r.gte: - ranges.append(value2MVEL(r.gte) + "<=" + _translate(variableName)) - elif r.gt: - ranges.append(value2MVEL(r.gt) + "<" + _translate(variableName)) - elif r["from"]: - if r.include_lower == None or r.include_lower: - ranges.append(value2MVEL(r["from"]) + "<=" + _translate(variableName)) - else: - ranges.append(value2MVEL(r["from"]) + "<" + _translate(variableName)) - - if r.lte: - ranges.append(value2MVEL(r.lte) + ">=" + _translate(variableName)) - elif r.lt: - ranges.append(value2MVEL(r.lt) + ">" + _translate(variableName)) - elif r["from"]: - if r.include_lower == None or r.include_lower: - ranges.append(value2MVEL(r["from"]) + ">=" + _translate(variableName)) - else: - ranges.append(value2MVEL(r["from"]) + ">" + _translate(variableName)) - - return "("+" && ".join(ranges)+")" - - elif op == "script": - script = esFilter[op].script - return _translate(script) - elif op == "prefix": - pair = esFilter[op] - variableName, value = pair.items()[0] - return _translate(variableName) + ".startsWith(" + quote(value) + ")" - elif op == "match_all": - return "true" - else: - Log.error("'" + op + "' is an unknown aggregate") - - return "" - - -VAR_CHAR = "abcdefghijklmnopqurstvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.\"" - - - - -def value2MVEL(value): - """ - FROM PYTHON VALUE TO MVEL EQUIVALENT - """ - if isinstance(value, datetime): - return str(convert.datetime2milli(value)) + " /*" + value.format("yyNNNdd HHmmss") + "*/" # TIME - if isinstance(value, Duration): - return str(convert.timedelta2milli(value)) + " /*" + str(value) + "*/" # DURATION - - if Math.is_number(value): - return str(value) - return quote(value) - -# FROM PYTHON VALUE TO ES QUERY EQUIVALENT -def value2query(value): - if isinstance(value, datetime): - return convert.datetime2milli(value) - if isinstance(value, Duration): - return value.milli - - if Math.is_number(value): - return value - return quote(value) - - -def value2value(value): - """ - CONVERT FROM PYTHON VALUE TO ES EQUIVALENT - """ - if isinstance(value, datetime): - return convert.datetime2milli(value) - if isinstance(value, Duration): - return value.milli # DURATION - return value - - - - - - - -def addFunctions(mvel): - """ - PREPEND THE REQUIRED MVEL FUNCTIONS TO THE CODE - """ - isAdded = Data() # SOME FUNCTIONS DEPEND ON OTHERS - - head=[] - body=mvel - - keepAdding = True - while keepAdding: - keepAdding = False - for func_name, func_code in FUNCTIONS.items(): - if isAdded[func_name]: - continue - if mvel.find(func_name) == -1: - continue - keepAdding = True - isAdded[func_name] = func_code - head.append(func_code) - mvel = func_code + mvel - return Data( - head="".join(head), - body=body - ) - - -FUNCTIONS = { - "String2Quote": - "var String2Quote = function(str){\n" + - "if (!(str is String)){ str; }else{\n" + # LAST VALUE IS RETURNED. "return" STOPS EXECUTION COMPLETELY! - "" + value2MVEL("\"") + "+" + - "str.replace(" + value2MVEL("\\") + "," + value2MVEL("\\\\") + - ").replace(" + value2MVEL("\"") + "," + value2MVEL("\\\"") + - ").replace(" + value2MVEL("\'") + "," + value2MVEL("\\\'") + ")+" + - value2MVEL("\"") + ";\n" + - "}};\n", - - "Value2Pipe": - 'var Value2Pipe = function(value){\n' + # SPACES ARE IMPORTANT BETWEEN "=". 
- "if (value==null){ \"0\" }else " + - "if (value is ArrayList || value is org.elasticsearch.common.mvel2.util.FastList){" + - "var out = \"\";\n" + - "foreach (v : value) out = (out==\"\") ? v : out + \"|\" + Value2Pipe(v);\n" + - "'a'+Value2Pipe(out);\n" + - "}else \n" + - "if (value is Long || value is Integer || value is Double){ 'n'+value; }else \n" + - "if (!(value is String)){ 's'+value.getClass().getName(); }else \n" + - '"s"+value.replace("\\\\", "\\\\\\\\").replace("|", "\\\\p");' + # CAN NOT value TO MAKE NUMBER A STRING (OR EVEN TO PREPEND A STRING!) - "};\n", - - # "replaceAll": - # "var replaceAll = function(output, find, replace){\n" + - # "if (output.length()==0) return output;\n"+ - # "s = output.indexOf(find, 0);\n" + - # "while(s>=0){\n" + - # "output=output.replace(find, replace);\n" + - # "s=s-find.length()+replace.length();\n" + - # "s = output.indexOf(find, s);\n" + - # "}\n"+ - # "output;\n"+ - # '};\n', - - "floorDay": - "var floorDay = function(value){ Math.floor(value/86400000))*86400000;};\n", - - "floorInterval": - "var floorInterval = function(value, interval){ Math.floor((double)value/(double)interval)*interval;};\n", - - "maximum": # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT! - "var maximum = function(a, b){if (a==null) b; else if (b==null) a; else if (a>b) a; else b;\n};\n", - - "minimum": # JUST BECAUSE MVEL'S MAX ONLY USES MAX(int, int). G*DDA*NIT! - "var minimum = function(a, b){if (a==null) b; else if (b==null) a; else if (a0;\n" + - "for (v : value.toCharArray()){\n" + - "if (\"0123456789\".indexOf(v)==-1) isNum = false;\n" + - "};\n" + - "isNum;\n" + - "};\n", - - "alpha2zero": - "var alpha2zero = function(value){\n" + - "var output = 0;\n" + - "if (isNumeric(value)) output = value-0;\n" + - "return output;" + - "};\n", - - # KANBAN SOFTWARE - # CAN SEE QUEUE BLOCKAGES AND SEE SINGLE BLOCKERS - - - "concat": - "var concat = function(array){\n" + - "if (array==null) \"\"; else {\n" + - "var output = \"\";\n" + - "for (v : array){ output = output+\"|\"+v+\"|\"; };\n" + - "output;\n" + - "}};\n", - - # "contains": - # "var contains = function(array, value){\n"+ - # "if (array==null) false; else {\n"+ - # "var good = false;\n"+ - # "for (v : array){ if (v==value) good=true; };\n"+ - # "good;\n"+ - # "}};\n", - - "getFlagValue": # SPECIFICALLY FOR cf_* FLAGS: CONCATENATE THE ATTRIBUTE NAME WITH ATTRIBUTE VALUE, IF EXISTS - "var getFlagValue = function(name){\n" + - "if (_source[name]!=null)" + - "\" \"+name+_source[name];\n" + - "else \n" + - "\"\";\n" + - "};\n", - - "getDocValue": - "var getDocValue = function(name){\n" + - "var out = [];\n" + - "var v = doc[name];\n" + - # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" + - "if (v==null || v.value==null) { null; } else\n" + - "if (v.values.size()<=1){ v.value; } else\n" + # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I - "{for(k : v.values) out.add(k); out;}" + - "};\n", - - "getSourceValue": - "var getSourceValue = function(name){\n" + - "var out = [];\n" + - "var v = _source[name];\n" + - # "if (v is org.elasticsearch.common.mvel2.ast.Function) v = v();=n" + - "if (v==null) { null; } else\n" + - "if (v[\"values\"]==null || v.values.size()<=1){ v.value; } else {\n" + # ES MAKES NO DISTINCTION BETWEEN v or [v], SO NEITHER DO I - "for(k : v) out.add(k); out;\n" + # .size() MUST BE USED INSTEAD OF .length, THE LATTER WILL CRASH IF JITTED (https://github.com/elasticsearch/elasticsearch/issues/3094) - "}};\n", - - "getDocArray": - "var getDocArray = 
function(name){\n" + - "var out = [];\n" + - "var v = doc[name];\n" + - "if (v!=null && v.value!=null) for(k : v.values) out.add(k);" + - "out;" + - "};\n", - - - "milli2Month": - "var milli2Month = function(value, milliOffset){\n" + - "g=new java.util.GregorianCalendar(new java.util.SimpleTimeZone(0, \"GMT\"));\n" + - "g.setTimeInMillis(value);\n" + - "g.add(java.util.GregorianCalendar.MILLISECOND, -milliOffset);\n" + - "m = g.get(java.util.GregorianCalendar.MONTH);\n" + - "output = \"\"+g.get(java.util.GregorianCalendar.YEAR)+(m>9?\"\":\"0\")+m;\n" + - "output;\n" + - "};\n", - - "between": - "var between = function(value, prefix, suffix){\n" + - "if (value==null){ null; }else{\n" + - "var start = value.indexOf(prefix, 0);\n" + - "if (start==-1){ null; }else{\n" + - "var end = value.indexOf(suffix, start+prefix.length());\n" + - "if (end==-1){ null; }else{\n" + - "value.substring(start+prefix.length(), end);\n" + - "}}}\n" + - "};\n" -} - - -def replacePrefix(value, prefix, new_prefix): - try: - if value.startswith(prefix): - return new_prefix+value[len(prefix)::] - return value - except Exception as e: - Log.error("can not replace prefix", e) diff --git a/vendor/jx_elasticsearch/es09/setop.py b/vendor/jx_elasticsearch/es09/setop.py deleted file mode 100644 index 55d0116..0000000 --- a/vendor/jx_elasticsearch/es09/setop.py +++ /dev/null @@ -1,248 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping - -from jx_base import domains -from jx_base.expressions import TRUE, jx_expression, Variable, LeavesOp -from jx_base.queries import is_variable_name -from jx_elasticsearch import es09 -from jx_elasticsearch.es09.expressions import unpack_terms -from jx_elasticsearch.es09.util import aggregates -from jx_elasticsearch import post as es_post -from jx_python.containers.cube import Cube -from mo_collections.matrix import Matrix -from mo_dots import coalesce, split_field, Data, wrap -from mo_dots import listwrap, unwrap -from mo_dots.lists import FlatList -from mo_logs import Log -from mo_math import AND, SUM, OR - - -def is_fieldop(query): - # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP) - - select = listwrap(query.select) - if not query.edges: - isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT - isSimple = AND(s.value != None and (s.value == "*" or is_variable_name(s.value)) for s in select) - noAgg = AND(s.aggregate == "none" for s in select) - - if not isDeep and isSimple and noAgg: - return True - else: - isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges) - if isSmooth: - return True - - return False - - -def es_fieldop(es, query): - FromES = es09.util.build_es_query(query) - select = listwrap(query.select) - FromES.query = { - "bool": { - "query": { - "match_all": {} - }, - "filter": jx_expression(query.where).to_esfilter() - } - } - FromES.size = coalesce(query.limit, 200000) - FromES.fields = FlatList() - for s in select.value: - if s == "*": - FromES.fields = None - elif isinstance(s, list): - FromES.fields.extend(s) - elif isinstance(s, Mapping): - FromES.fields.extend(s.values()) - else: - FromES.fields.append(s) - 
FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort] - - data = es_post(es, FromES, query.limit) - - T = data.hits.hits - matricies = {} - for s in select: - if s.value == "*": - matricies[s.name] = Matrix.wrap([t._source for t in T]) - elif isinstance(s.value, Mapping): - # for k, v in s.value.items(): - # matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T]) - matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T]) - elif isinstance(s.value, list): - matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T]) - elif not s.value: - matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T]) - else: - try: - matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T]) - except Exception as e: - Log.error("", e) - - cube = Cube(query.select, query.edges, matricies, frum=query) - cube.frum = query - return cube - - -def is_setop(query): - select = listwrap(query.select) - - if not query.edges: - isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT - simpleAgg = AND([s.aggregate in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT - - # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE - if simpleAgg or isDeep: - return True - else: - isSmooth = AND((e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none") for e in query.edges) - if isSmooth: - return True - - return False - - -def es_setop(es, mvel, query): - FromES = es09.util.build_es_query(query) - select = listwrap(query.select) - - isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT - isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT - - if not isDeep and not isComplex: - if len(select) == 1 and isinstance(select[0].value, LeavesOp): - FromES = wrap({ - "query": {"bool": { - "query": {"match_all": {}}, - "filter": query.where.to_esfilter() - }}, - "sort": query.sort, - "size": 0 - }) - elif all(isinstance(v, Variable) for v in select.value): - FromES = wrap({ - "query": {"bool": { - "query": {"match_all": {}}, - "filter": query.where.to_esfilter() - }}, - "fields": select.value, - "sort": query.sort, - "size": coalesce(query.limit, 200000) - }) - elif not isDeep: - simple_query = query.copy() - simple_query.where = TRUE # THE FACET FILTER IS FASTER - FromES.facets.mvel = { - "terms": { - "script_field": mvel.code(simple_query), - "size": coalesce(simple_query.limit, 200000) - }, - "facet_filter": jx_expression(query.where).to_esfilter() - } - else: - FromES.facets.mvel = { - "terms": { - "script_field": mvel.code(query), - "size": coalesce(query.limit, 200000) - }, - "facet_filter": jx_expression(query.where).to_esfilter() - } - - data = es_post(es, FromES, query.limit) - - if len(select) == 1 and isinstance(select[0].value, LeavesOp): - # SPECIAL CASE FOR SINGLE COUNT - cube = wrap(data).hits.hits._source - elif isinstance(select[0].value, Variable): - # SPECIAL CASE FOR SINGLE TERM - cube = wrap(data).hits.hits.fields - else: - data_list = unpack_terms(data.facets.mvel, select) - if not data_list: - cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select}) - else: - output = transpose(*data_list) - cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in 
enumerate(select)}) - - return Data( - meta={"esquery": FromES}, - data=cube - ) - - -def is_deep(query): - select = listwrap(query.select) - if len(select) > 1: - return False - - if aggregates[select[0].aggregate] not in ("none", "count"): - return False - - if len(query.edges)<=1: - return False - - isDeep = len(split_field(query["from"].name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT - if not isDeep: - return False # BETTER TO USE TERM QUERY - - return True - - -def es_deepop(es, mvel, query): - FromES = es09.util.build_es_query(query) - - select = query.edges - - temp_query = query.copy() - temp_query.select = select - temp_query.edges = FlatList() - FromES.facets.mvel = { - "terms": { - "script_field": mvel.code(temp_query), - "size": query.limit - }, - "facet_filter": jx_expression(query.where).to_esfilter() - } - - data = es_post(es, FromES, query.limit) - - rows = unpack_terms(data.facets.mvel, query.edges) - terms = transpose(*rows) - - # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING - edges = query.edges - for f, e in enumerate(edges): - for r in terms[f]: - e.domain.getPartByKey(r) - - e.index = f - for p, part in enumerate(e.domain.partitions): - part.dataIndex = p - e.domain.NULL.dataIndex = len(e.domain.partitions) - - # MAKE CUBE - dims = [len(e.domain.partitions) for e in query.edges] - output = Matrix(*dims) - - # FILL CUBE - for r in rows: - term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)] - output[term_coord] = SUM(output[term_coord], r[-1]) - - cube = Cube(query.select, query.edges, {query.select.name: output}) - cube.frum = query - return cube diff --git a/vendor/jx_elasticsearch/es09/terms.py b/vendor/jx_elasticsearch/es09/terms.py deleted file mode 100644 index 32d03bf..0000000 --- a/vendor/jx_elasticsearch/es09/terms.py +++ /dev/null @@ -1,152 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term -from jx_elasticsearch import post as es_post -from jx_python import jx -from jx_python.containers.cube import Cube -from jx_python.expressions import simplify_esfilter -from mo_collections.matrix import Matrix -from mo_dots import coalesce -from mo_dots import wrap, listwrap -from mo_dots.lists import FlatList -from mo_math import AND - - -def is_terms(query): - select = listwrap(query.select) - - isSimple = not query.select or AND(aggregates[s.aggregate] in ("none", "count") for s in select) - if isSimple: - return True - return False - - -def es_terms(es, mvel, query): - """ - RETURN LIST OF ALL EDGE QUERIES - - EVERY FACET IS NAMED <select.name>, <coord1>, <coord2>, ... WHERE <coordN> ARE THE ELEMENT COORDINATES - WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION - """ - if len(query.edges) == 2: - return _es_terms2(es, mvel, query) - - select = listwrap(query.select) - FromES = build_es_query(query) - packed_term = compileEdges2Term(mvel, query.edges, wrap([])) - for s in select: - FromES.facets[s.name] = { - "terms": { - "field": packed_term.field, - "script_field": packed_term.expression, - "size": coalesce(query.limit, 200000) - }, - "facet_filter": simplify_esfilter(query.where) - } - - term2Parts = packed_term.term2parts - - data = es_post(es, FromES, query.limit) - - # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS - # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN - for k, f in data.facets.items(): - for t in f.terms: - term2Parts(t.term) - - # NUMBER ALL EDGES FOR jx INDEXING - for f, e in enumerate(query.edges): - e.index = f - if e.domain.type in ["uid", "default"]: - # e.domain.partitions = jx.sort(e.domain.partitions, "value") - for p, part in enumerate(e.domain.partitions): - part.dataIndex = p - e.domain.NULL.dataIndex = len(e.domain.partitions) - - # MAKE CUBE - output = {} - dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] - for s in select: - output[s.name] = Matrix(*dims) - - # FILL CUBE - # EXPECTING ONLY SELECT CLAUSE FACETS - for facetName, facet in data.facets.items(): - for term in facet.terms: - term_coord = term2Parts(term.term).dataIndex - for s in select: - try: - output[s.name][term_coord] = term[aggregates[s.aggregate]] - except Exception as e: - # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS - pass - cube = Cube(query.select, query.edges, output) - cube.query = query - return cube - - -def _es_terms2(es, mvel, query): - """ - WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value - """ - - # REQUEST VALUES IN FIRST DIMENSION - q1 = query.copy() - q1.edges = query.edges[0:1:] - values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value - - select = listwrap(query.select) - FromES = build_es_query(query) - for s in select: - for i, v in enumerate(values1): - FromES.facets[s.name + "," + str(i)] = { - "terms": { - "field": query.edges[1].value, - "size": coalesce(query.limit, 200000) - }, - "facet_filter": simplify_esfilter({"and": [ - query.where, - {"term": {query.edges[0].value: v}} - ]}) - } - - data = es_post(es, FromES, query.limit) - - # UNION ALL TERMS FROM SECOND DIMENSION - values2 = set() - for k, f in data.facets.items(): - values2.update(f.terms.term) - values2 = jx.sort(values2) - term2index = {v: i for i, v in enumerate(values2)} - query.edges[1].domain.partitions = FlatList([{"name": v, "value": v} for v in values2]) - - # MAKE CUBE - output = {} - dims = [len(values1), len(values2)] - for s in select: - output[s.name] = Matrix(*dims) - - # FILL CUBE - # EXPECTING ONLY SELECT CLAUSE FACETS - for facetName, facet in data.facets.items(): - coord = facetName.split(",") - s = [s for s in select if s.name == coord[0]][0] - i1 = int(coord[1]) - for term in facet.terms: - i2 = term2index[term.term] - output[s.name][(i1, i2)] = term[aggregates[s.aggregate]] - - cube = Cube(query.select, query.edges, output) - cube.query = query - return cube - diff --git a/vendor/jx_elasticsearch/es09/terms_stats.py b/vendor/jx_elasticsearch/es09/terms_stats.py deleted file mode 100644 index b97a2e5..0000000 --- a/vendor/jx_elasticsearch/es09/terms_stats.py +++ /dev/null @@ -1,337 +0,0 @@ -# encoding: utf-8 -# -# -# This 
Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.queries import is_variable_name -from jx_elasticsearch.es09.expressions import UID -from jx_elasticsearch.es09.util import aggregates, build_es_query, compileEdges2Term -from jx_python import domains -from jx_python import es09 -from jx_python.containers.cube import Cube -from jx_python.expressions import simplify_esfilter -from mo_collections.matrix import Matrix -from mo_dots import literal_field, coalesce -from mo_dots import wrap, listwrap -from mo_dots.lists import FlatList -from mo_logs import Log -from mo_math import COUNT, PRODUCT - - -def is_terms_stats(query): - # ONLY ALLOWED ONE UNKNOWN DOMAIN - num_unknown = COUNT(1 for e in query.edges if e.domain.type not in domains.KNOWN) - - if num_unknown <= 1: - if query.sort: - Log.error("terms_stats can not be sorted") - - return True - return False - - -def es_terms_stats(esq, mvel, query): - select = listwrap(query.select) - facetEdges = [] # EDGES THAT WILL REQUIRE A FACET FOR EACH PART - termsEdges = FlatList() - specialEdge = None - special_index = -1 - - # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME - # FIND THE specialEdge, IF ONE - for f, tedge in enumerate(query.edges): - if tedge.domain.type in domains.KNOWN: - for p, part in enumerate(tedge.domain.partitions): - part.dataIndex = p - - # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY) - # OR IF WE ARE NOT SIMPLY COUNTING - # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet) - # OR IF WE JUST WANT TO FORCE IT :) - # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM - - facetEdges.append(tedge) - else: - if specialEdge: - Log.error("There is more than one open-ended edge: self can not be handled") - specialEdge = tedge - special_index = f - termsEdges.append(tedge) - - if not specialEdge: - # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS - # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE? 
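# SKETCH ADDED BY EDITOR (NOT IN THE ORIGINAL FILE): THE LOOP BELOW IS AN ARGMAX OVER
# PARTITION COUNTS, RESTRICTED TO EDGES THAT CAN BE PACKED INTO THE TERM; UNDER THAT
# ASSUMPTION IT IS EQUIVALENT TO SOMETHING LIKE:
#
#     special_index, specialEdge = max(
#         ((i, e) for i, e in enumerate(facetEdges)
#          if (e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1),
#         key=lambda pair: len(pair[1].domain.partitions)
#     )
#
# I.E. THE WIDEST ELIGIBLE FACET EDGE IS DEMOTED TO A TERMS EDGE TO KEEP THE FACET
# CROSS-PRODUCT SMALL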
- num_parts = 0 - special_index = -1 - for i, e in enumerate(facetEdges): - l = len(e.domain.partitions) - if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts: - num_parts = l - specialEdge = e - special_index = i - - facetEdges.pop(special_index) - termsEdges.append(specialEdge) - - total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select) - if total_facets > 100: - # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY: - counts = esq.query({ - "from": query.frum, - "select": {"aggregate": "count"}, - "edges": facetEdges, - "where": query.where, - "limit": query.limit - }) - - esFacets = [] - - def add_facet(value, parts, cube): - if value: - esFacets.append(parts) - - counts["count"].forall(add_facet) - - Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count= len(esFacets), theory_count=total_facets) - - if not esFacets: - # MAKE EMPTY CUBE - matricies = {} - dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] - for s in select: - matricies[s.name] = Matrix(*dims) - cube = Cube(query.select, query.edges, matricies) - cube.frum = query - return cube - - else: - # GENERATE ALL COMBOS - esFacets = getAllEdges(facetEdges) - - calcTerm = compileEdges2Term(mvel, termsEdges, FlatList()) - term2parts = calcTerm.term2parts - - if len(esFacets) * len(select) > 1000: - Log.error("not implemented yet") # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES - pass - - FromES = build_es_query(query) - - for s in select: - for parts in esFacets: - condition = FlatList() - constants = FlatList() - name = [literal_field(s.name)] - for f, fedge in enumerate(facetEdges): - name.append(str(parts[f].dataIndex)) - condition.append(buildCondition(mvel, fedge, parts[f])) - constants.append({"name": fedge.domain.name, "value": parts[f]}) - condition.append(query.where) - name = ",".join(name) - - FromES.facets[name] = { - "terms_stats": { - "key_field": calcTerm.field, - "value_field": s.value if is_variable_name(s.value) else None, - "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None, - "size": coalesce(query.limit, 200000) - } - } - if condition: - FromES.facets[name].facet_filter = simplify_esfilter({"and": condition}) - - data = es_post(esq.es, FromES, query.limit) - - if specialEdge.domain.type not in domains.KNOWN: - # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED - partitions = FlatList() - map = {} - for facetName, parts in data.facets.items(): - for stats in parts.terms: - if not map[stats]: - part = {"value": stats, "name": stats} - partitions.append(part) - map[stats] = part - - partitions.sort(specialEdge.domain.compare) - for p, part in enumerate(partitions): - part.dataIndex = p - - specialEdge.domain.map = map - specialEdge.domain.partitions = partitions - - # MAKE CUBE - matricies = {} - dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] - for s in select: - matricies[s.name] = Matrix(*dims) - - name2agg = {s.name: aggregates[s.aggregate] for s in select} - - # FILL CUBE - for edgeName, parts in data.facets.items(): - temp = edgeName.split(",") - pre_coord = tuple(int(c) for c in temp[1:]) - sname = temp[0] - - for stats in parts.terms: - if specialEdge: - special = term2parts(stats.term)[0] - coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:] - else: - coord = pre_coord - matricies[sname][coord] = stats[name2agg[sname]] - - 
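# NOTE ADDED BY EDITOR (THE FACET NAME "count,2,0" IS HYPOTHETICAL): EACH FACET NAME
# PACKS THE SELECT NAME WITH THE FIXED-EDGE COORDINATES, SO "count,2,0" DECODES TO
# sname == "count" AND pre_coord == (2, 0); WITH special_index == 1 AND A TERM WHOSE
# PART HAS dataIndex s, THE CUBE COORDINATE BECOMES (2, s, 0)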
cube = Cube(query.select, query.edges, matricies) - cube.frum = query - return cube - - -def register_script_field(FromES, code): - if not FromES.script_fields: - FromES.script_fields = {} - - # IF CODE IS IDENTICAL, THEN USE THE EXISTING SCRIPT - for n, c in FromES.script_fields.items(): - if c.script == code: - return n - - name = "script" + UID() - FromES.script_fields[name].script = code - return name - - -def getAllEdges(facetEdges): - if not facetEdges: - return [()] - return _getAllEdges(facetEdges, 0) - - -def _getAllEdges(facetEdges, edgeDepth): - """ - RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES - """ - if edgeDepth == len(facetEdges): - return [()] - edge = facetEdges[edgeDepth] - - deeper = _getAllEdges(facetEdges, edgeDepth + 1) - - output = FlatList() - partitions = edge.domain.partitions - for part in partitions: - for deep in deeper: - output.append((part,) + deep) - return output - - -def buildCondition(mvel, edge, partition): - """ - RETURN AN ES FILTER OBJECT - """ - output = {} - - if edge.domain.isFacet: - # MUST USE THIS' esFacet - condition = wrap(coalesce(partition.where, {"and": []})) - - if partition.min and partition.max and is_variable_name(edge.value): - condition["and"].append({ - "range": {edge.value: {"gte": partition.min, "lt": partition.max}} - }) - - # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT) - return simplify_esfilter(condition) - elif edge.range: - # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN - # USE MVEL CODE - if edge.domain.type in domains.ALGEBRAIC: - output = {"and": []} - - if edge.range.mode and edge.range.mode == "inclusive": - # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE - if is_variable_name(edge.range.min): - output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}}) - else: - # WHOA!! SUPER SLOW!! - output["and"].append({"script": {"script": mvel.compile_expression( - edge.range.min + " < " + es09.expressions.value2MVEL(partition.max) - )}}) - - if is_variable_name(edge.range.max): - output["and"].append({"or": [ - {"missing": {"field": edge.range.max}}, - {"range": {edge.range.max: {"gt": es09.expressions.value2value(partition.min)}}} - ]}) - else: - # WHOA!! SUPER SLOW!! - output["and"].append({"script": {"script": mvel.compile_expression( - edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}}) - - else: - # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE - if is_variable_name(edge.range.min): - output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}}) - else: - # WHOA!! SUPER SLOW!! - output["and"].append({"script": {"script": mvel.compile_expression( - edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min) - )}}) - - if is_variable_name(edge.range.max): - output["and"].append({"or": [ - {"missing": {"field": edge.range.max}}, - {"range": {edge.range.max: {"gte": es09.expressions.value2value(partition.min)}}} - ]}) - else: - # WHOA!! SUPER SLOW!! 
- output["and"].append({"script": {"script": mvel.compile_expression( - es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max - )}}) - return output - else: - Log.error("Do not know how to handle range query on non-continuous domain") - - elif not edge.value: - # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE) - return partition.esfilter - elif is_variable_name(edge.value): - # USE FAST ES SYNTAX - if edge.domain.type in domains.ALGEBRAIC: - output.range = {} - output.range[edge.value] = {"gte": es09.expressions.value2query(partition.min), "lt": es09.expressions.value2query(partition.max)} - elif edge.domain.type == "set": - if partition.value: - if partition.value != edge.domain.getKey(partition): - Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former") - # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS - output.term = {edge.value: partition.value} - else: - output.term = {edge.value: edge.domain.getKey(partition)} - - elif edge.domain.type == "default": - output.term = dict() - output.term[edge.value] = partition.value - else: - Log.error("Edge \"" + edge.name + "\" is not supported") - - return output - else: - # USE MVEL CODE - if edge.domain.type in domains.ALGEBRAIC: - output.script = {"script": edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)} - else: - output.script = {"script": "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)} - - code = es09.expressions.addFunctions(output.script.script) - output.script.script = code.head + code.body - return output - diff --git a/vendor/jx_elasticsearch/es09/util.py b/vendor/jx_elasticsearch/es09/util.py deleted file mode 100644 index 6d7c4c7..0000000 --- a/vendor/jx_elasticsearch/es09/util.py +++ /dev/null @@ -1,355 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from datetime import datetime - -from jx_base.queries import is_variable_name - -from mo_logs.strings import quote - -from mo_logs import Log, strings -from mo_dots import Data -from mo_dots import coalesce -from mo_dots import wrap -from mo_dots.lists import FlatList -from pyLibrary import convert -from mo_math import COUNT -from mo_math import Math -from mo_math import stats -from jx_base import domains -from jx_elasticsearch.es09.expressions import value2MVEL -from mo_times import durations - - -DEBUG = False - - -def build_es_query(query): - output = wrap({ - "query": {"match_all": {}}, - "from": 0, - "size": 100 if DEBUG else 0, - "sort": [], - "facets": { - } - }) - - if DEBUG: - # TO LIMIT RECORDS TO WHAT'S IN FACETS - output.query = { - "bool": { - "query": { - "match_all": {} - }, - "filter": query.where.to_esfilter() - } - } - - return output - - - - - -def compileTime2Term(edge): - """ - RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER AND - AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS) - """ - if edge.esscript: - Log.error("edge script not supported yet") - - # IS THERE A LIMIT ON THE DOMAIN? 
- numPartitions = len(edge.domain.partitions) - value = edge.value - if is_variable_name(value): - value = "doc[\"" + value + "\"].value" - - nullTest = compileNullTest(edge) - ref = coalesce(edge.domain.min, edge.domain.max, datetime(2000, 1, 1)) - - if edge.domain.interval.month > 0: - offset = ref.subtract(ref.floorMonth(), durations.DAY).milli - if offset > durations.DAY.milli * 28: - offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli - partition2int = "milli2Month(" + value + ", " + value2MVEL(offset) + ")" - partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")" - - def int2Partition(value): - if Math.round(value) == 0: - return edge.domain.NULL - - d = datetime(str(value)[:4:], str(value)[-2:], 1) - d = d.addMilli(offset) - return edge.domain.getPartByKey(d) - else: - partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + edge.domain.interval.milli + ")" - partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")" - - def int2Partition(value): - if Math.round(value) == numPartitions: - return edge.domain.NULL - return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value))) - - return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition) - - -# RETURN MVEL CODE THAT MAPS DURATION DOMAINS DOWN TO AN INTEGER AND -# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS) -def compileDuration2Term(edge): - if edge.esscript: - Log.error("edge script not supported yet") - - # IS THERE A LIMIT ON THE DOMAIN? - numPartitions = len(edge.domain.partitions) - value = edge.value - if is_variable_name(value): - value = "doc[\"" + value + "\"].value" - - ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO) - nullTest = compileNullTest(edge) - - ms = edge.domain.interval.milli - if edge.domain.interval.month > 0: - ms = durations.YEAR.milli / 12 * edge.domain.interval.month - - partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + ms + ")" - partition2int = "((" + nullTest + ") ? 
" + numPartitions + " : " + partition2int + ")" - - def int2Partition(value): - if Math.round(value) == numPartitions: - return edge.domain.NULL - return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value))) - - return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition) - - -# RETURN MVEL CODE THAT MAPS THE numeric DOMAIN DOWN TO AN INTEGER AND -# AND THE JAVASCRIPT THAT WILL TURN THAT INTEGER BACK INTO A PARTITION (INCLUDING NULLS) -def compileNumeric2Term(edge): - if edge.script: - Log.error("edge script not supported yet") - - if edge.domain.type != "numeric" and edge.domain.type != "count": - Log.error("can only translate numeric domains") - - numPartitions = len(edge.domain.partitions) - value = edge.value - if is_variable_name(value): - value = "doc[\"" + value + "\"].value" - - if not edge.domain.max: - if not edge.domain.min: - ref = 0 - partition2int = "Math.floor(" + value + ")/" + value2MVEL(edge.domain.interval) + ")" - nullTest = "false" - else: - ref = value2MVEL(edge.domain.min) - partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")" - nullTest = "" + value + "<" + ref - elif not edge.domain.min: - ref = value2MVEL(edge.domain.max) - partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")" - nullTest = "" + value + ">=" + ref - else: - top = value2MVEL(edge.domain.max) - ref = value2MVEL(edge.domain.min) - partition2int = "Math.floor((" + value + "-" + ref + ")/" + value2MVEL(edge.domain.interval) + ")" - nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")" - - partition2int = "((" + nullTest + ") ? " + numPartitions + " : " + partition2int + ")" - offset = convert.value2int(ref) - - def int2Partition(value): - if Math.round(value) == numPartitions: - return edge.domain.NULL - return edge.domain.getPartByKey((value * edge.domain.interval) + offset) - - return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition) - - -def compileString2Term(edge): - if edge.esscript: - Log.error("edge script not supported yet") - - value = edge.value - if is_variable_name(value): - value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)}) - else: - Log.error("not handled") - - def fromTerm(value): - return edge.domain.getPartByKey(value) - - return Data( - toTerm={"head": "", "body": value}, - fromTerm=fromTerm - ) - - -def compileNullTest(edge): - """ - RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS - """ - if edge.domain.type not in domains.ALGEBRAIC: - Log.error("can only translate time and duration domains") - - # IS THERE A LIMIT ON THE DOMAIN? - value = edge.value - if is_variable_name(value): - value = "doc[\"" + value + "\"].value" - - if not edge.domain.max: - if not edge.domain.min: - return False - bot = value2MVEL(edge.domain.min) - nullTest = "" + value + "<" + bot - elif not edge.domain.min: - top = value2MVEL(edge.domain.max) - nullTest = "" + value + ">=" + top - else: - top = value2MVEL(edge.domain.max) - bot = value2MVEL(edge.domain.min) - nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")" - - return nullTest - - -def compileEdges2Term(mvel_compiler, edges, constants): - """ - TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|) - - GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM - GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE - RETURNS TUPLE OBJECT WITH "type" and "value" ATTRIBUTES. 
- "type" CAN HAVE A VALUE OF "script", "field" OR "count" - CAN USE THE constants (name, value pairs) - """ - - # IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING - edge0 = edges[0] - - if len(edges) == 1 and edge0.domain.type in ["set", "default"]: - # THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET - def temp(term): - return FlatList([edge0.domain.getPartByKey(term)]) - - if edge0.value and is_variable_name(edge0.value): - return Data( - field=edge0.value, - term2parts=temp - ) - elif COUNT(edge0.domain.dimension.fields) == 1: - return Data( - field=edge0.domain.dimension.fields[0], - term2parts=temp - ) - elif not edge0.value and edge0.domain.partitions: - script = mvel_compiler.Parts2TermScript(edge0.domain) - return Data( - expression=script, - term2parts=temp - ) - else: - return Data( - expression=mvel_compiler.compile_expression(edge0.value, constants), - term2parts=temp - ) - - mvel_terms = [] # FUNCTION TO PACK TERMS - fromTerm2Part = [] # UNPACK TERMS BACK TO PARTS - for e in edges: - domain = e.domain - fields = domain.dimension.fields - - if not e.value and fields: - code, decode = mvel_compiler.Parts2Term(e.domain) - t = Data( - toTerm=code, - fromTerm=decode - ) - elif fields: - Log.error("not expected") - elif e.domain.type == "time": - t = compileTime2Term(e) - elif e.domain.type == "duration": - t = compileDuration2Term(e) - elif e.domain.type in domains.ALGEBRAIC: - t = compileNumeric2Term(e) - elif e.domain.type == "set" and not fields: - def fromTerm(term): - return e.domain.getPartByKey(term) - - code, decode = mvel_compiler.Parts2Term(e.domain) - t = Data( - toTerm=code, - fromTerm=decode - ) - else: - t = compileString2Term(e) - - if not t.toTerm.body: - mvel_compiler.Parts2Term(e.domain) - Log.unexpected("what?") - - fromTerm2Part.append(t.fromTerm) - mvel_terms.append(t.toTerm.body) - - # REGISTER THE DECODE FUNCTION - def temp(term): - terms = term.split('|') - - output = FlatList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)]) - return output - - return Data( - expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants), - term2parts=temp - ) - - -def fix_es_stats(s): - """ - ES RETURNS BAD DEFAULT VALUES FOR STATS - """ - s = wrap(s) - if s.count == 0: - return stats.zero - return s - - -# MAP NAME TO SQL FUNCTION -aggregates = { - "none": "none", - "one": "count", - "sum": "total", - "add": "total", - "count": "count", - "maximum": "max", - "minimum": "min", - "max": "max", - "min": "min", - "mean": "mean", - "average": "mean", - "avg": "mean", - "N": "count", - "X0": "count", - "X1": "total", - "X2": "sum_of_squares", - "std": "std_deviation", - "stddev": "std_deviation", - "var": "variance", - "variance": "variance" -} - - diff --git a/vendor/jx_elasticsearch/es14/__init__.py b/vendor/jx_elasticsearch/es14/__init__.py deleted file mode 100644 index 8f25db1..0000000 --- a/vendor/jx_elasticsearch/es14/__init__.py +++ /dev/null @@ -1,238 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping - -from jx_base import container -from jx_base.container import Container -from jx_base.dimensions import Dimension -from jx_base.expressions import jx_expression -from jx_base.queries import is_variable_name -from jx_base.query import QueryOp -from jx_elasticsearch.es14.aggs import es_aggsop, is_aggsop -from jx_elasticsearch.es14.deep import is_deepop, es_deepop -from jx_elasticsearch.es14.setop import is_setop, es_setop -from jx_elasticsearch.es14.util import aggregates -from jx_elasticsearch.meta import ElasticsearchMetadata, Table -from jx_python import jx -from mo_dots import Data, Null, unwrap, coalesce, split_field, literal_field, unwraplist, join_field, wrap, listwrap, FlatList -from mo_json import scrub, value2json -from mo_json.typed_encoder import TYPE_PREFIX, EXISTS_TYPE -from mo_kwargs import override -from mo_logs import Log, Except -from pyLibrary.env import elasticsearch, http - - -class ES14(Container): - """ - SEND jx QUERIES TO ElasticSearch - """ - - def __new__(cls, *args, **kwargs): - if (len(args) == 1 and args[0].get("index") == "meta") or kwargs.get("index") == "meta": - output = ElasticsearchMetadata.__new__(ElasticsearchMetadata, *args, **kwargs) - output.__init__(*args, **kwargs) - return output - else: - return Container.__new__(cls) - - @override - def __init__( - self, - host, - index, - type=None, - name=None, - port=9200, - read_only=True, - timeout=None, # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests) - wait_for_active_shards=1, # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency) - typed=None, - kwargs=None - ): - Container.__init__(self) - if not container.config.default: - container.config.default = { - "type": "elasticsearch", - "settings": unwrap(kwargs) - } - self.settings = kwargs - self.name = name = coalesce(name, alias, index) - if read_only: - self.es = elasticsearch.Alias(alias=coalesce(alias, index), kwargs=kwargs) - else: - self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs) - - self._namespace = ElasticsearchMetadata(kwargs=kwargs) - self.settings.type = self.es.settings.type - self.edges = Data() - self.worker = None - - columns = self._namespace.get_snowflake(self.es.settings.alias).columns # ABSOLUTE COLUMNS - is_typed = any(c.es_column == EXISTS_TYPE for c in columns) - - if typed == None: - # SWITCH ON TYPED MODE - self.typed = is_typed - else: - if is_typed != typed: - Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed) - self.typed = typed - - @property - def snowflake(self): - return self._namespace.get_snowflake(self.es.settings.alias) - - @property - def namespace(self): - return self._namespace - - - def get_table(self, full_name): - return Table(full_name, self) - - def get_schema(self, query_path): - return self._namespace.get_schema(query_path) - - def __data__(self): - settings = self.settings.copy() - settings.settings = None - return settings - - def __enter__(self): - Log.error("No longer used") - return self - - def __exit__(self, type, value, traceback): - if not self.worker: - return - - if isinstance(value, Exception): - self.worker.stop() - self.worker.join() - else: - self.worker.join() - - @property - def 
query_path(self): - return join_field(split_field(self.name)[1:]) - - @property - def url(self): - return self.es.url - - def query(self, _query): - try: - query = QueryOp.wrap(_query, container=self, namespace=self.namespace) - - for s in listwrap(query.select): - if s.aggregate != None and not aggregates.get(s.aggregate): - Log.error( - "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate", - name=s.name, - aggregate=s.aggregate - ) - - frum = query["from"] - if isinstance(frum, QueryOp): - result = self.query(frum) - q2 = query.copy() - q2.frum = result - return jx.run(q2) - - if is_deepop(self.es, query): - return es_deepop(self.es, query) - if is_aggsop(self.es, query): - return es_aggsop(self.es, frum, query) - if is_setop(self.es, query): - return es_setop(self.es, query) - Log.error("Can not handle") - except Exception as e: - e = Except.wrap(e) - if "Data too large, data for" in e: - http.post(self.es.cluster.url / "_cache/clear") - Log.error("Problem (Tried to clear Elasticsearch cache)", e) - Log.error("problem", e) - - def addDimension(self, dim): - if isinstance(dim, list): - Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", dim= dim) - self._addDimension(dim, []) - - def _addDimension(self, dim, path): - dim.full_name = dim.name - for e in dim.edges: - d = Dimension(e, dim, self) - self.edges[d.full_name] = d - - def __getitem__(self, item): - c = self.get_columns(table_name=self.name, column_name=item) - if c: - if len(c) > 1: - Log.error("Do not know how to handle multipole matches") - return c[0] - - e = self.edges[item] - if not c: - Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name) - return e - - def __getattr__(self, item): - return self.edges[item] - - def update(self, command): - """ - EXPECTING command == {"set":term, "where":where} - THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES - THE where CLAUSE IS AN ES FILTER - """ - command = wrap(command) - schema = self.es.get_properties() - - # GET IDS OF DOCUMENTS - results = self.es.search({ - "fields": listwrap(schema._routing.path), - "query": {"filtered": { - "filter": jx_expression(command.where).to_esfilter(Null) - }}, - "size": 10000 - }) - - # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT) - scripts = FlatList() - for k, v in command.set.items(): - if not is_variable_name(k): - Log.error("Only support simple paths for now") - if isinstance(v, Mapping) and v.doc: - scripts.append({"doc": v.doc}) - else: - v = scrub(v) - scripts.append({"script": "ctx._source." 
+ k + " = " + jx_expression(v).to_es_script(schema).script(schema)}) - - if results.hits.hits: - updates = [] - for h in results.hits.hits: - for s in scripts: - updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}}) - updates.append(s) - content = ("\n".join(value2json(c) for c in updates) + "\n") - response = self.es.cluster.post( - self.es.path + "/_bulk", - data=content, - headers={"Content-Type": "application/json"}, - timeout=self.settings.timeout, - params={"wait_for_active_shards": self.settings.wait_for_active_shards} - ) - if response.errors: - Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]) - diff --git a/vendor/jx_elasticsearch/es14/aggs.py b/vendor/jx_elasticsearch/es14/aggs.py deleted file mode 100644 index d390c30..0000000 --- a/vendor/jx_elasticsearch/es14/aggs.py +++ /dev/null @@ -1,469 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.domains import SetDomain -from jx_base.expressions import TupleOp, NULL -from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT -from jx_elasticsearch import post as es_post -from jx_elasticsearch.es14.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder -from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, NullOp -from jx_elasticsearch.es14.setop import get_pull_stats -from jx_elasticsearch.es14.util import aggregates -from jx_python import jx -from jx_python.expressions import jx_expression_to_function -from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist -from mo_future import text_type -from mo_json.typed_encoder import EXISTS -from mo_json.typed_encoder import encode_property -from mo_logs import Log -from mo_math import Math, MAX, UNION -from mo_times.timer import Timer - - -def is_aggsop(es, query): - if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate): - return True - return False - - -def get_decoders_by_depth(query): - """ - RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH - """ - schema = query.frum.schema - output = FlatList() - - if query.edges: - if query.sort and query.format != "cube": - # REORDER EDGES/GROUPBY TO MATCH THE SORT - query.edges = sort_edges(query, "edges") - elif query.groupby: - if query.sort and query.format != "cube": - query.groupby = sort_edges(query, "groupby") - - for edge in wrap(coalesce(query.edges, query.groupby, [])): - limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT) - if edge.value != None and not isinstance(edge.value, NullOp): - edge = edge.copy() - vars_ = edge.value.vars() - for v in vars_: - if not schema.leaves(v.var): - Log.error("{{var}} does not exist in schema", var=v) - elif edge.range: - vars_ = edge.range.min.vars() | edge.range.max.vars() - for v in vars_: - if not schema[v.var]: - Log.error("{{var}} does not exist in schema", var=v) - elif edge.domain.dimension: - vars_ = edge.domain.dimension.fields - edge.domain.dimension = edge.domain.dimension.copy() - 
edge.domain.dimension.fields = [schema[v].es_column for v in vars_] - elif all(edge.domain.partitions.where): - vars_ = set() - for p in edge.domain.partitions: - vars_ |= p.where.vars() - - try: - vars_ |= edge.value.vars() - depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var)) - if -1 in depths: - Log.error( - "Do not know of column {{column}}", - column=unwraplist([v for v in vars_ if schema[v] == None]) - ) - if len(depths) > 1: - Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value) - max_depth = MAX(depths) - while len(output) <= max_depth: - output.append([]) - except Exception as e: - # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY - max_depth = 0 - output.append([]) - - output[max_depth].append(AggsDecoder(edge, query, limit)) - return output - - -def sort_edges(query, prop): - ordered_edges = [] - remaining_edges = getattr(query, prop) - for s in query.sort: - for e in remaining_edges: - if e.value == s.value: - if isinstance(e.domain, SetDomain): - pass # ALREADY SORTED? - else: - e.domain.sort = s.sort - ordered_edges.append(e) - remaining_edges.remove(e) - break - else: - Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value) - - ordered_edges.extend(remaining_edges) - return ordered_edges - - -def es_aggsop(es, frum, query): - query = query.copy() # WE WILL MARK UP THIS QUERY - schema = frum.schema - select = listwrap(query.select) - - es_query = Data() - new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING - formula = [] - for s in select: - if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".": - if schema.query_path == ".": - s.pull = jx_expression_to_function("doc_count") - else: - s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]}) - elif isinstance(s.value, Variable): - if s.aggregate == "count": - new_select["count_"+literal_field(s.value.var)] += [s] - else: - new_select[literal_field(s.value.var)] += [s] - elif s.aggregate: - formula.append(s) - - for canonical_name, many in new_select.items(): - for s in many: - columns = frum.schema.values(s.value.var) - - if s.aggregate == "count": - canonical_names = [] - for column in columns: - cn = literal_field(column.es_column + "_count") - if column.jx_type == EXISTS: - canonical_names.append(cn + ".doc_count") - es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}} - else: - canonical_names.append(cn+ ".value") - es_query.aggs[cn].value_count.field = column.es_column - if len(canonical_names) == 1: - s.pull = jx_expression_to_function(canonical_names[0]) - else: - s.pull = jx_expression_to_function({"add": canonical_names}) - elif s.aggregate == "median": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # ES USES DIFFERENT METHOD FOR PERCENTILES - key = literal_field(canonical_name + " percentile") - - es_query.aggs[key].percentiles.field = columns[0].es_column - es_query.aggs[key].percentiles.percents += [50] - s.pull = jx_expression_to_function(key + ".values.50\\.0") - elif s.aggregate == "percentile": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # ES USES DIFFERENT METHOD FOR PERCENTILES - key = literal_field(canonical_name + " percentile") - if isinstance(s.percentile, text_type) or s.percetile < 0 or 1 < s.percentile: - Log.error("Expecting percentile to 
be a float from 0.0 to 1.0") - percent = Math.round(s.percentile * 100, decimal=6) - - es_query.aggs[key].percentiles.field = columns[0].es_column - es_query.aggs[key].percentiles.percents += [percent] - es_query.aggs[key].percentiles.compression = 2 - s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) - elif s.aggregate == "cardinality": - canonical_names = [] - for column in columns: - cn = literal_field(column.es_column + "_cardinality") - canonical_names.append(cn) - es_query.aggs[cn].cardinality.field = column.es_column - if len(columns) == 1: - s.pull = jx_expression_to_function(canonical_names[0] + ".value") - else: - s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0}) - elif s.aggregate == "stats": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # REGULAR STATS - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].extended_stats.field = columns[0].es_column - - # GET MEDIAN TOO! - median_name = literal_field(canonical_name + "_percentile") - es_query.aggs[median_name].percentiles.field = columns[0].es_column - es_query.aggs[median_name].percentiles.percents += [50] - - s.pull = get_pull_stats(stats_name, median_name) - elif s.aggregate == "union": - pulls = [] - for column in columns: - stats_name = encode_property(column.es_column) - - if column.nested_path[0] == ".": - es_query.aggs[stats_name] = {"terms": { - "field": column.es_column, - "size": Math.min(s.limit, MAX_LIMIT) - }} - pulls.append(get_bucket_keys(stats_name)) - - else: - es_query.aggs[stats_name] = { - "nested": {"path": column.nested_path[0]}, - "aggs": {"_nested": {"terms": { - "field": column.es_column, - "size": Math.min(s.limit, MAX_LIMIT) - }}} - } - pulls.append(get_bucket_keys(stats_name+"._nested")) - if len(pulls) == 0: - s.pull = NULL - elif len(pulls) == 1: - s.pull = pulls[0] - else: - s.pull = lambda row: UNION( - p(row) - for p in pulls - ) - else: - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - - # PULL VALUE OUT OF THE stats AGGREGATE - es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column - s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." 
+ aggregates[s.aggregate], s.default]}) - - for i, s in enumerate(formula): - canonical_name = literal_field(s.name) - - if isinstance(s.value, TupleOp): - if s.aggregate == "count": - # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY - s.pull = "doc_count" - else: - Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate) - elif s.aggregate == "count": - es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value") - elif s.aggregate == "median": - # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT - key = literal_field(canonical_name + " percentile") - - es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[key].percentiles.percents += [50] - s.pull = jx_expression_to_function(key + ".values.50\\.0") - elif s.aggregate == "percentile": - # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT - key = literal_field(canonical_name + " percentile") - percent = Math.round(s.percentile * 100, decimal=6) - - es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[key].percentiles.percents += [percent] - s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) - elif s.aggregate == "cardinality": - # ES USES DIFFERENT METHOD FOR CARDINALITY - key = canonical_name + " cardinality" - - es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(key + ".value") - elif s.aggregate == "stats": - # REGULAR STATS - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema) - - # GET MEDIAN TOO! - median_name = literal_field(canonical_name + " percentile") - es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[median_name].percentiles.percents += [50] - - s.pull = get_pull_stats(stats_name, median_name) - elif s.aggregate=="union": - # USE TERMS AGGREGATE TO SIMULATE union - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(stats_name + ".buckets.key") - else: - # PULL VALUE OUT OF THE stats AGGREGATE - s.pull = jx_expression_to_function(canonical_name + "." 
+ aggregates[s.aggregate]) - es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema) - - decoders = get_decoders_by_depth(query) - start = 0 - - # THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested - split_where = split_expression_by_depth(query.where, schema=frum.schema) - - if len(split_field(frum.name)) > 1: - if any(split_where[2::]): - Log.error("Where clause is too deep") - - for d in decoders[1]: - es_query = d.append_query(es_query, start) - start += d.num_columns - - if split_where[1]: - #TODO: INCLUDE FILTERS ON EDGES - filter_ = AndOp("and", split_where[1]).to_esfilter(schema) - es_query = Data( - aggs={"_filter": set_default({"filter": filter_}, es_query)} - ) - - es_query = wrap({ - "aggs": {"_nested": set_default( - {"nested": {"path": schema.query_path[0]}}, - es_query - )} - }) - else: - if any(split_where[1::]): - Log.error("Where clause is too deep") - - if decoders: - for d in jx.reverse(decoders[0]): - es_query = d.append_query(es_query, start) - start += d.num_columns - - if split_where[0]: - #TODO: INCLUDE FILTERS ON EDGES - filter = AndOp("and", split_where[0]).to_esfilter(schema) - es_query = Data( - aggs={"_filter": set_default({"filter": filter}, es_query)} - ) - # - - if not es_query: - es_query = wrap({"query": {"match_all": {}}}) - - es_query.size = 0 - - with Timer("ES query time") as es_duration: - result = es_post(es, es_query, query.limit) - - try: - format_time = Timer("formatting") - with format_time: - decoders = [d for ds in decoders for d in ds] - result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE - - formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format] - if query.edges: - output = formatter(decoders, result.aggregations, start, query, select) - elif query.groupby: - output = groupby_formatter(decoders, result.aggregations, start, query, select) - else: - output = aggop_formatter(decoders, result.aggregations, start, query, select) - - output.meta.timing.formatting = format_time.duration - output.meta.timing.es_search = es_duration.duration - output.meta.content_type = mime_type - output.meta.es_query = es_query - return output - except Exception as e: - if query.format not in format_dispatch: - Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e) - Log.error("Some problem", cause=e) - - -EMPTY = {} -EMPTY_LIST = [] - - -def get_bucket_keys(stats_name): - buckets = jx_expression_to_function(stats_name + ".buckets") - def output(row): - return [b['key'] for b in listwrap(buckets(row))] - return output - - -def drill(agg): - deeper = agg.get("_filter") or agg.get("_nested") - while deeper: - agg = deeper - deeper = agg.get("_filter") or agg.get("_nested") - return agg - -def aggs_iterator(aggs, decoders, coord=True): - """ - DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE: - RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS - - :param aggs: ES AGGREGATE OBJECT - :param decoders: - :param coord: TURN ON LOCAL COORDINATE LOOKUP - """ - depth = max(d.start + d.num_columns for d in decoders) - - def _aggs_iterator(agg, d): - agg = drill(agg) - - if d > 0: - for k, v in agg.items(): - if k == "_match": - v = drill(v) - for i, b in enumerate(v.get("buckets", EMPTY_LIST)): - b["_index"] = i - for a, parts in _aggs_iterator(b, d - 1): - yield a, parts + (b,) - elif k == "_other": - for b in v.get("buckets", EMPTY_LIST): - for a, parts in _aggs_iterator(b, d - 1): - yield 
a, parts + (Null,) - elif k == "_missing": - b = drill(v) - for a, parts in _aggs_iterator(b, d - 1): - yield a, parts + (b,) - elif k.startswith("_join_"): - v["key"] = int(k[6:]) - for a, parts in _aggs_iterator(v, d - 1): - yield a, parts + (v,) - else: - for k, v in agg.items(): - if k == "_match": - v = drill(v) - for i, b in enumerate(v.get("buckets", EMPTY_LIST)): - b["_index"] = i - yield b, (b,) - elif k == "_other": - for b in v.get("buckets", EMPTY_LIST): - yield b, (Null,) - elif k == "_missing": - b = drill(v,) - yield b, (v,) - elif k.startswith("_join_"): - v["_index"] = int(k[6:]) - yield v, (v,) - - if coord: - for a, parts in _aggs_iterator(unwrap(aggs), depth - 1): - coord = tuple(d.get_index(parts) for d in decoders) - if any(c is None for c in coord): - continue - yield parts, coord, a - else: - for a, parts in _aggs_iterator(unwrap(aggs), depth - 1): - yield parts, None, a - - - -def count_dim(aggs, decoders): - if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders): - # ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME - for row, coord, agg in aggs_iterator(aggs, decoders, coord=False): - for d in decoders: - d.count(row) - for d in decoders: - d.done_count() - new_edges = wrap([d.edge for d in decoders]) - return new_edges - - -format_dispatch = {} -from jx_elasticsearch.es14.format import format_cube - -_ = format_cube - diff --git a/vendor/jx_elasticsearch/es14/decoders.py b/vendor/jx_elasticsearch/es14/decoders.py deleted file mode 100644 index efc169f..0000000 --- a/vendor/jx_elasticsearch/es14/decoders.py +++ /dev/null @@ -1,753 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
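The `drill`/`aggs_iterator` pair removed above walks ES's recursive aggregation response and flattens it into rows. Below is a minimal, self-contained sketch of that traversal, assuming plain-dict responses; `iter_rows` and the sample `response` are hypothetical stand-ins, not the deleted module's API.

```python
# Sketch only: mirrors the idea of drill() and aggs_iterator() from the
# deleted es14/aggs.py, using plain dicts instead of mo_dots wrappers.

def drill(agg):
    # Skip the synthetic "_filter"/"_nested" wrappers the query builder adds.
    deeper = agg.get("_filter") or agg.get("_nested")
    while deeper:
        agg = deeper
        deeper = agg.get("_filter") or agg.get("_nested")
    return agg


def iter_rows(agg, depth):
    # Yield one tuple of buckets per effective row of the result.
    agg = drill(agg)
    if depth == 0:
        yield ()
        return
    match = agg.get("_match")
    if match:
        for b in drill(match).get("buckets", []):
            for parts in iter_rows(b, depth - 1):
                yield parts + (b,)


response = {"_filter": {"_match": {"buckets": [
    {"key": "linux", "doc_count": 3},
    {"key": "win", "doc_count": 5},
]}}}

for row in iter_rows(response, 1):
    print([b["key"] for b in row])   # ['linux'] then ['win']
```

The real iterator also handles the `_other`, `_missing`, and `_join_*` buckets, and asks each decoder for the row's coordinate, skipping rows with no valid coordinate.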
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping - -from jx_base.dimensions import Dimension -from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION -from jx_base.expressions import TupleOp, TRUE -from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT -from jx_elasticsearch.es14.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE -from jx_python import jx -from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist -from mo_future import text_type -from mo_json.typed_encoder import STRING, NUMBER, BOOLEAN -from mo_json.typed_encoder import untype_path -from mo_logs import Log -from mo_logs.strings import quote, expand_template -from mo_math import MAX, MIN, Math -from pyLibrary.convert import string2boolean - - -class AggsDecoder(object): - def __new__(cls, e=None, query=None, *args, **kwargs): - e.allowNulls = coalesce(e.allowNulls, True) - - if e.value and e.domain.type == "default": - # if query.groupby: - # return object.__new__(DefaultDecoder, e) - - if isinstance(e.value, text_type): - Log.error("Expecting Variable or Expression, not plain string") - - if isinstance(e.value, LeavesOp): - return object.__new__(ObjectDecoder, e) - elif isinstance(e.value, TupleOp): - # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields - # JUST PULL THE FIELDS - if not all(isinstance(t, Variable) for t in e.value.terms): - Log.error("Can only handle variables in tuples") - - e.domain = Data( - dimension={"fields": e.value.terms} - ) - return object.__new__(DimFieldListDecoder, e) - elif isinstance(e.value, Variable): - schema = query.frum.schema - cols = schema.leaves(e.value.var) - if not cols: - return object.__new__(DefaultDecoder, e) - if len(cols) != 1: - return object.__new__(ObjectDecoder, e) - col = cols[0] - limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT) - - if col.partitions != None: - if col.multi > 1 and len(col.partitions) < 6: - return object.__new__(MultivalueDecoder) - - partitions = col.partitions[:limit:] - if e.domain.sort==-1: - partitions = list(reversed(sorted(partitions))) - else: - partitions = sorted(partitions) - e.domain = SimpleSetDomain(partitions=partitions, limit=limit) - else: - e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__()) - return object.__new__(DefaultDecoder, e) - - else: - return object.__new__(DefaultDecoder, e) - - if e.value and e.domain.type in PARTITION: - return object.__new__(SetDecoder, e) - if isinstance(e.domain.dimension, Dimension): - e.domain = e.domain.dimension.getDomain() - return object.__new__(SetDecoder, e) - if e.value and e.domain.type == "time": - return object.__new__(TimeDecoder, e) - if e.range: - return object.__new__(GeneralRangeDecoder, e) - if e.value and e.domain.type == "duration": - return object.__new__(DurationDecoder, e) - elif e.value and e.domain.type == "range": - return object.__new__(RangeDecoder, e) - elif not e.value and e.domain.dimension.fields: - # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields - # JUST PULL THE FIELDS - fields = e.domain.dimension.fields - if isinstance(fields, Mapping): - Log.error("No longer allowed: All objects are expressions") - else: - return object.__new__(DimFieldListDecoder, e) - elif not e.value and all(e.domain.partitions.where): - return object.__new__(GeneralSetDecoder, e) - else: 
- Log.error("domain type of {{type}} is not supported yet", type=e.domain.type) - - def __init__(self, edge, query, limit): - self.start = None - self.edge = edge - self.name = literal_field(self.edge.name) - self.query = query - self.limit = limit - self.schema = self.query.frum.schema - - def append_query(self, es_query, start): - Log.error("Not supported") - - def count(self, row): - pass - - def done_count(self): - pass - - def get_value_from_row(self, row): - raise NotImplementedError() - - def get_value(self, index): - raise NotImplementedError() - - def get_index(self, row): - raise NotImplementedError() - - @property - def num_columns(self): - return 0 - - -class SetDecoder(AggsDecoder): - - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - domain = self.domain = edge.domain - self.sorted = None - self.pull = pull_functions[STRING] - - # WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM - # self.sorted = {1: "asc", -1: "desc", None: None}[getattr(edge.domain, 'sort', None)] - edge_var = set(v.var for v in edge.value.vars()) - if query.sort: - for s in query.sort: - if not edge_var - set(v.var for v in s.value.vars()): - self.sorted = {1: "asc", -1: "desc"}[s.sort] - parts = jx.sort(domain.partitions, {"value": domain.key, "sort": s.sort}) - edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts) - - def append_query(self, es_query, start): - self.start = start - domain = self.domain - - domain_key = domain.key - include, text_include = transpose(*( - ( - float(v) if isinstance(v, (int, float)) else v, - text_type(float(v)) if isinstance(v, (int, float)) else v - ) - for v in (p[domain_key] for p in domain.partitions) - )) - value = self.edge.value - exists = AndOp("and", [ - value.exists(), - InOp("in", [value, Literal("literal", include)]) - ]).partial_eval() - - limit = coalesce(self.limit, len(domain.partitions)) - - if isinstance(value, Variable): - es_field = self.query.frum.schema.leaves(value.var)[0].es_column # ALREADY CHECKED THERE IS ONLY ONE - terms = set_default({"terms": { - "field": es_field, - "size": limit, - "order": {"_term": self.sorted} if self.sorted else None - }}, es_query) - else: - terms = set_default({"terms": { - "script": value.to_es_script(self.schema).script(self.schema), - "size": limit - }}, es_query) - - if self.edge.allowNulls: - missing = set_default( - {"filter": NotOp("not", exists).to_esfilter(self.schema)}, - es_query - ) - else: - missing = None - - return wrap({"aggs": { - "_match": { - "filter": exists.to_esfilter(self.schema), - "aggs": { - "_filter": terms - } - }, - "_missing": missing - }}) - - def get_value(self, index): - return self.domain.getKeyByIndex(index) - - def get_value_from_row(self, row): - return self.pull(row[self.start].get('key')) - - def get_index(self, row): - try: - part = row[self.start] - return self.domain.getIndexByKey(part.get('key')) - except Exception as e: - Log.error("problem", cause=e) - - @property - def num_columns(self): - return 1 - - -def _range_composer(edge, domain, es_query, to_float, schema): - # USE RANGES - _min = coalesce(domain.min, MIN(domain.partitions.min)) - _max = coalesce(domain.max, MAX(domain.partitions.max)) - - if edge.allowNulls: - missing_filter = set_default( - { - "filter": NotOp("not", AndOp("and", [ - edge.value.exists(), - InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]), - InequalityOp("lt", [edge.value, Literal(None, to_float(_max))]) 
- ]).partial_eval()).to_esfilter(schema) - }, - es_query - ) - else: - missing_filter = None - - if isinstance(edge.value, Variable): - calc = {"field": schema.leaves(edge.value.var)[0].es_column} - else: - calc = {"script": edge.value.to_es_script(schema).script(schema)} - - return wrap({"aggs": { - "_match": set_default( - {"range": calc}, - {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}}, - es_query - ), - "_missing": missing_filter - }}) - - -class TimeDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start - schema = self.query.frum.schema - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema) - - def get_value(self, index): - return self.edge.domain.getKeyByIndex(index) - - def get_index(self, row): - domain = self.edge.domain - part = row[self.start] - if part == None: - return len(domain.partitions) - - f = coalesce(part.get('from'), part.get('key')) - t = coalesce(part.get('to'), part.get('key')) - if f == None or t == None: - return len(domain.partitions) - else: - for p in domain.partitions: - if p.min.unix <= f < p.max.unix: - return p.dataIndex - sample = part.copy - sample.buckets = None - Log.error("Expecting to find {{part}}", part=sample) - - @property - def num_columns(self): - return 1 - - -class GeneralRangeDecoder(AggsDecoder): - """ - Accept an algebraic domain, and an edge with a `range` attribute - This class assumes the `snapshot` version - where we only include - partitions that have their `min` value in the range. - """ - - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - if edge.domain.type == "time": - self.to_float = lambda x: x.unix - elif edge.domain.type == "range": - self.to_float = lambda x: x - else: - Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type) - - def append_query(self, es_query, start): - self.start = start - - edge = self.edge - range = edge.range - domain = edge.domain - - aggs = {} - for i, p in enumerate(domain.partitions): - filter_ = AndOp("and", [ - InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]), - InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))]) - ]) - aggs["_join_" + text_type(i)] = set_default( - {"filter": filter_.to_esfilter(self.schema)}, - es_query - ) - - return wrap({"aggs": aggs}) - - def get_value(self, index): - return self.edge.domain.getKeyByIndex(index) - - def get_index(self, row): - domain = self.edge.domain - part = row[self.start] - if part == None: - return len(domain.partitions) - return part["_index"] - - @property - def num_columns(self): - return 1 - - -class GeneralSetDecoder(AggsDecoder): - """ - EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE - """ - - def append_query(self, es_query, start): - self.start = start - - parts = self.edge.domain.partitions - filters = [] - notty = [] - - for p in parts: - w = p.where - filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema)) - notty.append(NotOp("not", w)) - - missing_filter = None - if self.edge.allowNulls: # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER - missing_filter = set_default( - {"filter": AndOp("and", notty).to_esfilter(self.schema)}, - es_query - ) - - return wrap({"aggs": { - "_match": set_default( - {"filters": {"filters": filters}}, - es_query - ), - "_missing": missing_filter - }}) - - def get_value(self, index): - return self.edge.domain.getKeyByIndex(index) - - def 
get_index(self, row): - domain = self.edge.domain - part = row[self.start] - # if part == None: - # return len(domain.partitions) - return part.get("_index", len(domain.partitions)) - - @property - def num_columns(self): - return 1 - - -class DurationDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema) - - def get_value(self, index): - return self.edge.domain.getKeyByIndex(index) - - def get_index(self, row): - domain = self.edge.domain - part = row[self.start] - if part == None: - return len(domain.partitions) - - f = coalesce(part.get('from'), part.get('key')) - t = coalesce(part.get('to'), part.get('key')) - if f == None or t == None: - return len(domain.partitions) - else: - for p in domain.partitions: - if p.min.seconds <= f < p.max.seconds: - return p.dataIndex - sample = part.copy - sample.buckets = None - Log.error("Expecting to find {{part}}", part=sample) - - @property - def num_columns(self): - return 1 - - -class RangeDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema) - - def get_value(self, index): - return self.edge.domain.getKeyByIndex(index) - - def get_index(self, row): - domain = self.edge.domain - part = row[self.start] - if part == None: - return len(domain.partitions) - - f = coalesce(part.get('from'), part.get('key')) - t = coalesce(part.get('to'), part.get('key')) - if f == None or t == None: - return len(domain.partitions) - else: - for p in domain.partitions: - if p.min <= f < p.max: - return p.dataIndex - sample = part.copy - sample.buckets = None - Log.error("Expecting to find {{part}}", part=sample) - - @property - def num_columns(self): - return 1 - - -class MultivalueDecoder(SetDecoder): - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - self.var = edge.value.var - self.values = query.frum.schema[edge.value.var][0].partitions - self.parts = [] - - def append_query(self, es_query, start): - self.start = start - - es_field = self.query.frum.schema.leaves(self.var)[0].es_column - es_query = wrap({"aggs": { - "_match": set_default({"terms": { - "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'}) - }}, es_query) - }}) - - return es_query - - def get_value_from_row(self, row): - values = row[self.start]['key'].replace("||", "\b").split("|") - if len(values) == 2: - return None - return unwraplist([v.replace("\b", "|") for v in values[1:-1]]) - - def get_index(self, row): - find = self.get_value_from_row(row) - try: - return self.parts.index(find) - except Exception: - self.parts.append(find) - return len(self.parts)-1 - - @property - def num_columns(self): - return 1 - - -class ObjectDecoder(AggsDecoder): - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - if isinstance(edge.value, LeavesOp): - prefix = edge.value.term.var - flatter = lambda k: literal_field(relative_field(k, prefix)) - else: - prefix = edge.value.var - flatter = lambda k: relative_field(k, prefix) - - self.put, self.fields = transpose(*[ - (flatter(untype_path(c.names["."])), c.es_column) - for c in query.frum.schema.leaves(prefix) - ]) - - self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}}) - self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) - self.parts = list() - 
self.key2index = {} - self.computed_domain = False - - def append_query(self, es_query, start): - self.start = start - for i, v in enumerate(self.fields): - nest = wrap({"aggs": { - "_match": set_default({"terms": { - "field": v, - "size": self.domain.limit - }}, es_query), - "_missing": set_default( - {"filter": {"missing": {"field": v}}}, - es_query - ) - }}) - es_query = nest - return es_query - - def count(self, row): - value = self.get_value_from_row(row) - i = self.key2index.get(value) - if i is None: - i = self.key2index[value] = len(self.parts) - self.parts.append(value) - - def done_count(self): - self.computed_domain = True - self.edge.domain = self.domain = SimpleSetDomain( - key="value", - partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)] - ) - - def get_index(self, row): - value = self.get_value_from_row(row) - if self.computed_domain: - return self.domain.getIndexByKey(value) - - if value is None: - return -1 - i = self.key2index.get(value) - if i is None: - i = self.key2index[value] = len(self.parts) - self.parts.append(value) - return i - - def get_value_from_row(self, row): - part = row[self.start:self.start + self.num_columns:] - if not part[0]['doc_count']: - return None - - output = Data() - for k, v in zip(self.put, part): - output[k] = v.get('key') - return output - - @property - def num_columns(self): - return len(self.fields) - - -class DefaultDecoder(SetDecoder): - # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES) - - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - self.domain = edge.domain - self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) - self.parts = list() - self.key2index = {} - self.computed_domain = False - self.script = self.edge.value.partial_eval().to_es_script(self.schema) - self.pull = pull_functions[self.script.data_type] - self.missing = self.script.miss.partial_eval() - self.exists = NotOp("not", self.missing).partial_eval() - - # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM - sort_candidates = [s for s in self.query.sort if s.value == self.edge.value] - if sort_candidates: - self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]} - else: - self.es_order = None - - def append_query(self, es_query, start): - self.start = start - - if not isinstance(self.edge.value, Variable): - if self.exists is TRUE: - # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH) - output = wrap({"aggs": { - "_match": set_default( - {"terms": { - "script": self.script.expr, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ) - }}) - else: - output = wrap({"aggs": { - "_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing - "filter": self.exists.to_esfilter(self.schema), - "aggs": { - "_filter": set_default( - {"terms": { - "script": self.script.expr, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ) - } - }, - "_missing": set_default( - {"filter": self.missing.to_esfilter(self.schema)}, - es_query - ) - }}) - return output - else: - output = wrap({"aggs": { - "_match": set_default( - {"terms": { - "field": self.schema.leaves(self.edge.value.var)[0].es_column, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ), - "_missing": set_default( - {"filter": self.missing.to_esfilter(self.schema)}, - es_query - ) - }}) - return output - - def count(self, row): - part = row[self.start] - if 
part['doc_count']: - if part.get('key') != None: - self.parts.append(self.pull(part.get('key'))) - else: - self.edge.allowNulls = True # OK! WE WILL ALLOW NULLS - - def done_count(self): - self.edge.domain = self.domain = SimpleSetDomain( - partitions=jx.sort(set(self.parts)) - ) - self.parts = None - self.computed_domain = True - - def get_index(self, row): - if self.computed_domain: - try: - part = row[self.start] - return self.domain.getIndexByKey(self.pull(part.get('key'))) - except Exception as e: - Log.error("problem", cause=e) - else: - try: - part = row[self.start] - key = self.pull(part.get('key')) - i = self.key2index.get(key) - if i is None: - i = len(self.parts) - part = {"key": key, "dataIndex": i} - self.parts.append(part) - self.key2index[key] = i - return i - except Exception as e: - Log.error("problem", cause=e) - - @property - def num_columns(self): - return 1 - - -class DimFieldListDecoder(SetDecoder): - def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) - edge.allowNulls = False - self.fields = edge.domain.dimension.fields - self.domain = self.edge.domain - self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) - self.parts = list() - - def append_query(self, es_query, start): - # TODO: USE "reverse_nested" QUERY TO PULL THESE - self.start = start - for i, v in enumerate(self.fields): - exists = v.exists().partial_eval() - nest = wrap({"aggs": {"_match": { - "filter": exists.to_esfilter(self.schema), - "aggs": {"_filter": set_default({"terms": { - "field": self.schema.leaves(v.var)[0].es_column, - "size": self.domain.limit - }}, es_query)} - }}}) - nest.aggs._missing = set_default( - {"filter": NotOp("not", exists).to_esfilter(self.schema)}, - es_query - ) - es_query = nest - - if self.domain.where: - filter_ = self.domain.where.partial_eval().to_esfilter(self.schema) - es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}} - - return es_query - - def count(self, row): - part = row[self.start:self.start + len(self.fields):] - if part[0]['doc_count']: - value = tuple(p.get("key") for p in part) - self.parts.append(value) - - def done_count(self): - columns = map(text_type, range(len(self.fields))) - parts = wrap([{text_type(i): p for i, p in enumerate(part)} for part in set(self.parts)]) - self.parts = None - sorted_parts = jx.sort(parts, columns) - - self.edge.domain = self.domain = SimpleSetDomain( - key="value", - partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)] - ) - - def get_index(self, row): - part = row[self.start:self.start + len(self.fields):] - if part[0]['doc_count']==0: - return None - find = tuple(p.get("key") for p in part) - output = self.domain.getIndexByKey(find) - return output - @property - def num_columns(self): - return len(self.fields) - - -pull_functions = { - STRING: lambda x: x, - NUMBER: lambda x: float(x) if x !=None else None, - BOOLEAN: string2boolean -} - diff --git a/vendor/jx_elasticsearch/es14/deep.py b/vendor/jx_elasticsearch/es14/deep.py deleted file mode 100644 index e0c793e..0000000 --- a/vendor/jx_elasticsearch/es14/deep.py +++ /dev/null @@ -1,238 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
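The deleted `decoders.py` hinged on one idiom: `AggsDecoder.__new__` inspects the edge's domain and returns an instance of the appropriate subclass, so callers only ever construct the base class. Here is a stripped-down sketch of that dispatch with hypothetical names; the real constructor takes `(edge, query, limit)` and knows many more domain types.

```python
# Sketch only: __new__-based dispatch, as used by the deleted AggsDecoder.

class Decoder(object):
    def __new__(cls, domain_type):
        # Pick the subclass at construction time; __init__ still runs on it.
        subclass = {
            "set": SetDecoder,
            "time": TimeDecoder,
        }.get(domain_type, DefaultDecoder)
        return object.__new__(subclass)

    def __init__(self, domain_type):
        self.domain_type = domain_type


class SetDecoder(Decoder):
    pass


class TimeDecoder(Decoder):
    pass


class DefaultDecoder(Decoder):
    pass


print(type(Decoder("time")).__name__)      # TimeDecoder
print(type(Decoder("duration")).__name__)  # DefaultDecoder
```

Because `object.__new__(subclass)` returns an instance of (a subclass of) `Decoder`, Python then calls `__init__` on it with the same arguments, which is why all the deleted subclasses shared the base `__init__(self, edge, query, limit)` signature.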
-#
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
-
-from jx_base.expressions import NULL
-from jx_base.query import DEFAULT_LIMIT
-from jx_elasticsearch import post as es_post
-from jx_elasticsearch.es14.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp
-from jx_elasticsearch.es14.setop import format_dispatch, get_pull_function, get_pull
-from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template
-from jx_python.expressions import compile_expression, jx_expression_to_function
-from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field
-from mo_json.typed_encoder import NESTED
-from mo_json.typed_encoder import untype_path, EXISTS_TYPE
-from mo_logs import Log
-from mo_threads import Thread
-from mo_times.timer import Timer
-from pyLibrary import convert
-
-EXPRESSION_PREFIX = "_expr."
-
-_ = convert
-
-
-def is_deepop(es, query):
-    if query.edges or query.groupby:
-        return False
-    if all(s.aggregate not in (None, "none") for s in listwrap(query.select)):
-        return False
-    if len(split_field(query.frum.name)) > 1:
-        return True
-
-    # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
-    # vars_ = query_get_all_vars(query)
-    # columns = query.frum.get_columns()
-    # if any(c for c in columns if len(c.nested_path) != 1 and c.name in vars_):
-    #     return True
-    return False
-
-
-def es_deepop(es, query):
-    schema = query.frum.schema
-    query_path = schema.query_path[0]
-
-    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
-    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
-    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
-    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
-    post_expressions = {}
-    es_query, es_filters = es_query_template(query_path)
-
-    # SPLIT WHERE CLAUSE BY DEPTH
-    wheres = split_expression_by_depth(query.where, schema)
-    for i, f in enumerate(es_filters):
-        script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
-        set_default(f, script)
-
-    if not wheres[1]:
-        # WITHOUT NESTED CONDITIONS, WE MUST ALSO RETURN DOCS WITH NO NESTED RECORDS
-        more_filter = {
-            "and": [
-                es_filters[0],
-                {"missing": {"field": untype_path(query_path) + "." + EXISTS_TYPE}}
-            ]
-        }
-    else:
-        more_filter = None
-
-    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
-
-    # es_query.sort = jx_sort_to_es_sort(query.sort)
-    map_to_es_columns = schema.map_to_es()
-    # {c.names["."]: c.es_column for c in schema.leaves(".")}
-    query_for_es = query.map(map_to_es_columns)
-    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)
-
-    es_query.fields = []
-
-    is_list = isinstance(query.select, list)
-    new_select = FlatList()
-
-    i = 0
-    for s in listwrap(query.select):
-        if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
-            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
-            leaves = schema.leaves(s.value.term.var)
-            col_names = set()
-            for c in leaves:
-                if c.nested_path[0] == ".":
-                    if c.jx_type == NESTED:
-                        continue
-                    es_query.fields += [c.es_column]
-                c_name = untype_path(c.names[query_path])
-                col_names.add(c_name)
-                new_select.append({
-                    "name": concat_field(s.name, c_name),
-                    "nested_path": c.nested_path[0],
-                    "put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
-                    "pull": get_pull_function(c)
-                })
-                i += 1
-
-            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
-            for n in new_select:
-                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
-                    n.put.name = n.name = n.name.lstrip(".")
-                    col_names.add(n.name)
-        elif isinstance(s.value, Variable):
-            net_columns = schema.leaves(s.value.var)
-            if not net_columns:
-                new_select.append({
-                    "name": s.name,
-                    "nested_path": ".",
-                    "put": {"name": s.name, "index": i, "child": "."},
-                    "pull": NULL
-                })
-            else:
-                for n in net_columns:
-                    pull = get_pull_function(n)
-                    if n.nested_path[0] == ".":
-                        if n.jx_type == NESTED:
-                            continue
-                        es_query.fields += [n.es_column]
-
-                    # WE MUST FIGURE OUT WHICH NAMESPACE s.value.var IS USING SO WE CAN EXTRACT THE child
-                    for np in n.nested_path:
-                        c_name = untype_path(n.names[np])
-                        if startswith_field(c_name, s.value.var):
-                            child = relative_field(c_name, s.value.var)
-                            break
-                    else:
-                        child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)
-
-                    new_select.append({
-                        "name": s.name,
-                        "pull": pull,
-                        "nested_path": n.nested_path[0],
-                        "put": {
-                            "name": s.name,
-                            "index": i,
-                            "child": child
-                        }
-                    })
-            i += 1
-        else:
-            expr = s.value
-            for v in expr.vars():
-                for c in schema[v.var]:
-                    if c.nested_path[0] == ".":
-                        es_query.fields += [c.es_column]
-                    # else:
-                    #     Log.error("deep field not expected")
-
-            pull_name = EXPRESSION_PREFIX + s.name
-            map_to_local = MapToLocal(schema)
-            pull = jx_expression_to_function(pull_name)
-            post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())
-
-            new_select.append({
-                "name": s.name if is_list else ".",
-                "pull": pull,
-                "value": expr.__data__(),
-                "put": {"name": s.name, "index": i, "child": "."}
-            })
-            i += 1
-
-    # ES needs two calls to get all documents
-    more = []
-    def get_more(please_stop):
-        more.append(es_post(
-            es,
-            Data(
-                query={"filtered": {"filter": more_filter}},
-                fields=es_query.fields
-            ),
-            query.limit
-        ))
-    if more_filter:
-        need_more = Thread.run("get more", target=get_more)
-
-    with Timer("call to ES") as call_timer:
-        data = es_post(es, es_query, query.limit)
-
-    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
-    def inners():
-        for t in data.hits.hits:
-            for i in t.inner_hits[literal_field(query_path)].hits.hits:
-                t._inner = i._source
-                for k, e in post_expressions.items():
-                    t[k] = e(t)
-                yield t
-        if more_filter:
-            Thread.join(need_more)
-            for t in more[0].hits.hits:
- yield t - # - - try: - formatter, groupby_formatter, mime_type = format_dispatch[query.format] - - output = formatter(inners(), new_select, query) - output.meta.timing.es = call_timer.duration - output.meta.content_type = mime_type - output.meta.es_query = es_query - return output - except Exception as e: - Log.error("problem formatting", e) - - -class MapToLocal(object): - """ - MAP FROM RELATIVE/ABSOLUTE NAMESPACE TO PYTHON THAT WILL EXTRACT RESULT - """ - def __init__(self, map_to_columns): - self.map_to_columns = map_to_columns - - def __getitem__(self, item): - return self.get(item) - - def get(self, item): - cs = self.map_to_columns[item] - if len(cs) == 0: - return "Null" - elif len(cs) == 1: - return get_pull(cs[0]) - else: - return "coalesce(" + (",".join(get_pull(c) for c in cs)) + ")" - - diff --git a/vendor/jx_elasticsearch/es14/expressions.py b/vendor/jx_elasticsearch/es14/expressions.py deleted file mode 100644 index 2546c4d..0000000 --- a/vendor/jx_elasticsearch/es14/expressions.py +++ /dev/null @@ -1,1429 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import itertools - -from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \ - WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \ - EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \ - PrefixOp, NotLeftOp, InOp, CaseOp, AndOp, \ - ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE -from jx_elasticsearch.es14.util import es_not, es_script, es_or, es_and, es_missing -from mo_dots import coalesce, wrap, Null, set_default, literal_field -from mo_future import text_type -from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER -from mo_logs import Log, suppress_exception -from mo_logs.strings import expand_template, quote -from mo_math import MAX, OR -from pyLibrary.convert import string2regexp - -TO_STRING = """ - ({it -> - value = {{expr}}; - if (value==null) return ""; - output = String.valueOf(value); - if (output.endsWith(".0")) output = output.substring(0, output.length() - 2); - return output; - })() -""" - - -LIST_TO_PIPE = """ -StringBuffer output=new StringBuffer(); -for(String s : {{expr}}){ - output.append("|"); - String sep2=""; - StringTokenizer parts = new StringTokenizer(s, "|"); - while (parts.hasMoreTokens()){ - output.append(sep2); - output.append(parts.nextToken()); - sep2="||"; - }//for -}//for -output.append("|"); -return output.toString() -""" - - -class EsScript(Expression): - __slots__ = ("miss", "data_type", "expr", "many") - - def __init__(self, type, expr, frum, miss=None, many=False): - self.miss = coalesce(miss, FALSE) # Expression that will return true/false to indicate missing result - self.data_type = type - self.expr = expr - self.many = many # True if script returns multi-value - self.frum = frum - - @property - def type(self): - return self.data_type - - def script(self, schema): - """ - RETURN A SCRIPT SUITABLE FOR CODE OUTSIDE THIS MODULE (NO KNOWLEDGE OF Painless) - :param 
schema: - :return: - """ - missing = self.miss.partial_eval() - if missing is FALSE: - return self.partial_eval().to_es_script(schema).expr - elif missing is TRUE: - return "null" - - return "(" + missing.to_es_script(schema).expr + ")?null:(" + self.expr + ")" - - def to_esfilter(self, schema): - return {"script": es_script(self.script(schema))} - - def to_es_script(self, schema): - return self - - def missing(self): - return self.miss - - def __data__(self): - return {"script": self.script} - - def __eq__(self, other): - if not isinstance(other, EsScript): - return False - elif self.expr==other.expr: - return True - else: - return False - - -@extend(BinaryOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval().to_es_script(schema).expr - rhs = NumberOp("number", self.rhs).partial_eval().to_es_script(schema).expr - script = "(" + lhs + ") " + BinaryOp.operators[self.op] + " (" + rhs + ")" - missing = OrOp("or", [self.lhs.missing(), self.rhs.missing()]) - - return WhenOp( - "when", - missing, - **{ - "then": self.default, - "else": - EsScript(type=NUMBER, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - - -@extend(BinaryOp) -def to_esfilter(self, schema): - if not isinstance(self.lhs, Variable) or not isinstance(self.rhs, Literal) or self.op in BinaryOp.operators: - return self.to_es_script(schema).to_esfilter(schema) - - if self.op in ["eq", "term"]: - return {"term": {self.lhs.var: self.rhs.to_esfilter(schema)}} - elif self.op in ["ne", "neq"]: - return es_not({"term": {self.lhs.var: self.rhs.to_esfilter(schema)}}) - elif self.op in BinaryOp.ineq_ops: - return {"range": {self.lhs.var: {self.op: self.rhs.value}}} - else: - Log.error("Logic error") - - -@extend(CaseOp) -def to_es_script(self, schema): - acc = self.whens[-1].partial_eval().to_es_script(schema) - for w in reversed(self.whens[0:-1]): - acc = WhenOp( - "when", - w.when, - **{"then": w.then, "else": acc} - ).partial_eval().to_es_script(schema) - return acc - - -@extend(CaseOp) -def to_esfilter(self, schema): - if self.type == BOOLEAN: - return OrOp( - "or", - [ - AndOp("and", [w.when, w.then]) - for w in self.whens[:-1] - ] + - self.whens[-1:] - ).partial_eval().to_esfilter(schema) - else: - Log.error("do not know how to handle") - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ConcatOp) -def to_esfilter(self, schema): - if isinstance(self.value, Variable) and isinstance(self.find, Literal): - return {"regexp": {self.value.var: ".*" + string2regexp(self.find.value) + ".*"}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ConcatOp) -def to_es_script(self, schema): - if len(self.terms) == 0: - return self.default.to_es_script(schema) - - acc = [] - separator = StringOp("string", self.separator).partial_eval() - sep = separator.to_es_script(schema).expr - for t in self.terms: - val = WhenOp( - "when", - t.missing(), - **{ - "then": Literal("literal", ""), - "else": EsScript(type=STRING, expr=sep + "+" + StringOp(None, t).partial_eval().to_es_script(schema).expr, frum=t) - # "else": ConcatOp("concat", [sep, t]) - } - ) - acc.append("(" + val.partial_eval().to_es_script(schema).expr + ")") - expr_ = "(" + "+".join(acc) + ").substring(" + LengthOp("length", separator).to_es_script(schema).expr + ")" - - if isinstance(self.default, NullOp): - return EsScript( - miss=self.missing(), - type=STRING, - expr=expr_, - frum=self - ) - else: - return EsScript( - 
miss=self.missing(), - type=STRING, - expr="((" + expr_ + ").length==0) ? (" + self.default.to_es_script(schema).expr + ") : (" + expr_ + ")", - frum=self - ) - - -@extend(Literal) -def to_es_script(self, schema): - def _convert(v): - if v is None: - return NULL.to_es_script(schema) - if v is True: - return EsScript( - type=BOOLEAN, - expr="true", - frum=self - ) - if v is False: - return EsScript( - type=BOOLEAN, - expr="false", - frum=self - ) - if isinstance(v, text_type): - return EsScript( - type=STRING, - expr=quote(v), - frum=self - ) - if isinstance(v, int): - return EsScript( - type=INTEGER, - expr=text_type(v), - frum=self - ) - if isinstance(v, float): - return EsScript( - type=NUMBER, - expr=text_type(v), - frum=self - ) - if isinstance(v, dict): - return EsScript( - type=OBJECT, - expr="[" + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items()) + "]", - frum=self - ) - if isinstance(v, (list, tuple)): - return EsScript( - type=OBJECT, - expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]", - frum=self - ) - - return _convert(self.term) - - -@extend(CoalesceOp) -def to_es_script(self, schema): - if not self.terms: - return NULL.to_es_script(schema) - - v = self.terms[-1] - acc = FirstOp("first", v).partial_eval().to_es_script(schema) - for v in reversed(self.terms[:-1]): - m = v.missing().partial_eval() - e = NotOp("not", m).partial_eval().to_es_script(schema) - r = FirstOp("first", v).partial_eval().to_es_script(schema) - - if r.miss is TRUE: - continue - elif r.miss is FALSE: - acc = r - continue - elif acc.type == r.type: - new_type = r.type - elif acc.type == NUMBER and r.type == INTEGER: - new_type = NUMBER - elif acc.type == INTEGER and r.type == NUMBER: - new_type = NUMBER - else: - new_type = OBJECT - - acc = EsScript( - miss=AndOp("and", [acc.miss, m]).partial_eval(), - type=new_type, - expr="(" + e.expr + ") ? 
(" + r.expr + ") : (" + acc.expr + ")", - frum=self - ) - return acc - - -@extend(CoalesceOp) -def to_esfilter(self, schema): - return {"bool": {"should": [{"exists": {"field": v}} for v in self.terms]}} - - -@extend(ExistsOp) -def to_es_script(self, schema): - return self.field.exists().partial_eval().to_es_script(schema) - - -@extend(ExistsOp) -def to_esfilter(self, schema): - return self.field.exists().partial_eval().to_esfilter(schema) - - -@extend(Literal) -def to_esfilter(self, schema): - return self.json - - -@extend(NullOp) -def to_es_script(self, schema): - return EsScript( - miss=TRUE, - type=OBJECT, - expr="null", - frum=self - ) - -@extend(NullOp) -def to_esfilter(self, schema): - return es_not({"match_all": {}}) - - -@extend(FalseOp) -def to_es_script(self, schema): - return EsScript(type=BOOLEAN, expr="false", frum=self) - - -@extend(FalseOp) -def to_esfilter(self, schema): - return MATCH_NONE - - -@extend(TupleOp) -def to_esfilter(self, schema): - Log.error("not supported") - - -@extend(TupleOp) -def to_es_script(self, schema): - terms = [FirstOp("first", t).partial_eval().to_es_script(schema) for t in self.terms] - expr = 'new Object[]{'+','.join(t.expr for t in terms)+'}' - return EsScript( - type=OBJECT, - expr=expr, - miss=FALSE, - many=FALSE, - frum=self - ) - - -@extend(LeavesOp) -def to_es_script(self, schema): - Log.error("not supported") - - -@extend(LeavesOp) -def to_esfilter(self, schema): - Log.error("not supported") - - -@extend(InequalityOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval().to_es_script(schema).expr - rhs = NumberOp("number", self.rhs).partial_eval().to_es_script(schema).expr - script = "(" + lhs + ") " + InequalityOp.operators[self.op] + " (" + rhs + ")" - - output = WhenOp( - "when", - OrOp("or", [self.lhs.missing(), self.rhs.missing()]), - **{ - "then": FALSE, - "else": - EsScript(type=BOOLEAN, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - return output - - -@extend(InequalityOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - cols = schema.leaves(self.lhs.var) - if not cols: - lhs = self.lhs.var # HAPPENS DURING DEBUGGING, AND MAYBE IN REAL LIFE TOO - elif len(cols) == 1: - lhs = schema.leaves(self.lhs.var)[0].es_column - else: - Log.error("operator {{op|quote}} does not work on objects", op=self.op) - return {"range": {lhs: {self.op: self.rhs.value}}} - else: - script = self.to_es_script(schema) - if script.miss is not FALSE: - Log.error("inequality must be decisive") - return {"script": es_script(script.expr)} - - -@extend(DivOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval() - rhs = NumberOp("number", self.rhs).partial_eval() - script = "(" + lhs.to_es_script(schema).expr + ") / (" + rhs.to_es_script(schema).expr + ")" - - output = WhenOp( - "when", - OrOp("or", [self.lhs.missing(), self.rhs.missing(), EqOp("eq", [self.rhs, ZERO])]), - **{ - "then": self.default, - "else": EsScript(type=NUMBER, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - - return output - - -@extend(DivOp) -def to_esfilter(self, schema): - return NotOp("not", self.missing()).partial_eval().to_esfilter(schema) - - -@extend(FloorOp) -def to_es_script(self, schema): - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if rhs.frum is ONE: - script = "(int)Math.floor(" + lhs.expr + ")" - else: - script = "Math.floor((" + lhs.expr + ") / (" + 
rhs.expr + "))*(" + rhs.expr + ")" - - output = WhenOp( - "when", - OrOp("or", [lhs.miss, rhs.miss, EqOp("eq", [self.rhs, ZERO])]), - **{ - "then": self.default, - "else": - EsScript( - type=NUMBER, - expr=script, - frum=self, - miss=FALSE - ) - } - ).to_es_script(schema) - return output - - -@extend(FloorOp) -def to_esfilter(self, schema): - Log.error("Logic error") - - -@simplified -@extend(EqOp) -def partial_eval(self): - lhs = self.lhs.partial_eval() - rhs = self.rhs.partial_eval() - return EqOp("eq", [lhs, rhs]) - - -@extend(EqOp) -def to_es_script(self, schema): - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": self.rhs.missing()}), - WhenOp("when", self.rhs.missing(), **{"then": FALSE}), - BasicEqOp("eq", [self.lhs, self.rhs]) - ]).partial_eval().to_es_script(schema) - - -@extend(EqOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - rhs = self.rhs.value - lhs = self.lhs.var - cols = schema.leaves(lhs) - if cols: - lhs = cols[0].es_column - - if isinstance(rhs, list): - if len(rhs) == 1: - return {"term": {lhs: rhs[0]}} - else: - return {"terms": {lhs: rhs}} - else: - return {"term": {lhs: rhs}} - - else: - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": self.rhs.missing()}), - WhenOp("when", self.rhs.missing(), **{"then": FALSE}), - BasicEqOp("eq", [self.lhs, self.rhs]) - ]).partial_eval().to_esfilter(schema) - - -@extend(BasicEqOp) -def to_es_script(self, schema): - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if lhs.many: - if rhs.many: - return AndOp("and", [ - EsScript(type=BOOLEAN, expr="(" + lhs.expr + ").size()==(" + rhs.expr + ").size()", frum=self), - EsScript(type=BOOLEAN, expr="(" + rhs.expr + ").containsAll(" + lhs.expr + ")", frum=self) - ]).to_es_script(schema) - else: - return EsScript(type=BOOLEAN, expr="(" + lhs.expr + ").contains(" + rhs.expr + ")", frum=self) - elif rhs.many: - return EsScript( - type=BOOLEAN, - expr="(" + rhs.expr + ").contains(" + lhs.expr + ")", - frum=self - ) - else: - return EsScript( - type=BOOLEAN, - expr="(" + lhs.expr + "==" + rhs.expr + ")", - frum=self - ) - - -@extend(BasicEqOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - lhs = self.lhs.var - cols = schema.leaves(lhs) - if cols: - lhs = cols[0].es_column - rhs = self.rhs.value - if isinstance(rhs, list): - if len(rhs) == 1: - return {"term": {lhs: rhs[0]}} - else: - return {"terms": {lhs: rhs}} - else: - return {"term": {lhs: rhs}} - else: - return self.to_es_script(schema).to_esfilter(schema) - - - -@extend(MissingOp) -def to_es_script(self, schema, not_null=False, boolean=True): - if isinstance(self.expr, Variable): - if self.expr.var == "_id": - return EsScript(type=BOOLEAN, expr="false", frum=self) - else: - columns = schema.leaves(self.expr.var) - if len(columns) == 1: - return EsScript(type=BOOLEAN, expr="doc[" + quote(columns[0].es_column) + "].isEmpty()", frum=self) - else: - return AndOp("and", [ - EsScript( - type=BOOLEAN, - expr="doc[" + quote(c.es_column) + "].isEmpty()", - frum=self - ) - for c in columns - ]).partial_eval().to_es_script(schema) - elif isinstance(self.expr, Literal): - return self.expr.missing().to_es_script(schema) - else: - return self.expr.missing().partial_eval().to_es_script(schema) - - -@extend(MissingOp) -def to_esfilter(self, schema): - if isinstance(self.expr, Variable): - cols = schema.leaves(self.expr.var) - if not 
cols: - return {"match_all": {}} - elif len(cols) == 1: - return es_missing(cols[0].es_column) - else: - return es_and([ - es_missing(c.es_column) for c in cols - ]) - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(NotLeftOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).partial_eval().to_es_script(schema).expr - l = NumberOp("number", self.length).partial_eval().to_es_script(schema).expr - - expr = "(" + v + ").substring((int)Math.max(0, (int)Math.min(" + v + ".length(), " + l + ")))" - return EsScript( - miss=OrOp("or", [self.value.missing(), self.length.missing()]), - type=STRING, - expr=expr, - frum=self - ) - - -@extend(NeOp) -def to_es_script(self, schema): - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": NotOp("not", self.rhs.missing())}), - WhenOp("when", self.rhs.missing(), **{"then": NotOp("not", self.lhs.missing())}), - NotOp("not", BasicEqOp("eq", [self.lhs, self.rhs])) - ]).partial_eval().to_es_script(schema) - - -@extend(NeOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - columns = schema.values(self.lhs.var) - if len(columns) == 0: - return {"match_all": {}} - elif len(columns) == 1: - return es_not({"term": {columns[0].es_column: self.rhs.value}}) - else: - Log.error("column split to multiple, not handled") - else: - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if lhs.many: - if rhs.many: - return es_not( - ScriptOp( - "script", - ( - "(" + lhs.expr + ").size()==(" + rhs.expr + ").size() && " + - "(" + rhs.expr + ").containsAll(" + lhs.expr + ")" - ) - ).to_esfilter(schema) - ) - else: - return es_not( - ScriptOp("script", "(" + lhs.expr + ").contains(" + rhs.expr + ")").to_esfilter(schema) - ) - else: - if rhs.many: - return es_not( - ScriptOp("script", "(" + rhs.expr + ").contains(" + lhs.expr + ")").to_esfilter(schema) - ) - else: - return es_not( - ScriptOp("script", "(" + lhs.expr + ") != (" + rhs.expr + ")").to_esfilter(schema) - ) - -@extend(NotOp) -def to_es_script(self, schema): - return EsScript( - type=BOOLEAN, - expr="!(" + self.term.to_es_script(schema).expr + ")", - frum=self - ) - - -@extend(NotOp) -def to_esfilter(self, schema): - if isinstance(self.term, MissingOp) and isinstance(self.term.expr, Variable): - v = self.term.expr.var - cols = schema.leaves(v) - if cols: - v = cols[0].es_column - return {"exists": {"field": v}} - else: - operand = self.term.to_esfilter(schema) - return es_not(operand) - - -@extend(AndOp) -def to_es_script(self, schema): - if not self.terms: - return TRUE.to_es_script() - else: - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr=" && ".join("(" + t.to_es_script(schema).expr + ")" for t in self.terms), - frum=self - ) - - -@extend(AndOp) -def to_esfilter(self, schema): - if not len(self.terms): - return {"match_all": {}} - else: - return es_and([t.to_esfilter(schema) for t in self.terms]) - - -@extend(OrOp) -def to_es_script(self, schema): - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr=" || ".join("(" + t.to_es_script(schema).expr + ")" for t in self.terms if t), - frum=self - ) - - -@extend(OrOp) -def to_esfilter(self, schema): - # OR(x) == NOT(AND(NOT(xi) for xi in x)) - output = es_not(es_and([ - NotOp("not", t).partial_eval().to_esfilter(schema) - for t in self.terms - ])) - return output - - # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION - # 
{"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL - # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL - - -@extend(LengthOp) -def to_es_script(self, schema): - value = StringOp("string", self.term).to_es_script(schema) - missing = self.term.missing().partial_eval() - return EsScript( - miss=missing, - type=INTEGER, - expr="(" + value.expr + ").length()", - frum=self - ) - - -@extend(FirstOp) -def to_es_script(self, schema): - if isinstance(self.term, Variable): - columns = schema.values(self.term.var) - if len(columns) == 1: - return self.term.to_es_script(schema, many=False) - - term = self.term.to_es_script(schema) - - if isinstance(term.frum, CoalesceOp): - return CoalesceOp("coalesce", [FirstOp("first", t.partial_eval().to_es_script(schema)) for t in term.frum.terms]).to_es_script(schema) - - if term.many: - return EsScript( - miss=term.miss, - type=term.type, - expr="(" + term.expr + ")[0]", - frum=term.frum - ).to_es_script(schema) - else: - return term - - -@extend(BooleanOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.many: - return BooleanOp("boolean", EsScript( - miss=value.miss, - type=value.type, - expr="(" + value.expr + ")[0]", - frum=value.frum - )).to_es_script(schema) - elif value.type == BOOLEAN: - miss = value.miss - value.miss = FALSE - return WhenOp("when", miss, **{"then": FALSE, "else": value}).partial_eval().to_es_script(schema) - else: - return NotOp("not", value.miss).partial_eval().to_es_script(schema) - -@extend(BooleanOp) -def to_esfilter(self, schema): - if isinstance(self.term, Variable): - return {"term": {self.term.var: True}} - else: - return self.to_es_script(schema).to_esfilter(schema) - - -@extend(IntegerOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.many: - return IntegerOp("integer", EsScript( - miss=value.missing, - type=value.type, - expr="(" + value.expr + ")[0]", - frum=value.frum - )).to_es_script(schema) - elif value.type == BOOLEAN: - return EsScript( - miss=value.missing, - type=INTEGER, - expr=value.expr + " ? 1 : 0", - frum=self - ) - elif value.type == INTEGER: - return value - elif value.type == NUMBER: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="(int)(" + value.expr + ")", - frum=self - ) - elif value.type == STRING: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="Integer.parseInt(" + value.expr + ")", - frum=self - ) - else: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="((" + value.expr + ") instanceof String) ? Integer.parseInt(" + value.expr + ") : (int)(" + value.expr + ")", - frum=self - ) - -@extend(NumberOp) -def to_es_script(self, schema): - term = FirstOp("first", self.term).partial_eval() - value = term.to_es_script(schema) - - if isinstance(value.frum, CoalesceOp): - return CoalesceOp("coalesce", [NumberOp("number", t).partial_eval().to_es_script(schema) for t in value.frum.terms]).to_es_script(schema) - - if value.type == BOOLEAN: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr + " ? 
1 : 0", - frum=self - ) - elif value.type == INTEGER: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr, - frum=self - ) - elif value.type == NUMBER: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr, - frum=self - ) - elif value.type == STRING: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr="Double.parseDouble(" + value.expr + ")", - frum=self - ) - elif value.type == OBJECT: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr="((" + value.expr + ") instanceof String) ? Double.parseDouble(" + value.expr + ") : (" + value.expr + ")", - frum=self - ) - - -@extend(IsNumberOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.expr or value.i: - return TRUE.to_es_script(schema) - else: - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr="(" + value.expr + ") instanceof java.lang.Double", - frum=self - ) - -@extend(CountOp) -def to_es_script(self, schema): - return EsScript( - miss=FALSE, - type=INTEGER, - expr="+".join("((" + t.missing().partial_eval().to_es_script(schema).expr + ") ? 0 : 1)" for t in self.terms), - frum=self - ) - - -@extend(LengthOp) -def to_esfilter(self, schema): - return {"regexp": {self.var.var: self.pattern.value}} - - -@extend(MaxOp) -def to_es_script(self, schema): - acc = NumberOp("number", self.terms[-1]).partial_eval().to_es_script(schema).expr - for t in reversed(self.terms[0:-1]): - acc = "Math.max(" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + " , " + acc + ")" - return EsScript( - miss=AndOp("or", [t.missing() for t in self.terms]), - type=NUMBER, - expr=acc, - frum=self - ) - - -@extend(MinOp) -def to_es_script(self, schema): - acc = NumberOp("number", self.terms[-1]).partial_eval().to_es_script(schema).expr - for t in reversed(self.terms[0:-1]): - acc = "Math.min(" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + " , " + acc + ")" - return EsScript( - miss=AndOp("or", [t.missing() for t in self.terms]), - type=NUMBER, - expr=acc, - frum=self - ) - - -_painless_operators = { - "add": (" + ", "0"), # (operator, zero-array default value) PAIR - "sum": (" + ", "0"), - "mul": (" * ", "1"), - "mult": (" * ", "1"), - "multiply": (" * ", "1") -} - - -@extend(MultiOp) -def to_es_script(self, schema): - op, unit = _painless_operators[self.op] - if self.nulls: - calc = op.join( - "((" + t.missing().to_es_script(schema).expr + ") ? 
" + unit + " : (" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + "))" - for t in self.terms - ) - return WhenOp( - "when", - AndOp("and", [t.missing() for t in self.terms]), - **{"then": self.default, "else": EsScript(type=NUMBER, expr=calc, frum=self)} - ).partial_eval().to_es_script(schema) - else: - calc = op.join( - "(" + NumberOp("number", t).to_es_script(schema).expr + ")" - for t in self.terms - ) - return WhenOp( - "when", - OrOp("or", [t.missing() for t in self.terms]), - **{"then": self.default, "else": EsScript(type=NUMBER, expr=calc, frum=self)} - ).partial_eval().to_es_script(schema) - - -@extend(RegExpOp) -def to_esfilter(self, schema): - if isinstance(self.pattern, Literal) and isinstance(self.var, Variable): - cols = schema.leaves(self.var.var) - if len(cols) == 0: - return MATCH_NONE - elif len(cols) == 1: - return {"regexp": {cols[0].es_column: self.pattern.value}} - else: - Log.error("regex on not supported ") - else: - Log.error("regex only accepts a variable and literal pattern") - - -@extend(StringOp) -def to_es_script(self, schema): - term = FirstOp("first", self.term).partial_eval() - value = term.to_es_script(schema) - - if isinstance(value.frum, CoalesceOp): - return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_es_script(schema) - - if value.type == BOOLEAN: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=value.expr + ' ? "T" : "F"', - frum=self - ) - elif value.type == INTEGER: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr="String.valueOf(" + value.expr + ")", - frum=self - ) - elif value.type == NUMBER: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=expand_template(TO_STRING, {"expr":value.expr}), - frum=self - ) - elif value.type == STRING: - return value - else: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=expand_template(TO_STRING, {"expr":value.expr}), - frum=self - ) - - # ((Runnable)(() -> {int a=2; int b=3; System.out.println(a+b);})).run(); - # "((Runnable)((value) -> {String output=String.valueOf(value); if (output.endsWith('.0')) {return output.substring(0, output.length-2);} else return output;})).run(" + value.expr + ")" - - -@extend(TrueOp) -def to_es_script(self, schema): - return EsScript(type=BOOLEAN, expr="true", frum=self) - - -@extend(TrueOp) -def to_esfilter(self, schema): - return {"match_all": {}} - - -@extend(PrefixOp) -def to_es_script(self, schema): - if not self.field: - return "true" - else: - return "(" + self.field.to_es_script(schema) + ").startsWith(" + self.prefix.to_es_script(schema) + ")" - - -@extend(PrefixOp) -def to_esfilter(self, schema): - if not self.expr: - return {"match_all": {}} - elif isinstance(self.expr, Variable) and isinstance(self.prefix, Literal): - var = schema.leaves(self.expr.var)[0].es_column - return {"prefix": {var: self.prefix.value}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - -@extend(SuffixOp) -def to_es_script(self, schema): - if not self.suffix: - return "true" - else: - return "(" + self.expr.to_es_script(schema) + ").endsWith(" + self.suffix.to_es_script(schema) + ")" - - -@extend(SuffixOp) -def to_esfilter(self, schema): - if not self.suffix: - return {"match_all": {}} - elif isinstance(self.expr, Variable) and isinstance(self.suffix, Literal): - var = schema.leaves(self.expr.var)[0].es_column - return {"regexp": {var: 
".*"+string2regexp(self.suffix.value)}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(InOp) -def to_es_script(self, schema): - superset = self.superset.to_es_script(schema) - value = self.value.to_es_script(schema) - return EsScript( - type=BOOLEAN, - expr="(" + superset.expr + ").contains(" + value.expr + ")", - frum=self - ) - - -@extend(InOp) -def to_esfilter(self, schema): - if isinstance(self.value, Variable): - var = self.value.var - cols = schema.leaves(var) - if cols: - var = cols[0].es_column - return {"terms": {var: self.superset.value}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ScriptOp) -def to_es_script(self, schema): - return EsScript(type=self.data_type, expr=self.script, frum=self) - - -@extend(ScriptOp) -def to_esfilter(self, schema): - return {"script": es_script(self.script)} - - -@extend(Variable) -def to_es_script(self, schema, many=True): - if self.var == ".": - return "_source" - else: - if self.var == "_id": - return EsScript(type=STRING, expr='doc["_uid"].value.substring(doc["_uid"].value.indexOf(\'#\')+1)', frum=self) - - columns = schema.values(self.var) - acc = [] - for c in columns: - varname = c.es_column - frum = Variable(c.es_column) - q = quote(varname) - if many: - acc.append(EsScript( - miss=frum.missing(), - type=c.jx_type, - expr="doc[" + q + "].values" if c.jx_type != BOOLEAN else "doc[" + q + "].value==\"T\"", - frum=frum, - many=True - )) - else: - acc.append(EsScript( - miss=frum.missing(), - type=c.jx_type, - expr="doc[" + q + "].value" if c.jx_type != BOOLEAN else "doc[" + q + "].value==\"T\"", - frum=frum, - many=True - )) - - if len(acc) == 0: - return NULL.to_es_script(schema) - elif len(acc) == 1: - return acc[0] - else: - return CoalesceOp("coalesce", acc).to_es_script(schema) - - -@extend(WhenOp) -def to_es_script(self, schema): - if self.simplified: - when = self.when.to_es_script(schema) - then = self.then.to_es_script(schema) - els_ = self.els_.to_es_script(schema) - - if when is TRUE: - return then - elif when is FALSE: - return els_ - elif then.miss is TRUE: - return EsScript( - miss=self.missing(), - type=els_.type, - expr=els_.expr, - frum=self - ) - elif els_.miss is TRUE: - return EsScript( - miss=self.missing(), - type=then.type, - expr=then.expr, - frum=self - ) - - elif then.type == els_.type: - return EsScript( - miss=self.missing(), - type=then.type, - expr="(" + when.expr + ") ? (" + then.expr + ") : (" + els_.expr + ")", - frum=self - ) - elif then.type in (INTEGER, NUMBER) and els_.type in (INTEGER, NUMBER): - return EsScript( - miss=self.missing(), - type=NUMBER, - expr="(" + when.expr + ") ? 
(" + then.expr + ") : (" + els_.expr + ")", - frum=self - ) - else: - Log.error("do not know how to handle") - else: - return self.partial_eval().to_es_script(schema) - - -@extend(WhenOp) -def to_esfilter(self, schema): - output = OrOp("or", [ - AndOp("and", [self.when, BooleanOp("boolean", self.then)]), - AndOp("and", [NotOp("not", self.when), BooleanOp("boolean", self.els_)]) - ]).partial_eval() - - return output.to_esfilter(schema) - - -@extend(BasicIndexOfOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).to_es_script(schema).expr - find = StringOp("string", self.find).to_es_script(schema).expr - start = IntegerOp("integer", self.start).to_es_script(schema).expr - - return EsScript( - miss=FALSE, - type=INTEGER, - expr="(" + v + ").indexOf(" + find + ", " + start + ")", - frum=self - ) - - -@extend(BasicIndexOfOp) -def to_esfilter(self, schema): - return ScriptOp("", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(BasicSubstringOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).partial_eval().to_es_script(schema).expr - start = IntegerOp("string", self.start).partial_eval().to_es_script(schema).expr - end = IntegerOp("integer", self.end).partial_eval().to_es_script(schema).expr - - return EsScript( - miss=FALSE, - type=STRING, - expr="(" + v + ").substring(" + start + ", " + end + ")", - frum=self - ) - - - -MATCH_ALL = wrap({"match_all": {}}) -MATCH_NONE = es_not({"match_all": {}}) - - -def simplify_esfilter(esfilter): - try: - output = wrap(_normalize(wrap(esfilter))) - output.isNormal = None - return output - except Exception as e: - from mo_logs import Log - - Log.unexpected("programmer error", cause=e) - - -def _normalize(esfilter): - """ - TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING - REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS - """ - if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal: - return esfilter - - # Log.note("from: " + convert.value2json(esfilter)) - isDiff = True - - while isDiff: - isDiff = False - - if esfilter['and']: - terms = esfilter['and'] - for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)): - if i0 == i1: - continue # SAME, IGNORE - # TERM FILTER ALREADY ASSUMES EXISTENCE - with suppress_exception: - if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]: - terms[i0] = MATCH_ALL - continue - - # IDENTICAL CAN BE REMOVED - with suppress_exception: - if t0 == t1: - terms[i0] = MATCH_ALL - continue - - # MERGE range FILTER WITH SAME FIELD - if i0 > i1: - continue # SAME, IGNORE - with suppress_exception: - f0, tt0 = t0.range.items()[0] - f1, tt1 = t1.range.items()[0] - if f0 == f1: - set_default(terms[i0].range[literal_field(f1)], tt1) - terms[i1] = MATCH_ALL - - output = [] - for a in terms: - if isinstance(a, (list, set)): - from mo_logs import Log - - Log.error("and clause is not allowed a list inside a list") - a_ = _normalize(a) - if a_ is not a: - isDiff = True - a = a_ - if a == MATCH_ALL: - isDiff = True - continue - if a == MATCH_NONE: - return MATCH_NONE - if a['and']: - isDiff = True - a.isNormal = None - output.extend(a['and']) - else: - a.isNormal = None - output.append(a) - if not output: - return MATCH_ALL - elif len(output) == 1: - # output[0].isNormal = True - esfilter = output[0] - break - elif isDiff: - esfilter = es_and(output) - continue - - if esfilter.bool.should: - output = [] - for a in esfilter.bool.should: - a_ = _normalize(a) - if a_ is not a: - isDiff = 
True - a = a_ - - if a.bool.should: - a.isNormal = None - isDiff = True - output.extend(a.bool.should) - else: - a.isNormal = None - output.append(a) - if not output: - return MATCH_NONE - elif len(output) == 1: - esfilter = output[0] - break - elif isDiff: - esfilter = wrap({"bool": {"should": output}}) - continue - - if esfilter.term != None: - if esfilter.term.keys(): - esfilter.isNormal = True - return esfilter - else: - return MATCH_ALL - - if esfilter.terms: - for k, v in esfilter.terms.items(): - if len(v) > 0: - if OR(vv == None for vv in v): - rest = [vv for vv in v if vv != None] - if len(rest) > 0: - output = es_or([ - es_missing(k), - {"terms": {k: rest}} - ]) - else: - output = es_missing(k) - output.isNormal = True - return output - else: - esfilter.isNormal = True - return esfilter - return MATCH_NONE - - if esfilter['not']: - _sub = esfilter['not'] - sub = _normalize(_sub) - if sub == MATCH_NONE: - return MATCH_ALL - elif sub == MATCH_ALL: - return MATCH_NONE - elif sub is not _sub: - sub.isNormal = None - return wrap({"not": sub, "isNormal": True}) - else: - sub.isNormal = None - - esfilter.isNormal = True - return esfilter - - -def split_expression_by_depth(where, schema, output=None, var_to_depth=None): - """ - :param where: EXPRESSION TO INSPECT - :param schema: THE SCHEMA - :param output: - :param var_to_depth: MAP FROM EACH VARIABLE NAME TO THE DEPTH - :return: - """ - """ - It is unfortunate that ES can not handle expressions that - span nested indexes. This will split your where clause - returning {"and": [filter_depth0, filter_depth1, ...]} - """ - vars_ = where.vars() - - if var_to_depth is None: - if not vars_: - return Null - # MAP VARIABLE NAMES TO HOW DEEP THEY ARE - var_to_depth = {v.var: max(len(c.nested_path) - 1, 0) for v in vars_ for c in schema[v.var]} - all_depths = set(var_to_depth.values()) - # if -1 in all_depths: - # Log.error( - # "Can not find column with name {{column|quote}}", - # column=unwraplist([k for k, v in var_to_depth.items() if v == -1]) - # ) - if len(all_depths) == 0: - all_depths = {0} - output = wrap([[] for _ in range(MAX(all_depths) + 1)]) - else: - all_depths = set(var_to_depth[v.var] for v in vars_) - - if len(all_depths) == 1: - output[list(all_depths)[0]] += [where] - elif isinstance(where, AndOp): - for a in where.terms: - split_expression_by_depth(a, schema, output, var_to_depth) - else: - Log.error("Can not handle complex where clause") - - return output - - -def get_type(var_name): - type_ = var_name.split(".$")[1:] - if not type_: - return "j" - return json_type_to_es_script_type.get(type_[0], "j") - - -json_type_to_es_script_type = { - "string": "s", - "boolean": "b", - "number": "n" -} diff --git a/vendor/jx_elasticsearch/es14/format.py b/vendor/jx_elasticsearch/es14/format.py deleted file mode 100644 index 86618e3..0000000 --- a/vendor/jx_elasticsearch/es14/format.py +++ /dev/null @@ -1,316 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.expressions import TupleOp -from jx_elasticsearch.es14.aggs import count_dim, aggs_iterator, format_dispatch, drill -from jx_python.containers.cube import Cube -from mo_collections.matrix import Matrix -from mo_dots import Data, set_default, wrap, split_field, coalesce -from mo_future import sort_using_key -from mo_logs import Log -from mo_logs.strings import quote -from pyLibrary import convert - -FunctionType = type(lambda: 1) - -def format_cube(decoders, aggs, start, query, select): - # decoders = sorted(decoders, key=lambda d: -d.edge.dim) # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER - new_edges = count_dim(aggs, decoders) - - dims = [] - for e in new_edges: - if isinstance(e.value, TupleOp): - e.allowNulls = False - - extra = 0 if e.allowNulls is False else 1 - dims.append(len(e.domain.partitions) + extra) - - dims = tuple(dims) - matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select] - for row, coord, agg in aggs_iterator(aggs, decoders): - for s, m in matricies: - try: - v = s.pull(agg) - m[coord] = v - except Exception as e: - # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS - if agg.get('doc_count') != 0: - Log.error("Programmer error", cause=e) - - cube = Cube( - query.select, - sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY - {s.name: m for s, m in matricies} - ) - cube.frum = query - return cube - - -def format_cube_from_aggop(decoders, aggs, start, query, select): - agg = drill(aggs) - matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select] - for s, m in matricies: - m[tuple()] = s.pull(agg) - cube = Cube(query.select, [], {s.name: m for s, m in matricies}) - cube.frum = query - return cube - - -def format_table(decoders, aggs, start, query, select): - new_edges = count_dim(aggs, decoders) - header = new_edges.name + select.name - - def data(): - dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) - is_sent = Matrix(dims=dims, zeros=0) - - if query.sort and not query.groupby: - all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS - for row, coord, agg in aggs_iterator(aggs, decoders): - missing_coord = all_coord.next() - while coord != missing_coord: - record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)] - for s in select: - if s.aggregate == "count": - record.append(0) - else: - record.append(None) - yield record - missing_coord = all_coord.next() - - output = [d.get_value(c) for c, d in zip(coord, decoders)] - for s in select: - output.append(s.pull(agg)) - yield output - else: - for row, coord, agg in aggs_iterator(aggs, decoders): - is_sent[coord] = 1 - - output = [d.get_value(c) for c, d in zip(coord, decoders)] - for s in select: - output.append(s.pull(agg)) - yield output - - # EMIT THE MISSING CELLS IN THE CUBE - if not query.groupby: - for c, v in is_sent: - if not v: - record = [d.get_value(c[i]) for i, d in enumerate(decoders)] - for s in select: - if s.aggregate == "count": - record.append(0) - else: - record.append(None) - yield record - - return Data( - meta={"format": "table"}, - header=header, - data=list(data()) - ) - - -def format_table_from_groupby(decoders, aggs, start, query, select): - header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name - - def data(): - for 
row, coord, agg in aggs_iterator(aggs, decoders): - if agg.get('doc_count', 0) == 0: - continue - output = [d.get_value_from_row(row) for d in decoders] - for s in select: - output.append(s.pull(agg)) - yield output - - return Data( - meta={"format": "table"}, - header=header, - data=list(data()) - ) - - -def format_table_from_aggop(decoders, aggs, start, query, select): - header = select.name - agg = drill(aggs) - row = [] - for s in select: - row.append(s.pull(agg)) - - return Data( - meta={"format": "table"}, - header=header, - data=[row] - ) - - -def format_tab(decoders, aggs, start, query, select): - table = format_table(decoders, aggs, start, query, select) - - def data(): - yield "\t".join(map(quote, table.header)) - for d in table.data: - yield "\t".join(map(quote, d)) - - return data() - - -def format_csv(decoders, aggs, start, query, select): - table = format_table(decoders, aggs, start, query, select) - - def data(): - yield ", ".join(map(quote, table.header)) - for d in table.data: - yield ", ".join(map(quote, d)) - - return data() - - -def format_list_from_groupby(decoders, aggs, start, query, select): - def data(): - for row, coord, agg in aggs_iterator(aggs, decoders): - if agg.get('doc_count', 0) == 0: - continue - output = Data() - for g, d in zip(query.groupby, decoders): - output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row) - - for s in select: - output[s.name] = s.pull(agg) - yield output - - for g in query.groupby: - g.put.name = coalesce(g.put.name, g.name) - - output = Data( - meta={"format": "list"}, - data=list(data()) - ) - return output - - -def format_list(decoders, aggs, start, query, select): - new_edges = count_dim(aggs, decoders) - - def data(): - dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) - - is_sent = Matrix(dims=dims, zeros=0) - if query.sort and not query.groupby: - # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE - all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS - for _, coord, agg in aggs_iterator(aggs, decoders): - missing_coord = all_coord.next() - while coord != missing_coord: - # INSERT THE MISSING COORDINATE INTO THE GENERATION - output = Data() - for i, d in enumerate(decoders): - output[query.edges[i].name] = d.get_value(missing_coord[i]) - - for s in select: - if s.aggregate == "count": - output[s.name] = 0 - yield output - missing_coord = all_coord.next() - - output = Data() - for e, c, d in zip(query.edges, coord, decoders): - output[e.name] = d.get_value(c) - - for s in select: - output[s.name] = s.pull(agg) - yield output - else: - - for row, coord, agg in aggs_iterator(aggs, decoders): - is_sent[coord] = 1 - - output = Data() - for e, c, d in zip(query.edges, coord, decoders): - output[e.name] = d.get_value(c) - - for s in select: - output[s.name] = s.pull(agg) - yield output - - # EMIT THE MISSING CELLS IN THE CUBE - if not query.groupby: - for c, v in is_sent: - if not v: - output = Data() - for i, d in enumerate(decoders): - output[query.edges[i].name] = d.get_value(c[i]) - - for s in select: - if s.aggregate == "count": - output[s.name] = 0 - yield output - - output = Data( - meta={"format": "list"}, - data=list(data()) - ) - return output - - -def format_list_from_aggop(decoders, aggs, start, query, select): - agg = drill(aggs) - - if isinstance(query.select, list): - item = Data() - for s in select: - item[s.name] = s.pull(agg) - else: - item = select[0].pull(agg) - - if query.edges or query.groupby: - 
return wrap({ - "meta": {"format": "list"}, - "data": [item] - }) - else: - return wrap({ - "meta": {"format": "value"}, - "data": item - }) - - -def format_line(decoders, aggs, start, query, select): - list = format_list(decoders, aggs, start, query, select) - - def data(): - for d in list.data: - yield convert.value2json(d) - - return data() - - -set_default(format_dispatch, { - None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"), - "cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"), - "table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"), - "list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"), - # "csv": (format_csv, format_csv_from_groupby, "text/csv"), - # "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"), - # "line": (format_line, format_line_from_groupby, "application/json") -}) - - -def _get(v, k, d): - for p in split_field(k): - try: - v = v.get(p) - if v is None: - return d - except Exception: - v = [vv.get(p) for vv in v] - return v diff --git a/vendor/jx_elasticsearch/es14/setop.py b/vendor/jx_elasticsearch/es14/setop.py deleted file mode 100644 index 212176b..0000000 --- a/vendor/jx_elasticsearch/es14/setop.py +++ /dev/null @@ -1,378 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping - -from jx_base.domains import ALGEBRAIC -from jx_base.expressions import IDENTITY -from jx_base.query import DEFAULT_LIMIT -from jx_elasticsearch import post as es_post -from jx_elasticsearch.es14.expressions import Variable, LeavesOp -from jx_elasticsearch.es14.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script -from jx_python.containers.cube import Cube -from jx_python.expressions import jx_expression_to_function -from mo_collections.matrix import Matrix -from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap -from mo_dots.lists import FlatList -from mo_json.typed_encoder import NESTED -from mo_json.typed_encoder import untype_path, unnest_path, untyped -from mo_logs import Log -from mo_math import AND -from mo_math import MAX -from mo_times.timer import Timer - -format_dispatch = {} - - -def is_setop(es, query): - select = listwrap(query.select) - - if not query.edges: - isDeep = len(split_field(query.frum.name)) > 1 # LOOKING INTO NESTED WILL REQUIRE A SCRIPT - simpleAgg = AND([s.aggregate in ("count", "none") for s in select]) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT - - # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE - if simpleAgg or isDeep: - return True - else: - isSmooth = AND((e.domain.type in ALGEBRAIC and e.domain.interval == "none") for e in query.edges) - if isSmooth: - return True - - return False - - -def es_setop(es, query): - schema = query.frum.schema - - es_query, filters = es_query_template(schema.query_path[0]) - nested_filter = None - set_default(filters[0], query.where.partial_eval().to_esfilter(schema)) - es_query.size = coalesce(query.limit, 
DEFAULT_LIMIT) - es_query.fields = FlatList() - - selects = wrap([s.copy() for s in listwrap(query.select)]) - new_select = FlatList() - schema = query.frum.schema - # columns = schema.columns - # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".") - - es_query.sort = jx_sort_to_es_sort(query.sort, schema) - - put_index = 0 - for select in selects: - # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS - if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable): - term = select.value.term - leaves = schema.leaves(term.var) - for c in leaves: - full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var)) - if c.jx_type == NESTED: - es_query.fields = ["_source"] - new_select.append({ - "name": full_name, - "value": Variable(c.es_column), - "put": {"name": literal_field(full_name), "index": put_index, "child": "."}, - "pull": get_pull_source(c.es_column) - }) - put_index += 1 - elif c.nested_path[0] != ".": - pass # THE NESTED PARENT WILL CAPTURE THIS - else: - es_query.fields += [c.es_column] - new_select.append({ - "name": full_name, - "value": Variable(c.es_column), - "put": {"name": literal_field(full_name), "index": put_index, "child": "."} - }) - put_index += 1 - elif isinstance(select.value, Variable): - s_column = select.value.var - # LEAVES OF OBJECT - leaves = schema.leaves(s_column) - nested_selects = {} - if leaves: - if s_column == '.' or any(c.jx_type == NESTED for c in leaves): - # PULL WHOLE NESTED ARRAYS - es_query.fields = ["_source"] - for c in leaves: - if len(c.nested_path) == 1: - jx_name = untype_path(c.names["."]) - new_select.append({ - "name": select.name, - "value": Variable(c.es_column), - "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}, - "pull": get_pull_source(c.es_column) - }) - else: - # PULL ONLY WHAT'S NEEDED - for c in leaves: - if len(c.nested_path) == 1: - jx_name = untype_path(c.names["."]) - if c.jx_type == NESTED: - es_query.fields = ["_source"] - new_select.append({ - "name": select.name, - "value": Variable(c.es_column), - "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}, - "pull": get_pull_source(c.es_column) - }) - - else: - es_query.fields += [c.es_column] - new_select.append({ - "name": select.name, - "value": Variable(c.es_column), - "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)} - }) - else: - if not nested_filter: - where = filters[0].copy() - nested_filter = [where] - for k in filters[0].keys(): - filters[0][k] = None - set_default( - filters[0], - es_and([where, es_or(nested_filter)]) - ) - - nested_path = c.nested_path[0] - if nested_path not in nested_selects: - where = nested_selects[nested_path] = Data() - nested_filter += [where] - where.nested.path = nested_path - where.nested.query.match_all = {} - where.nested.inner_hits._source = False - where.nested.inner_hits.fields += [c.es_column] - - child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column) - pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path)))) - new_select.append({ - "name": select.name, - "value": select.value, - "put": { - "name": select.name, - "index": put_index, - "child": child - }, - "pull": pull - }) - else: - nested_selects[nested_path].nested.inner_hits.fields += [c.es_column] - else: - new_select.append({ - "name": select.name, - "value": Variable("$dummy"), - "put": {"name": select.name, 
"index": put_index, "child": "."} - }) - put_index += 1 - else: - painless = select.value.partial_eval().to_es_script(schema) - es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema)) - new_select.append({ - "name": select.name, - "pull": jx_expression_to_function("fields." + literal_field(select.name)), - "put": {"name": select.name, "index": put_index, "child": "."} - }) - put_index += 1 - - for n in new_select: - if n.pull: - continue - elif isinstance(n.value, Variable): - if es_query.fields[0] == "_source": - es_query.fields = ["_source"] - n.pull = get_pull_source(n.value.var) - else: - n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var))) - else: - Log.error("Do not know what to do") - - with Timer("call to ES") as call_timer: - Log.note("{{data}}", data=es_query) - data = es_post(es, es_query, query.limit) - - T = data.hits.hits - - try: - formatter, groupby_formatter, mime_type = format_dispatch[query.format] - - output = formatter(T, new_select, query) - output.meta.timing.es = call_timer.duration - output.meta.content_type = mime_type - output.meta.es_query = es_query - return output - except Exception as e: - Log.error("problem formatting", e) - - -def accumulate_nested_doc(nested_path, expr=IDENTITY): - """ - :param nested_path: THE PATH USED TO EXTRACT THE NESTED RECORDS - :param expr: FUNCTION USED ON THE NESTED OBJECT TO GET SPECIFIC VALUE - :return: THE DE_TYPED NESTED OBJECT ARRAY - """ - name = literal_field(nested_path) - def output(doc): - acc = [] - for h in doc.inner_hits[name].hits.hits: - i = h._nested.offset - obj = Data() - for f, v in h.fields.items(): - local_path = untype_path(relative_field(f, nested_path)) - obj[local_path] = unwraplist(v) - # EXTEND THE LIST TO THE LENGTH WE REQUIRE - for _ in range(len(acc), i+1): - acc.append(None) - acc[i] = expr(obj) - return acc - return output - - -def format_list(T, select, query=None): - data = [] - if isinstance(query.select, list): - for row in T: - r = Data() - for s in select: - v = s.pull(row) - r[s.put.name][s.put.child] = unwraplist(v) - data.append(r if r else None) - elif isinstance(query.select.value, LeavesOp): - for row in T: - r = Data() - for s in select: - r[s.put.name][s.put.child] = unwraplist(s.pull(row)) - data.append(r if r else None) - else: - for row in T: - r = None - for s in select: - v = unwraplist(s.pull(row)) - if v is None: - continue - if s.put.child == ".": - r = v - else: - if r is None: - r = Data() - r[s.put.child] = v - - data.append(r) - - return Data( - meta={"format": "list"}, - data=data - ) - - -def format_table(T, select, query=None): - data = [] - num_columns = (MAX(select.put.index) + 1) - for row in T: - r = [None] * num_columns - for s in select: - value = unwraplist(s.pull(row)) - - if value == None: - continue - - index, child = s.put.index, s.put.child - if child == ".": - r[index] = value - else: - if r[index] is None: - r[index] = Data() - r[index][child] = value - - data.append(r) - - header = [None] * num_columns - - if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp): - for s in select: - header[s.put.index] = s.name - else: - for s in select: - if header[s.put.index]: - continue - if s.name == ".": - header[s.put.index] = "." 
- else: - header[s.put.index] = s.name - - return Data( - meta={"format": "table"}, - header=header, - data=data - ) - - -def format_cube(T, select, query=None): - table = format_table(T, select, query) - - if len(table.data) == 0: - return Cube( - select, - edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}}], - data={h: Matrix(list=[]) for i, h in enumerate(table.header)} - ) - - cols = transpose(*unwrap(table.data)) - return Cube( - select, - edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}], - data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)} - ) - - -set_default(format_dispatch, { - None: (format_cube, None, "application/json"), - "cube": (format_cube, None, "application/json"), - "table": (format_table, None, "application/json"), - "list": (format_list, None, "application/json") -}) - - -def get_pull(column): - if column.nested_path[0] == ".": - return concat_field("fields", literal_field(column.es_column)) - else: - depth = len(split_field(column.nested_path[0])) - rel_name = split_field(column.es_column)[depth:] - return join_field(["_inner"] + rel_name) - - -def get_pull_function(column): - return jx_expression_to_function(get_pull(column)) - - -def get_pull_source(es_column): - def output(row): - return untyped(row._source[es_column]) - return output - - -def get_pull_stats(stats_name, median_name): - return jx_expression_to_function({"select": [ - {"name": "count", "value": stats_name + ".count"}, - {"name": "sum", "value": stats_name + ".sum"}, - {"name": "min", "value": stats_name + ".min"}, - {"name": "max", "value": stats_name + ".max"}, - {"name": "avg", "value": stats_name + ".avg"}, - {"name": "sos", "value": stats_name + ".sum_of_squares"}, - {"name": "std", "value": stats_name + ".std_deviation"}, - {"name": "var", "value": stats_name + ".variance"}, - {"name": "median", "value": median_name + ".values.50\\.0"} - ]}) - diff --git a/vendor/jx_elasticsearch/es14/util.py b/vendor/jx_elasticsearch/es14/util.py deleted file mode 100644 index 562e622..0000000 --- a/vendor/jx_elasticsearch/es14/util.py +++ /dev/null @@ -1,135 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_elasticsearch.es14.expressions import Variable -from mo_dots import wrap -from mo_future import text_type -from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT -from mo_logs import Log - - -def es_query_template(path): - """ - RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE - :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME) - :return: - """ - - if not isinstance(path, text_type): - Log.error("expecting path to be a string") - - if path != ".": - f0 = {} - f1 = {} - output = wrap({ - "query": {"filtered": {"filter": es_and([ - f0, - {"nested": { - "path": path, - "filter": f1, - "inner_hits": {"size": 100000} - }} - ])}}, - "from": 0, - "size": 0, - "sort": [] - }) - return output, wrap([f0, f1]) - else: - f0 = {} - output = wrap({ - "query": {"filtered": {"filter": es_and([f0])}}, - "from": 0, - "size": 0, - "sort": [] - }) - return output, wrap([f0]) - - -def jx_sort_to_es_sort(sort, schema): - if not sort: - return [] - - output = [] - for s in sort: - if isinstance(s.value, Variable): - cols = schema.leaves(s.value.var) - if s.sort == -1: - types = OBJECT, STRING, NUMBER, BOOLEAN - else: - types = BOOLEAN, NUMBER, STRING, OBJECT - - for type in types: - for c in cols: - if c.jx_type == type: - if s.sort == -1: - output.append({c.es_column: "desc"}) - else: - output.append(c.es_column) - else: - from mo_logs import Log - - Log.error("do not know how to handle") - return output - - -# FOR ELASTICSEARCH aggs -aggregates = { - "none": "none", - "one": "count", - "cardinality": "cardinality", - "sum": "sum", - "add": "sum", - "count": "value_count", - "maximum": "max", - "minimum": "min", - "max": "max", - "min": "min", - "mean": "avg", - "average": "avg", - "avg": "avg", - "median": "median", - "percentile": "percentile", - "N": "count", - "s0": "count", - "s1": "sum", - "s2": "sum_of_squares", - "std": "std_deviation", - "stddev": "std_deviation", - "union": "union", - "var": "variance", - "variance": "variance", - "stats": "stats" -} - -NON_STATISTICAL_AGGS = {"none", "one"} - - -def es_and(terms): - return wrap({"and": terms}) - - -def es_or(terms): - return wrap({"or": terms}) - - -def es_not(term): - return wrap({"not": term}) - - -def es_script(term): - return wrap({"script": term}) - - -def es_missing(term): - return {"missing": {"field": term}} diff --git a/vendor/jx_elasticsearch/es52/__init__.py b/vendor/jx_elasticsearch/es52/__init__.py index f4b119b..17c9832 100644 --- a/vendor/jx_elasticsearch/es52/__init__.py +++ b/vendor/jx_elasticsearch/es52/__init__.py @@ -5,29 +5,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from jx_base import container +from jx_base import Column, container from jx_base.container import Container -from jx_base.dimensions import Dimension from jx_base.expressions import jx_expression +from jx_base.language import is_op from jx_base.query import QueryOp -from jx_elasticsearch.es52.aggs import es_aggsop, is_aggsop -from jx_elasticsearch.es52.deep import is_deepop, es_deepop -from jx_elasticsearch.es52.setop import is_setop, es_setop -from jx_elasticsearch.es52.util import aggregates +from jx_elasticsearch import elasticsearch +from jx_elasticsearch.es52.expressions import ES52 as ES52Lang +from jx_elasticsearch.es52.agg_bulk import is_bulk_agg, es_bulkaggsop +from jx_elasticsearch.es52.agg_op import es_aggsop, is_aggsop +from jx_elasticsearch.es52.deep import es_deepop, is_deepop +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.set_bulk import is_bulk_set, es_bulksetop +from jx_elasticsearch.es52.set_op import es_setop, is_setop +from jx_elasticsearch.es52.stats import QueryStats +from jx_elasticsearch.es52.util import aggregates, temper_limit from jx_elasticsearch.meta import ElasticsearchMetadata, Table from jx_python import jx -from mo_dots import Data, unwrap, coalesce, split_field, join_field, wrap, listwrap -from mo_json import value2json -from mo_json.typed_encoder import EXISTS_TYPE +from mo_dots import Data, coalesce, listwrap, split_field, startswith_field, unwrap, wrap +from mo_dots.lists import last +from mo_future import sort_using_key +from mo_json import OBJECT, value2json +from mo_json.typed_encoder import EXISTS_TYPE, NESTED_TYPE from mo_kwargs import override -from mo_logs import Log, Except -from pyLibrary.env import elasticsearch, http +from mo_logs import Except, Log +from mo_times import Date +from mo_http import http class ES52(Container): @@ -47,9 +54,10 @@ class ES52(Container): def __init__( self, host, - index, + index, # THE NAME OF THE SNOWFLAKE (IF WRITING) + alias=None, # THE NAME OF THE SNOWFLAKE (FOR READING) type=None, - name=None, + name=None, # THE FULL NAME OF THE TABLE (THE NESTED PATH INTO THE SNOWFLAKE) port=9200, read_only=True, timeout=None, # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests) @@ -63,17 +71,19 @@ class ES52(Container): "type": "elasticsearch", "settings": unwrap(kwargs) } + self.edges = Data() # SET EARLY, SO OTHER PROCESSES CAN REQUEST IT + self.worker = None self.settings = kwargs - self.name = name = coalesce(name, index) + self._namespace = ElasticsearchMetadata(kwargs=kwargs) + self.name = name = self._namespace._find_alias(coalesce(alias, index, name)) if read_only: - self.es = elasticsearch.Alias(alias=index, kwargs=kwargs) + self.es = elasticsearch.Alias(alias=name, index=None, kwargs=kwargs) else: self.es = elasticsearch.Cluster(kwargs=kwargs).get_index(read_only=read_only, kwargs=kwargs) - self._namespace = ElasticsearchMetadata(kwargs=kwargs) + self._ensure_max_result_window_set(name) self.settings.type = self.es.settings.type - self.edges = Data() - self.worker = None + self.stats = QueryStats(self.es.cluster) columns = self.snowflake.columns # ABSOLUTE COLUMNS is_typed = any(c.es_column == EXISTS_TYPE for c in columns) @@ -86,6 +96,42 @@ class 
ES52(Container): Log.error("Expecting given typed {{typed}} to match {{is_typed}}", typed=typed, is_typed=is_typed) self.typed = typed + if not typed: + # ADD EXISTENCE COLUMNS + all_paths = {'.': None} # MAP FROM path TO parent TO MAKE A TREE + + def nested_path_of(v): + if v == '.': + return ('.',) + return (v,) + nested_path_of(all_paths[v]) + + query_paths = sort_using_key(set(step for path in self.snowflake.query_paths for step in path), key=lambda p: len(split_field(p))) + for step in query_paths: + if step in all_paths: + continue + else: + best = '.' + for candidate in all_paths.keys(): + if startswith_field(step, candidate): + if startswith_field(candidate, best): + best = candidate + all_paths[step] = best + for p in all_paths.keys(): + nested_path = nested_path_of(p) + try: + self.namespace.meta.columns.add(Column( + name=p, + es_column=p, + es_index=self.name, + es_type=OBJECT, + jx_type=OBJECT, + nested_path=nested_path, + multi=1001 if last(split_field(p)) == NESTED_TYPE else None, + last_updated=Date.now() + )) + except Exception as e: + raise e + @property def snowflake(self): return self._namespace.get_snowflake(self.es.settings.alias) @@ -105,32 +151,28 @@ class ES52(Container): settings.settings = None return settings - def __enter__(self): - Log.error("No longer used") - return self - - def __exit__(self, type, value, traceback): - if not self.worker: - return - - if isinstance(value, Exception): - self.worker.stop() - self.worker.join() - else: - self.worker.join() - - @property - def query_path(self): - return join_field(split_field(self.name)[1:]) - @property def url(self): return self.es.url + def _ensure_max_result_window_set(self, name): + # TODO : CHECK IF THIS IS ALREADY SET, IT TAKES TOO LONG + for i, s in self.es.cluster.get_metadata().indices.items(): + if name == i or name in s.aliases: + if s.settings.index.max_result_window != '100000' or s.settings.index.max_inner_result_window != '100000': + Log.note("setting max_result_window") + self.es.cluster.put("/" + name + "/_settings", data={"index": { + "max_inner_result_window": 100000, + "max_result_window": 100000 + }}) + break + def query(self, _query): try: query = QueryOp.wrap(_query, container=self, namespace=self.namespace) + self.stats.record(query) + for s in listwrap(query.select): if s.aggregate != None and not aggregates.get(s.aggregate): Log.error( @@ -140,12 +182,19 @@ class ES52(Container): ) frum = query["from"] - if isinstance(frum, QueryOp): + if is_op(frum, QueryOp): result = self.query(frum) q2 = query.copy() q2.frum = result return jx.run(q2) + if is_bulk_agg(self.es, query): + return es_bulkaggsop(self, frum, query) + if is_bulk_set(self.es, query): + return es_bulksetop(self, frum, query) + + query.limit = temper_limit(query.limit, query) + if is_deepop(self.es, query): return es_deepop(self.es, query) if is_aggsop(self.es, query): @@ -160,32 +209,6 @@ class ES52(Container): Log.error("Problem (Tried to clear Elasticsearch cache)", e) Log.error("problem", e) - def addDimension(self, dim): - if isinstance(dim, list): - Log.error("Expecting dimension to be a object, not a list:\n{{dim}}", dim= dim) - self._addDimension(dim, []) - - def _addDimension(self, dim, path): - dim.full_name = dim.name - for e in dim.edges: - d = Dimension(e, dim, self) - self.edges[d.full_name] = d - - def __getitem__(self, item): - c = self.get_columns(table_name=self.name, column_name=item) - if c: - if len(c) > 1: - Log.error("Do not know how to handle multipole matches") - return c[0] - - e = self.edges[item] - 
if not c: - Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name) - return e - - def __getattr__(self, item): - return self.edges[item] - def update(self, command): """ EXPECTING command == {"set":term, "where":where} @@ -198,12 +221,11 @@ class ES52(Container): es_index = self.es.cluster.get_index(read_only=False, alias=None, kwargs=self.es.settings) schema = table.schema - es_filter = jx_expression(command.where).to_esfilter(schema) # GET IDS OF DOCUMENTS query = { "from": command['update'], - "select": ["_id"] + [ + "select": [{"value": "_id"}] + [ {"name": k, "value": v} for k, v in command.set.items() ], @@ -226,7 +248,6 @@ class ES52(Container): response = self.es.cluster.post( es_index.path + "/" + "_bulk", data=content, - headers={"Content-Type": "application/json"}, timeout=self.settings.timeout, params={"wait_for_active_shards": self.settings.wait_for_active_shards} ) @@ -234,10 +255,10 @@ class ES52(Container): Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]) # DELETE BY QUERY, IF NEEDED - if '.' in listwrap(command.clear): + if "." in listwrap(command['clear']): + es_filter = ES52Lang[jx_expression(command.where)].partial_eval().to_esfilter(schema) self.es.delete_record(es_filter) return es_index.flush() - diff --git a/vendor/jx_elasticsearch/es52/agg_bulk.py b/vendor/jx_elasticsearch/es52/agg_bulk.py new file mode 100644 index 0000000..6d0b9c2 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/agg_bulk.py @@ -0,0 +1,357 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from copy import deepcopy + +import mo_math +from jx_base.expressions import Variable, TRUE +from jx_base.language import is_op +from jx_base.query import _normalize_group +from jx_elasticsearch.es52.agg_format import format_list_from_groupby, format_table_from_groupby +from jx_elasticsearch.es52.agg_op import build_es_query +from mo_dots import listwrap, unwrap, Null, wrap, coalesce +from mo_files import TempFile, URL, mimetype +from mo_future import first +from mo_json import value2json +from mo_logs import Log, Except +from mo_math.randoms import Random +from mo_testing.fuzzytestcase import assertAlmostEqual +from mo_threads import Thread +from mo_times import Timer, Date +from pyLibrary.aws.s3 import Connection + +DEBUG = False +MAX_CHUNK_SIZE = 5000 +MAX_PARTITIONS = 200 +URL_PREFIX = URL("https://active-data-query-results.s3-us-west-2.amazonaws.com") +S3_CONFIG = Null + + +def is_bulk_agg(esq, query): + # ONLY ACCEPTING ONE DIMENSION AT THIS TIME + if not S3_CONFIG: + return False + if query.destination not in {"s3", "url"}: + return False + if query.format not in {"list", "table"}: + return False + if len(listwrap(query.groupby)) != 1: + return False + + gb = first(_normalize_group(first(listwrap(query.groupby)), 0, query.limit)) + if not is_op(gb.value, Variable): + return False + return True + + +def es_bulkaggsop(esq, frum, query): + query = query.copy() # WE WILL MARK UP THIS QUERY + + chunk_size = min(coalesce(query.chunk_size, MAX_CHUNK_SIZE), MAX_CHUNK_SIZE) + schema = frum.schema + query_path = first(schema.query_path) + selects = listwrap(query.select) + + variable = first(query.groupby).value + # FIND CARDINALITY + + cardinality_check = Timer( + "Get cardinality for {{column}}", param={"column": variable.var} + ) + + with cardinality_check: + columns = schema.leaves(variable.var) + if len(columns) != 1: + Log.error( + "too many columns to bulk groupby:\n{{columns|json}}", columns=columns + ) + column = first(columns) + + if query.where is TRUE: + cardinality = column.cardinality + if cardinality == None: + esq.namespace._update_cardinality(column) + cardinality = column.cardinality + else: + cardinality = esq.query( + { + "select": { + "name": "card", + "value": variable, + "aggregate": "cardinality", + }, + "from": frum.name, + "where": query.where, + "format": "cube", + } + ).card + + num_partitions = (cardinality + chunk_size - 1) // chunk_size + + if num_partitions > MAX_PARTITIONS: + Log.error("Requesting more than {{num}} partitions", num=num_partitions) + if num_partitions == 0: + num_partitions = 1 + + acc, decoders, es_query = build_es_query(selects, query_path, schema, query) + guid = Random.base64(32, extra="-_") + abs_limit = mo_math.MIN((query.limit, first(query.groupby).domain.limit)) + formatter = formatters[query.format](abs_limit) + + Thread.run( + "extract to " + guid + ".json", + extractor, + guid, + num_partitions, + esq, + query, + selects, + query_path, + schema, + chunk_size, + cardinality, + abs_limit, + formatter, + parent_thread=Null, + ).release() + + output = wrap( + { + "url": URL_PREFIX / (guid + ".json"), + "status": URL_PREFIX / (guid + ".status.json"), + "meta": { + "format": query.format, + "timing": {"cardinality_check": cardinality_check.duration}, + "es_query": es_query, + "num_partitions": num_partitions, + "cardinality": cardinality, + }, + } + ) + return output + + +def extractor( + guid, + num_partitions, + 
esq, + query, + selects, + query_path, + schema, + chunk_size, + cardinality, + abs_limit, + formatter, + please_stop, +): + total = 0 + # WE MESS WITH THE QUERY LIMITS FOR CHUNKING + query.limit = first(query.groupby).domain.limit = chunk_size * 2 + start_time = Date.now() + + try: + write_status( + guid, + { + "status": "starting", + "chunks": num_partitions, + "rows": min(abs_limit, cardinality), + "start_time": start_time, + "timestamp": Date.now(), + }, + ) + + with TempFile() as temp_file: + with open(temp_file.abspath, "wb") as output: + for i in range(0, num_partitions): + if please_stop: + Log.error("request to shutdown!") + is_last = i == num_partitions - 1 + first(query.groupby).allowNulls = is_last + acc, decoders, es_query = build_es_query( + selects, query_path, schema, query + ) + # REACH INTO THE QUERY TO SET THE partitions + terms = es_query.aggs._filter.aggs._match.terms + terms.include.partition = i + terms.include.num_partitions = num_partitions + + result = esq.es.search(deepcopy(es_query), query.limit) + aggs = unwrap(result.aggregations) + + formatter.add(aggs, acc, query, decoders, selects) + for b in formatter.bytes(): + if b is DONE: + break + output.write(b) + else: + write_status( + guid, + { + "status": "working", + "chunk": i, + "chunks": num_partitions, + "row": total, + "rows": min(abs_limit, cardinality), + "start_time": start_time, + "timestamp": Date.now(), + }, + ) + continue + break + for b in formatter.footer(): + output.write(b) + + upload(guid + ".json", temp_file) + write_status( + guid, + { + "ok": True, + "status": "done", + "chunks": num_partitions, + "rows": min(abs_limit, cardinality), + "start_time": start_time, + "end_time": Date.now(), + "timestamp": Date.now(), + }, + ) + except Exception as e: + e = Except.wrap(e) + write_status( + guid, + { + "ok": False, + "status": "error", + "error": e, + "start_time": start_time, + "end_time": Date.now(), + "timestamp": Date.now(), + }, + ) + Log.warning("Could not extract", cause=e) + + +def upload(filename, temp_file): + with Timer("upload file to S3 {{file}}", param={"file": filename}): + try: + connection = Connection(S3_CONFIG).connection + bucket = connection.get_bucket(S3_CONFIG.bucket, validate=False) + storage = bucket.new_key(filename) + storage.set_contents_from_filename( + temp_file.abspath, headers={"Content-Type": mimetype.JSON} + ) + if S3_CONFIG.public: + storage.set_acl("public-read") + + except Exception as e: + Log.error( + "Problem connecting to {{bucket}}", bucket=S3_CONFIG.bucket, cause=e + ) + + +def write_status(guid, status): + try: + filename = guid + ".status.json" + with Timer("upload status to S3 {{file}}", param={"file": filename}, verbose=DEBUG): + try: + connection = Connection(S3_CONFIG).connection + bucket = connection.get_bucket(S3_CONFIG.bucket, validate=False) + storage = bucket.new_key(filename) + storage.set_contents_from_string( + value2json(status), headers={"Content-Type": mimetype.JSON} + ) + if S3_CONFIG.public: + storage.set_acl("public-read") + + except Exception as e: + Log.error( + "Problem connecting to {{bucket}}", + bucket=S3_CONFIG.bucket, + cause=e + ) + except Exception as e: + Log.warning("problem setting status", cause=e) + + +DONE = object() + + +class ListFormatter(object): + def __init__(self, abs_limit): + self.header = b"{\"meta\":{\"format\":\"list\"},\"data\":[\n" + self.count = 0 + self.abs_limit = abs_limit + self.result = None + + def add(self, aggs, acc, query, decoders, selects): + self.result = format_list_from_groupby(aggs, acc, 
query, decoders, selects) + + def bytes(self): + yield self.header + self.header = b",\n" + + comma = b"" + for r in self.result.data: + yield comma + comma = b",\n" + yield value2json(r).encode('utf8') + self.count += 1 + if self.count >= self.abs_limit: + yield DONE + + def footer(self): + yield b"\n]}" + + +class TableFormatter(object): + def __init__(self, abs_limit): + self.header = None + + self.count = 0 + self.abs_limit = abs_limit + self.result = None + self.pre = "" + + def add(self, aggs, acc, query, decoders, selects): + self.result = format_table_from_groupby(aggs, acc, query, decoders, selects) + # CONFIRM HEADER MATCH + if self.header: + assertAlmostEqual(self.header, self.result.header) + else: + self.header = self.result.header + + def bytes(self): + if self.pre: + yield self.pre + else: + self.pre = b",\n" + yield b"{\"meta\":{\"format\":\"table\"},\"header\":" + yield value2json(self.header).encode('utf8') + yield b",\n\"data\":[\n" + + comma = b"" + for r in self.result.data: + yield comma + comma = b",\n" + yield value2json(r).encode('utf8') + self.count += 1 + if self.count >= self.abs_limit: + yield DONE + + def footer(self): + yield b"\n]}" + + +formatters = { + "list": ListFormatter, + "table": TableFormatter +} diff --git a/vendor/jx_elasticsearch/es52/agg_format.py b/vendor/jx_elasticsearch/es52/agg_format.py new file mode 100644 index 0000000..83d7c2d --- /dev/null +++ b/vendor/jx_elasticsearch/es52/agg_format.py @@ -0,0 +1,359 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import TupleOp +from jx_base.language import is_op +from jx_base.query import canonical_aggregates +from jx_python.containers.cube import Cube +from mo_collections.matrix import Matrix +from mo_dots import Data, coalesce, is_list, split_field, wrap +from mo_files import mimetype +from mo_future import sort_using_key, next +from mo_json import value2json +from mo_logs import Log +from mo_logs.strings import quote + + +aggs_iterator, count_dim = [None]*2 + + +def format_cube(aggs, es_query, query, decoders, all_selects): + new_edges = count_dim(aggs, es_query, decoders) + + dims = [] + for e in new_edges: + if is_op(e.value, TupleOp): + e.allowNulls = False + + extra = 0 if e.allowNulls is False else 1 + dims.append(len(e.domain.partitions) + extra) + + dims = tuple(dims) + if any(s.default != canonical_aggregates[s.aggregate].default for s in all_selects): + # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION + is_default = Matrix(dims=dims, zeros=True) + matricies = {s.name: Matrix(dims=dims) for s in all_selects} + for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders): + for select in selects: + m = matricies[select.name] + v = select.pull(agg) + if v == None: + continue + is_default[coord] = False + union(m, coord, v, select.aggregate) + + # FILL THE DEFAULT VALUES + for c, v in is_default: + if v: + for s in all_selects: + matricies[s.name][c] = s.default + else: + matricies = {s.name: Matrix(dims=dims, zeros=s.default) for s in all_selects} + for row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders): + for select in selects: + m = matricies[select.name] + v = select.pull(agg) + union(m, coord, v, select.aggregate) + + cube = 
Cube( + query.select, + sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY + matricies + ) + cube.frum = query + return cube + + +def _value_drill(agg): + while True: + deeper = agg.get("_nested") + if deeper: + agg = deeper + continue + deeper = agg.get("_filter") + if deeper: + agg = deeper + continue + return agg + + +def format_table(aggs, es_query, query, decoders, all_selects): + new_edges = wrap(count_dim(aggs, es_query, decoders)) + dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) + rank = len(dims) + header = tuple(new_edges.name + all_selects.name) + name2index = {s.name: i + rank for i, s in enumerate(all_selects)} + + def data(): + is_sent = Matrix(dims=dims) + give_me_zeros = query.sort and not query.groupby + if give_me_zeros: + # WE REQUIRE THE ZEROS FOR SORTING + all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS + ordered_coord = next(all_coord)[::-1] + output = None + for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders): + if coord != ordered_coord: + # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES + if output is not None: + for s in all_selects: + i = name2index[s.name] + if output[i] is None: + output[i] = s.default + # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT + ordered_coord = next(all_coord)[::-1] + + while coord != ordered_coord: + # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord + record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects] + yield record + ordered_coord = next(all_coord)[::-1] + # coord == missing_coord + output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects] + for select in ss: + v = select.pull(agg) + if v != None: + union(output, name2index[select.name], v, select.aggregate) + yield output + else: + last_coord = None # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS + output = None + for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders): + if coord != last_coord: + if output: + # SET DEFAULTS + for i, s in enumerate(all_selects): + v = output[rank+i] + if v == None: + output[rank+i] = s.default + yield output + output = is_sent[coord] + if output == None: + output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects] + last_coord = coord + # THIS IS A TRICK! 
WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED + for select in ss: + v = select.pull(agg) + if v != None: + union(output, name2index[select.name], v, select.aggregate) + + if output: + # SET DEFAULTS ON LAST ROW + for i, s in enumerate(all_selects): + v = output[rank+i] + if v == None: + output[rank+i] = s.default + yield output + + # EMIT THE MISSING CELLS IN THE CUBE + if not query.groupby: + for coord, output in is_sent: + if output == None: + record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects] + yield record + + return Data( + meta={"format": "table"}, + header=header, + data=list(data()) + ) + +def format_tab(aggs, es_query, query, decoders, select): + table = format_table(aggs, es_query, query, decoders, select) + + def data(): + yield "\t".join(map(quote, table.header)) + for d in table.data: + yield "\t".join(map(quote, d)) + + return data() + + +def format_csv(aggs, es_query, query, decoders, select): + table = format_table(aggs, es_query, query, decoders, select) + + def data(): + yield ", ".join(map(quote, table.header)) + for d in table.data: + yield ", ".join(map(quote, d)) + + return data() + + +def format_table_from_groupby(aggs, es_query, query, decoders, all_selects): + new_edges = wrap(count_dim(aggs, es_query, decoders)) + header = tuple(new_edges.name + all_selects.name) + name2index = {s.name: i for i, s in enumerate(all_selects)} + + def data(): + last_coord = None # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS + coords = None + values = None + for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders): + if coord != last_coord: + if coords: + # SET DEFAULTS + for i, s in enumerate(all_selects): + v = values[i] + if v == None: + values[i] = s.default + yield coords + tuple(values) + coords = tuple(d.get_value(c) for c, d in zip(coord, decoders)) + values = [None for _ in all_selects] + last_coord = coord + # THIS IS A TRICK! WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED + for select in ss: + v = select.pull(agg) + if v != None: + union(values, name2index[select.name], v, select.aggregate) + + if coords: + # SET DEFAULTS ON LAST ROW + for i, s in enumerate(all_selects): + v = values[i] + if v == None: + values[i] = s.default + yield coords + tuple(values) + + return Data( + meta={"format": "table"}, + header=header, + data=list(data()) + ) + + +def format_list_from_groupby(aggs, es_query, query, decoders, all_selects): + new_edges = wrap(count_dim(aggs, es_query, decoders)) + + def data(): + groupby = query.groupby + dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) + is_sent = Matrix(dims=dims) + give_me_zeros = query.sort and not query.groupby + + finishes = [] + # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY + for s in all_selects: + if s.default != canonical_aggregates[s.aggregate].default: + s.finish = s.default + s.default = None + finishes.append(s) + + for row, coord, agg, _selects in aggs_iterator(aggs, es_query, decoders, give_me_zeros=give_me_zeros): + output = is_sent[coord] + if output == None: + output = is_sent[coord] = Data() + for g, d, c in zip(groupby, decoders, coord): + output[g.put.name] = d.get_value(c) + for s in all_selects: + output[s.name] = s.default + yield output + # THIS IS A TRICK! 
WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED + for s in _selects: + union(output, s.name, s.pull(agg), s.aggregate) + + if finishes: + # SET ANY DEFAULTS + for c, o in is_sent: + for s in finishes: + if o[s.name] == None: + o[s.name] = s.finish + + for g in query.groupby: + g.put.name = coalesce(g.put.name, g.name) + + output = Data( + meta={"format": "list"}, + data=list(data()) + ) + return output + + +def format_list(aggs, es_query, query, decoders, select): + table = format_table(aggs, es_query, query, decoders, select) + header = table.header + + if query.edges or query.groupby: + data = [] + for row in table.data: + d = Data() + for h, r in zip(header, row): + d[h] = r + data.append(d) + format = "list" + elif is_list(query.select): + data = Data() + for h, r in zip(header, table.data[0]): + data[h] = r + format = "value" + else: + data = table.data[0][0] + format = "value" + + output = Data( + meta={"format": format}, + data=data + ) + return output + + +def format_line(aggs, es_query, query, decoders, select): + list = format_list(aggs, es_query, query, decoders, select) + + def data(): + for d in list.data: + yield value2json(d) + + return data() + + +agg_formatters = { + # EDGES FORMATTER, GROUPBY FORMATTER, VALUE_FORMATTER, mime_type + None: (format_cube, format_table, format_cube, mimetype.JSON), + "cube": (format_cube, format_cube, format_cube, mimetype.JSON), + "table": (format_table, format_table_from_groupby, format_table, mimetype.JSON), + "list": (format_list, format_list_from_groupby, format_list, mimetype.JSON), +} + + +def _get(v, k, d): + for p in split_field(k): + try: + v = v.get(p) + if v is None: + return d + except Exception: + v = [vv.get(p) for vv in v] + return v + + +def union(matrix, coord, value, agg): + # matrix[coord] = existing + value WITH ADDITIONAL CHECKS + existing = matrix[coord] + if existing == None: + matrix[coord] = value + elif value == None: + pass + elif agg not in ['sum', 'count']: + if agg == "cardinality" and (existing == 0 or value == 0): + matrix[coord] = existing + value + return + elif agg == "stats": + matrix[coord] = existing + value + return + elif agg == "union": + matrix[coord] = list(set(existing) | set(value)) + return + Log.warning("{{agg}} not ready", agg=agg) + else: + matrix[coord] = existing + value + + diff --git a/vendor/jx_elasticsearch/es52/agg_op.py b/vendor/jx_elasticsearch/es52/agg_op.py new file mode 100644 index 0000000..7b91eee --- /dev/null +++ b/vendor/jx_elasticsearch/es52/agg_op.py @@ -0,0 +1,343 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
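The `union()` helper above is how partial aggregate values, arriving from several nested or filtered buckets, get merged into one output cell. A minimal standalone sketch of the same merge rule, using a plain list as the row and covering only the additive and set-union cases (`merge` and the sample values are hypothetical stand-ins for the Matrix-based original):

```python
# Sketch of the merge rule in union() above, under the assumptions stated in
# the lead-in; not the real Matrix-based implementation.
def merge(row, i, value, agg):
    existing = row[i]
    if existing is None:
        row[i] = value                              # FIRST PARTIAL FOR THIS CELL
    elif value is None:
        pass                                        # NOTHING TO MERGE
    elif agg in ("sum", "count"):
        row[i] = existing + value                   # ADDITIVE AGGREGATES ACCUMULATE
    elif agg == "union":
        row[i] = list(set(existing) | set(value))   # SET-UNION OF PARTIAL LISTS
    else:
        raise NotImplementedError(agg)              # REAL CODE WARNS AND CONTINUES

row = [None]
merge(row, 0, 3, "count")
merge(row, 0, 4, "count")
assert row == [7]
```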
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from collections import deque + +from jx_base.domains import SetDomain +from jx_base.expressions import NULL, Variable as Variable_ +from jx_base.language import is_op +from jx_base.query import DEFAULT_LIMIT +from jx_elasticsearch.es52 import agg_format +from jx_elasticsearch.es52.agg_format import agg_formatters +from jx_elasticsearch.es52.agg_op_field import agg_field +from jx_elasticsearch.es52.agg_op_formula import agg_formula +from jx_elasticsearch.es52.decoders import AggsDecoder +from jx_elasticsearch.es52.es_query import Aggs, FilterAggs, NestedAggs, simplify +from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_path +from jx_elasticsearch.es52.painless import Painless +from jx_python import jx +from mo_dots import Data, Null, coalesce, listwrap, literal_field, unwrap, unwraplist, wrap +from mo_future import first, next +from mo_logs import Log +from mo_times.timer import Timer + +DEBUG = False + + + +def is_aggsop(es, query): + if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate): + return True + return False + + +def get_decoders_by_path(query): + """ + RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS + + :param query: + :return: + """ + schema = query.frum.schema + output = {} + + if query.edges: + if query.sort and query.format != "cube": + # REORDER EDGES/GROUPBY TO MATCH THE SORT + query.edges = sort_edges(query, "edges") + elif query.groupby: + if query.sort and query.format != "cube": + query.groupby = sort_edges(query, "groupby") + + for edge in wrap(coalesce(query.edges, query.groupby, [])): + limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT) + if edge.value != None and not edge.value is NULL: + edge = edge.copy() + vars_ = edge.value.vars() + for v in vars_: + if not schema.leaves(v.var): + Log.error("{{var}} does not exist in schema", var=v) + elif edge.range: + vars_ = edge.range.min.vars() | edge.range.max.vars() + for v in vars_: + if not schema[v.var]: + Log.error("{{var}} does not exist in schema", var=v) + elif edge.domain.dimension: + vars_ = edge.domain.dimension.fields + edge.domain.dimension = edge.domain.dimension.copy() + edge.domain.dimension.fields = [schema[v].es_column for v in vars_] + elif all(edge.domain.partitions.where): + vars_ = set() + for p in edge.domain.partitions: + vars_ |= p.where.vars() + + vars_ |= edge.value.vars() + depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var)) + if not depths: + Log.error( + "Do not know of column {{column}}", + column=unwraplist([v for v in vars_ if schema[v] == None]) + ) + if len(depths) > 1: + Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value) + + decoder = AggsDecoder(edge, query, limit) + output.setdefault(first(depths), []).append(decoder) + return output + + +def sort_edges(query, prop): + ordered_edges = [] + remaining_edges = getattr(query, prop) + for s in jx.reverse(query.sort): + for e in remaining_edges: + if e.value == s.value: + if isinstance(e.domain, SetDomain): + pass # ALREADY SORTED? 
+ else: + e.domain.sort = s.sort + ordered_edges.append(e) + remaining_edges.remove(e) + break + else: + Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value) + + ordered_edges.extend(remaining_edges) + for i, o in enumerate(ordered_edges): + o.dim = i # REORDER THE EDGES + return ordered_edges + + +def extract_aggs(select, query_path, schema): + """ + RETURN ES AGGREGATIONS + """ + + new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING + formula = [] + for s in select: + if is_op(s.value, Variable_): + s.query_path = query_path + if s.aggregate == "count": + new_select["count_"+literal_field(s.value.var)] += [s] + else: + new_select[literal_field(s.value.var)] += [s] + elif s.aggregate: + split_select = split_expression_by_path(s.value, schema, lang=Painless) + for si_key, si_value in split_select.items(): + if si_value: + if s.query_path: + Log.error("can not handle more than one depth per select") + s.query_path = si_key + formula.append(s) + + acc = Aggs() + agg_field(acc, new_select, query_path, schema) + agg_formula(acc, formula, query_path, schema) + return acc + + +def build_es_query(select, query_path, schema, query): + acc = extract_aggs(select, query_path, schema) + acc = NestedAggs(query_path).add(acc) + split_decoders = get_decoders_by_path(query) + split_wheres = split_expression_by_path(query.where, schema=schema, lang=ES52) + start = 0 + decoders = [None] * (len(query.edges) + len(query.groupby)) + paths = list(reversed(sorted(set(split_wheres.keys()) | set(split_decoders.keys())))) + for path in paths: + decoder = split_decoders.get(path, Null) + where = split_wheres.get(path, Null) + + for d in decoder: + decoders[d.edge.dim] = d + acc = d.append_query(path, acc) + start += d.num_columns + + if where: + acc = FilterAggs("_filter", AndOp(where), None).add(acc) + acc = NestedAggs(path).add(acc) + acc = NestedAggs('.').add(acc) + acc = simplify(acc) + es_query = wrap(acc.to_es(schema)) + es_query.size = 0 + return acc, decoders, es_query + + +def es_aggsop(es, frum, query): + query = query.copy() # WE WILL MARK UP THIS QUERY + schema = frum.schema + query_path = schema.query_path[0] + selects = listwrap(query.select) + + acc, decoders, es_query = build_es_query(selects, query_path, schema, query) + + with Timer("ES query time", verbose=DEBUG) as es_duration: + result = es.search(es_query) + + # Log.note("{{result}}", result=result) + + try: + format_time = Timer("formatting", verbose=DEBUG) + with format_time: + # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) + # IT APPEARS THE OLD doc_count IS GONE + aggs = unwrap(result.aggregations) + + edges_formatter, groupby_formatter, value_formatter, mime_type = agg_formatters[query.format] + if query.edges: + output = edges_formatter(aggs, acc, query, decoders, selects) + elif query.groupby: + output = groupby_formatter(aggs, acc, query, decoders, selects) + else: + output = value_formatter(aggs, acc, query, decoders, selects) + + output.meta.timing.formatting = format_time.duration + output.meta.timing.es_search = es_duration.duration + output.meta.content_type = mime_type + output.meta.es_query = es_query + return output + except Exception as e: + if query.format not in agg_formatters: + Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e) + Log.error("Some problem", cause=e) + + +EMPTY = {} +EMPTY_LIST = [] + + +def drill(agg): + while True: + deeper = agg.get("_filter") +
if deeper: + agg = deeper + continue + return agg + + +def _children(agg, children): + for child in children: + name = child.name + if name is None: + yield None, agg, child, None + continue + + v = agg[name] + if name == "_match": + for i, b in enumerate(v.get("buckets", EMPTY_LIST)): + yield i, b, child, b + elif name.startswith("_match"): + i = int(name[6:]) + yield i, v, child, v + elif name.startswith("_missing"): + if len(name) == 8: + i = None + else: + i = int(name[8:]) + yield None, v, child, v + else: + yield None, v, child, None + + +def aggs_iterator(aggs, es_query, decoders, give_me_zeros=False): + """ + DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE: + RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS + + :param aggs: ES AGGREGATE OBJECT + :param es_query: THE ABSTRACT ES QUERY WE WILL TRACK ALONGSIDE aggs + :param decoders: TO CONVERT PARTS INTO COORDINATES + """ + coord = [0] * len(decoders) + parts = deque() + stack = [] + + gen = _children(aggs, es_query.children) + while True: + try: + index, c_agg, c_query, part = next(gen) + except StopIteration: + try: + gen = stack.pop() + except IndexError: + return + parts.popleft() + continue + + if c_agg.get('doc_count') == 0 and not give_me_zeros: + continue + parts.appendleft(part) + for d in c_query.decoders: + coord[d.edge.dim] = d.get_index(tuple(p for p in parts if p is not None), c_query, index) + + children = c_query.children + selects = c_query.selects + if selects or not children: + parts.popleft() # c_agg WAS ON TOP + yield ( + tuple(p for p in parts if p is not None), + tuple(coord), + c_agg, + selects + ) + continue + + stack.append(gen) + gen = _children(c_agg, children) + + +def count_dim(aggs, es_query, decoders): + if not any(hasattr(d, "done_count") for d in decoders): + return [d.edge for d in decoders] + + def _count_dim(parts, aggs, es_query): + children = es_query.children + if not children: + return + + for child in children: + name = child.name + if not name: + if aggs.get('doc_count') != 0: + _count_dim(parts, aggs, child) + continue + + agg = aggs[name] + if agg.get('doc_count') == 0: + continue + elif name == "_match": + for i, b in enumerate(agg.get("buckets", EMPTY_LIST)): + if not b.get('doc_count'): + continue + b["_index"] = i + new_parts = (b,) + parts + for d in child.decoders: + d.count(new_parts) + _count_dim(new_parts, b, child) + elif name.startswith("_missing"): + new_parts = (agg,) + parts + for d in child.decoders: + d.count(new_parts) + _count_dim(new_parts, agg, child) + else: + _count_dim(parts, agg, child) + + _count_dim(tuple(), aggs, es_query) + for d in decoders: + done_count = getattr(d, "done_count", Null) + done_count() + return [d.edge for d in decoders] + + +# EXPORT +agg_format.aggs_iterator = aggs_iterator +agg_format.count_dim = count_dim diff --git a/vendor/jx_elasticsearch/es52/agg_op_field.py b/vendor/jx_elasticsearch/es52/agg_op_field.py new file mode 100644 index 0000000..3bc900f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/agg_op_field.py @@ -0,0 +1,167 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
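`aggs_iterator()` above walks ES's recursive aggregation response without recursion, keeping a stack of generators and a deque of bucket parts. A toy recursive version of the same idea, against an invented response shape, may make the traversal easier to follow:

```python
# Toy model of the walk aggs_iterator() performs; the response layout and the
# "my_sum" leaf here are invented for illustration only.
def rows(agg, coord=()):
    match = agg.get("_match")
    if match is None:
        yield coord, agg                      # LEAF: ONE EFFECTIVE ROW
        return
    for i, b in enumerate(match.get("buckets", [])):
        if b.get("doc_count"):                # SKIP EMPTY BUCKETS, AS THE REAL CODE DOES
            for out in rows(b, coord + (i,)):
                yield out

response = {"_match": {"buckets": [
    {"doc_count": 2, "_match": {"buckets": [{"doc_count": 2, "my_sum": {"value": 7}}]}},
    {"doc_count": 0},
]}}
for coord, leaf in rows(response):
    print(coord, leaf.get("my_sum", {}).get("value"))   # (0, 0) 7
```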
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +import mo_math +from jx_base.expressions import NULL +from jx_elasticsearch.es52.es_query import CountAggs, ExprAggs, NestedAggs +from jx_elasticsearch.es52.set_op import get_pull_stats +from jx_elasticsearch.es52.util import aggregates +from jx_python.expressions import jx_expression_to_function +from mo_dots import join_field +from mo_future import first, is_text, text +from mo_json import EXISTS, NESTED, OBJECT, NUMBER_TYPES, BOOLEAN +from mo_json.typed_encoder import encode_property +from mo_logs import Log +from mo_logs.strings import quote + + +def agg_field(acc, new_select, query_path, schema): + for s in (s for _, many in new_select.items() for s in many): + canonical_name = s.name + if s.aggregate in ("value_count", "count"): + columns = schema.values(s.value.var, exclude_type=(OBJECT, NESTED)) + else: + columns = schema.values(s.value.var) + + if s.aggregate == "count": + canonical_names = [] + for column in columns: + es_name = column.es_column + "_count" + if column.jx_type == EXISTS: + if column.nested_path[0] == query_path: + canonical_names.append("doc_count") + acc.add(NestedAggs(column.nested_path[0]).add( + CountAggs(s) + )) + else: + canonical_names.append("value") + acc.add(NestedAggs(column.nested_path[0]).add( + ExprAggs(es_name, {"value_count": {"field": column.es_column}}, s) + )) + if len(canonical_names) == 1: + s.pull = jx_expression_to_function(canonical_names[0]) + else: + s.pull = jx_expression_to_function({"add": canonical_names}) + elif s.aggregate == "median": + columns = [c for c in columns if c.jx_type in NUMBER_TYPES] + if len(columns) != 1: + Log.error("Do not know how to perform median on columns with more than one type (script probably)") + # ES USES DIFFERENT METHOD FOR PERCENTILES + key = canonical_name + " percentile" + acc.add(ExprAggs(key, {"percentiles": { + "field": first(columns).es_column, + "percents": [50] + }}, s)) + s.pull = jx_expression_to_function("values.50\\.0") + elif s.aggregate in ("and", "or"): + columns = [c for c in columns if c.jx_type is BOOLEAN] + op = aggregates[s.aggregate] + if not columns: + s.pull = jx_expression_to_function(NULL) + else: + for c in columns: + acc.add(NestedAggs(c.nested_path[0]).add( + ExprAggs(canonical_name, {op: {"field": c.es_column}}, s) + )) + # get_name = concat_field(canonical_name, "value") + s.pull = jx_expression_to_function({"case": [ + {"when": {"eq": {"value": 1}}, "then": True}, + {"when": {"eq": {"value": 0}}, "then": False} + ]}) + elif s.aggregate == "percentile": + columns = [c for c in columns if c.jx_type in NUMBER_TYPES] + if len(columns) != 1: + Log.error( + "Do not know how to perform percentile on columns with more than one type (script probably)") + # ES USES DIFFERENT METHOD FOR PERCENTILES + key = canonical_name + " percentile" + if is_text(s.percentile) or s.percentile < 0 or 1 < s.percentile: + Log.error("Expecting percentile to be a float from 0.0 to 1.0") + percent = mo_math.round(s.percentile * 100, decimal=6) + + acc.add(ExprAggs(key, {"percentiles": { + "field": first(columns).es_column, + "percents": [percent], + "tdigest": {"compression": 2} + }}, s)) + s.pull = jx_expression_to_function(join_field(["values", text(percent)])) + elif s.aggregate == "cardinality": + for column in columns: + path = column.es_column + "_cardinality" + acc.add(ExprAggs(path, {"cardinality": {"field": column.es_column}}, s)) + s.pull =
jx_expression_to_function("value") + elif s.aggregate == "stats": + columns = [c for c in columns if c.jx_type in NUMBER_TYPES] + if len(columns) != 1: + Log.error("Do not know how to perform stats on columns with more than one type (script probably)") + # REGULAR STATS + acc.add(ExprAggs(canonical_name, {"extended_stats": { + "field": first(columns).es_column + }}, s)) + s.pull = get_pull_stats() + + # GET MEDIAN TOO! + select_median = s.copy() + select_median.pull = jx_expression_to_function( + {"select": [{"name": "median", "value": "values.50\\.0"}]}) + + acc.add(ExprAggs(canonical_name + "_percentile", {"percentiles": { + "field": first(columns).es_column, + "percents": [50] + }}, select_median)) + + elif s.aggregate == "union": + for column in columns: + script = {"scripted_metric": { + 'init_script': 'params._agg.terms = new HashSet()', + 'map_script': 'for (v in doc[' + quote( + column.es_column) + '].values) params._agg.terms.add(v);', + 'combine_script': 'return params._agg.terms.toArray()', + 'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()', + }} + stats_name = column.es_column + acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s))) + s.pull = jx_expression_to_function("value") + elif s.aggregate == "count_values": + # RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS + # NOT A NESTED DOC, RATHER A MULTIVALUE FIELD + for column in columns: + script = {"scripted_metric": { + 'params': {"_agg": {}}, + 'init_script': 'params._agg.terms = new HashMap()', + 'map_script': 'for (v in doc[' + quote( + column.es_column) + '].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);', + 'combine_script': 'return params._agg.terms', + 'reduce_script': ''' + HashMap output = new HashMap(); + for (agg in params._aggs) { + if (agg!=null){ + for (e in agg.entrySet()) { + String key = String.valueOf(e.getKey()); + output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0)); + } + } + } + return output; + ''' + }} + stats_name = encode_property(column.es_column) + acc.add(NestedAggs(column.nested_path[0]).add(ExprAggs(stats_name, script, s))) + s.pull = jx_expression_to_function("value") + else: + if not columns: + s.pull = jx_expression_to_function(NULL) + else: + for c in columns: + acc.add(NestedAggs(c.nested_path[0]).add( + ExprAggs(canonical_name, {"extended_stats": {"field": c.es_column}}, s) + )) + s.pull = jx_expression_to_function(aggregates[s.aggregate]) diff --git a/vendor/jx_elasticsearch/es52/agg_op_formula.py b/vendor/jx_elasticsearch/es52/agg_op_formula.py new file mode 100644 index 0000000..9302a70 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/agg_op_formula.py @@ -0,0 +1,194 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
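The `union` and `count_values` aggregates above lean on ES `scripted_metric`, whose map/combine scripts run per shard and whose reduce script merges the shard partials on the coordinator. A rough Python model of those phases for the `union` case, with made-up shard data:

```python
# Hypothetical model of the scripted_metric phases used for "union" above;
# the "tags" field and shard contents are invented.
def map_combine(docs, field="tags"):      # PER-SHARD: COLLECT DISTINCT VALUES
    terms = set()
    for doc in docs:
        terms.update(doc.get(field, []))
    return list(terms)

def reduce_phase(shard_results):          # COORDINATOR: MERGE SHARD PARTIALS
    output = set()
    for partial in shard_results:
        if partial is not None:           # MIRRORS THE null CHECK IN reduce_script
            output.update(partial)
    return list(output)

shards = [[{"tags": ["a", "b"]}], [{"tags": ["b", "c"]}, {}]]
print(sorted(reduce_phase([map_combine(s) for s in shards])))   # ['a', 'b', 'c']
```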
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +import mo_math +from jx_base.expressions import NULL, TupleOp +from jx_base.language import is_op +from jx_elasticsearch.es52.es_query import ExprAggs, NestedAggs, TermsAggs +from jx_elasticsearch.es52.expressions import split_expression_by_path +from jx_elasticsearch.es52.painless import NumberOp, Painless +from jx_elasticsearch.es52.set_op import get_pull_stats +from jx_elasticsearch.es52.util import aggregates +from jx_python.expressions import jx_expression_to_function +from mo_dots import join_field, literal_field +from mo_future import first, text +from mo_json import BOOLEAN +from mo_logs import Log +from mo_logs.strings import expand_template + +COMPARE_TUPLE = """ +(a, b)->{ + int i=0; + for (dummy in a){ //ONLY THIS FOR LOOP IS ACCEPTED (ALL OTHER FORMS THROW NullPointerException) + if (a[i]==null){ + if (b[i]==null){ + return 0; + }else{ + return -1*({{dir}}); + }//endif + }else if (b[i]==null) return {{dir}}; + + if (a[i]!=b[i]) { + if (a[i] instanceof Boolean){ + if (b[i] instanceof Boolean){ + int cmp = Boolean.compare(a[i], b[i]); + if (cmp != 0) return cmp; + } else { + return -1; + }//endif + }else if (a[i] instanceof Number) { + if (b[i] instanceof Boolean) { + return 1; + } else if (b[i] instanceof Number) { + int cmp = Double.compare(a[i], b[i]); + if (cmp != 0) return cmp; + } else { + return -1; + }//endif + }else { + if (b[i] instanceof Boolean) { + return 1; + } else if (b[i] instanceof Number) { + return 1; + } else { + int cmp = ((String)a[i]).compareTo((String)b[i]); + if (cmp != 0) return cmp; + }//endif + }//endif + }//endif + i=i+1; + }//for + return 0; +} +""" + + +MAX_OF_TUPLE = """ +(Object[])([{{expr1}}, {{expr2}}].stream().{{op}}("""+COMPARE_TUPLE+""").get()) +""" + + +def agg_formula(acc, formula, query_path, schema): + # DUPLICATED FOR SCRIPTS, MAYBE THIS CAN BE PUT INTO A LANGUAGE? + for i, s in enumerate(formula): + canonical_name = s.name + s_path = [k for k, v in split_expression_by_path(s.value, schema=schema, lang=Painless).items() if v] + if len(s_path) == 0: + # FOR CONSTANTS + nest = NestedAggs(query_path) + acc.add(nest) + elif len(s_path) == 1: + nest = NestedAggs(first(s_path)) + acc.add(nest) + else: + raise Log.error("do not know how to handle") + + if is_op(s.value, TupleOp): + if s.aggregate == "count": + # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY + s.pull = jx_expression_to_function("doc_count") + elif s.aggregate in ('max', 'maximum', 'min', 'minimum'): + if s.aggregate in ('max', 'maximum'): + dir = 1 + op = "max" + else: + dir = -1 + op = 'min' + + nully = Painless[TupleOp([NULL] * len(s.value.terms))].partial_eval().to_es_script(schema) + selfy = text(Painless[s.value].partial_eval().to_es_script(schema)) + + script = {"scripted_metric": { + 'init_script': 'params._agg.best = ' + nully + '.toArray();', + 'map_script': 'params._agg.best = ' + expand_template( + MAX_OF_TUPLE, + {"expr1": "params._agg.best", "expr2": selfy, + "dir": dir, "op": op} + ) + ";", + 'combine_script': 'return params._agg.best', + 'reduce_script': 'return params._aggs.stream().'
+ op + '(' + expand_template( + COMPARE_TUPLE, + {"dir": dir, + "op": op} + ) + ').get()', + }} + nest.add(NestedAggs(query_path).add( + ExprAggs(canonical_name, script, s) + )) + s.pull = jx_expression_to_function("value") + else: + Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate) + elif s.aggregate == "count": + nest.add(ExprAggs(canonical_name, + {"value_count": {"script": text(Painless[s.value].partial_eval().to_es_script(schema))}}, + s)) + s.pull = jx_expression_to_function("value") + elif s.aggregate == "median": + # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT + key = literal_field(canonical_name + " percentile") + nest.add(ExprAggs(key, {"percentiles": { + "script": text(Painless[s.value].to_es_script(schema)), + "percents": [50] + }}, s)) + s.pull = jx_expression_to_function(join_field(["50.0"])) + elif s.aggregate in ("and", "or"): + key = literal_field(canonical_name + " " + s.aggregate) + op = aggregates[s.aggregate] + nest.add( + ExprAggs(key, {op: { + "script": text(Painless[NumberOp(s.value)].to_es_script(schema)) + }}, s) + ) + # get_name = concat_field(canonical_name, "value") + s.pull = jx_expression_to_function({"case": [ + {"when": {"eq": {"value": 1}}, "then": True}, + {"when": {"eq": {"value": 0}}, "then": False} + ]}) + elif s.aggregate == "percentile": + # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT + key = literal_field(canonical_name + " percentile") + percent = mo_math.round(s.percentile * 100, decimal=6) + nest.add(ExprAggs(key, {"percentiles": { + "script": text(Painless[s.value].to_es_script(schema)), + "percents": [percent] + }}, s)) + s.pull = jx_expression_to_function(join_field(["values", text(percent)])) + elif s.aggregate == "cardinality": + # ES USES DIFFERENT METHOD FOR CARDINALITY + key = canonical_name + " cardinality" + nest.add(ExprAggs(key, {"cardinality": {"script": text(Painless[s.value].to_es_script(schema))}}, s)) + s.pull = jx_expression_to_function("value") + elif s.aggregate == "stats": + # REGULAR STATS + nest.add(ExprAggs(canonical_name, {"extended_stats": { + "script": text(Painless[s.value].to_es_script(schema)) + }}, s)) + s.pull = get_pull_stats() + + # GET MEDIAN TOO! + select_median = s.copy() + select_median.pull = jx_expression_to_function({"select": [{"name": "median", "value": "values.50\\.0"}]}) + + nest.add(ExprAggs(canonical_name + "_percentile", {"percentiles": { + "script": text(Painless[s.value].to_es_script(schema)), + "percents": [50] + }}, select_median)) + s.pull = get_pull_stats() + elif s.aggregate == "union": + # USE TERMS AGGREGATE TO SIMULATE union + nest.add(TermsAggs(canonical_name, {"script_field": text(Painless[s.value].to_es_script(schema))}, s)) + s.pull = jx_expression_to_function("key") + else: + # PULL VALUE OUT OF THE stats AGGREGATE + s.pull = jx_expression_to_function(aggregates[s.aggregate]) + nest.add(ExprAggs(canonical_name, { + "extended_stats": {"script": text(NumberOp(s.value).partial_eval().to_es_script(schema))}}, s)) + diff --git a/vendor/jx_elasticsearch/es52/aggs.py b/vendor/jx_elasticsearch/es52/aggs.py deleted file mode 100644 index 1609954..0000000 --- a/vendor/jx_elasticsearch/es52/aggs.py +++ /dev/null @@ -1,533 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
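COMPARE_TUPLE above encodes a total order over tuples for the Painless runtime: nulls sort to one end depending on direction, and mismatched types rank Boolean below Number below String. The same ordering in plain Python, as a sketch (`type_rank` and the sample rows are invented):

```python
from functools import cmp_to_key

# Boolean < Number < String, matching the instanceof cascade in COMPARE_TUPLE.
def type_rank(v):
    if isinstance(v, bool):
        return 0
    if isinstance(v, (int, float)):
        return 1
    return 2

def compare_tuple(a, b, direction=1):
    for x, y in zip(a, b):
        if x is None:
            return 0 if y is None else -direction   # NULLS SORT BY DIRECTION
        if y is None:
            return direction
        if x != y:
            rx, ry = type_rank(x), type_rank(y)
            if rx != ry:
                return -1 if rx < ry else 1          # ORDER BY TYPE FIRST
            return -1 if x < y else 1                # THEN BY VALUE
    return 0

rows = [(2, "b"), (None, "a"), (1, "c")]
print(sorted(rows, key=cmp_to_key(compare_tuple)))   # [(None, 'a'), (1, 'c'), (2, 'b')]
```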
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.domains import SetDomain -from jx_base.expressions import TupleOp, NULL -from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT -from jx_elasticsearch import post as es_post -from jx_elasticsearch.es52.decoders import DefaultDecoder, AggsDecoder, ObjectDecoder, DimFieldListDecoder -from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, NullOp -from jx_elasticsearch.es52.setop import get_pull_stats -from jx_elasticsearch.es52.util import aggregates -from jx_python import jx -from jx_python.expressions import jx_expression_to_function -from mo_dots import listwrap, Data, wrap, literal_field, set_default, coalesce, Null, split_field, FlatList, unwrap, unwraplist -from mo_future import text_type -from mo_json.typed_encoder import encode_property, EXISTS -from mo_logs import Log -from mo_logs.strings import quote, expand_template -from mo_math import Math, MAX, UNION -from mo_times.timer import Timer - -COMPARE_TUPLE = """ -(a, b)->{ - int i=0; - for (dummy in a){ //ONLY THIS FOR LOOP IS ACCEPTED (ALL OTHER FORMS THROW NullPointerException) - if (a[i]==null) return -1*({{dir}}); - if (b[i]==null) return 1*({{dir}}); - - if (a[i]!=b[i]) { - if (a[i] instanceof Boolean){ - if (b[i] instanceof Boolean){ - int cmp = Boolean.compare(a[i], b[i]); - if (cmp != 0) return cmp; - } else { - return -1; - }//endif - }else if (a[i] instanceof Number) { - if (b[i] instanceof Boolean) { - return 1 - } else if (b[i] instanceof Number) { - int cmp = Double.compare(a[i], b[i]); - if (cmp != 0) return cmp; - } else { - return -1; - }//endif - }else { - if (b[i] instanceof Boolean) { - return 1; - } else if (b[i] instanceof Number) { - return 1; - } else { - int cmp = ((String)a[i]).compareTo((String)b[i]); - if (cmp != 0) return cmp; - }//endif - }//endif - }//endif - i=i+1; - }//for - return 0; -} -""" - - -MAX_OF_TUPLE = """ -(Object[])Arrays.asList(new Object[]{{{expr1}}, {{expr2}}}).stream().{{op}}("""+COMPARE_TUPLE+""").get() -""" - - -def is_aggsop(es, query): - if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate): - return True - return False - - -def get_decoders_by_depth(query): - """ - RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH - """ - schema = query.frum.schema - output = FlatList() - - if query.edges: - if query.sort and query.format != "cube": - # REORDER EDGES/GROUPBY TO MATCH THE SORT - query.edges = sort_edges(query, "edges") - elif query.groupby: - if query.sort and query.format != "cube": - query.groupby = sort_edges(query, "groupby") - - for edge in wrap(coalesce(query.edges, query.groupby, [])): - limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT) - if edge.value != None and not isinstance(edge.value, NullOp): - edge = edge.copy() - vars_ = edge.value.vars() - for v in vars_: - if not schema.leaves(v.var): - Log.error("{{var}} does not exist in schema", var=v) - elif edge.range: - vars_ = edge.range.min.vars() | edge.range.max.vars() - for v in vars_: - if not schema[v.var]: - Log.error("{{var}} does not exist in schema", var=v) - elif edge.domain.dimension: - vars_ = edge.domain.dimension.fields - edge.domain.dimension = edge.domain.dimension.copy() - edge.domain.dimension.fields = [schema[v].es_column for v in vars_] - elif all(edge.domain.partitions.where): - vars_ = set() - for p in 
edge.domain.partitions: - vars_ |= p.where.vars() - - try: - vars_ |= edge.value.vars() - depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var)) - if -1 in depths: - Log.error( - "Do not know of column {{column}}", - column=unwraplist([v for v in vars_ if schema[v] == None]) - ) - if len(depths) > 1: - Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value) - max_depth = MAX(depths) - while len(output) <= max_depth: - output.append([]) - except Exception as e: - # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY - max_depth = 0 - output.append([]) - - output[max_depth].append(AggsDecoder(edge, query, limit)) - return output - - -def sort_edges(query, prop): - ordered_edges = [] - remaining_edges = getattr(query, prop) - for s in query.sort: - for e in remaining_edges: - if e.value == s.value: - if isinstance(e.domain, SetDomain): - pass # ALREADY SORTED? - else: - e.domain.sort = s.sort - ordered_edges.append(e) - remaining_edges.remove(e) - break - else: - Log.error("Can not sort by {{expr}}, can only sort by an existing edge expression", expr=s.value) - - ordered_edges.extend(remaining_edges) - return ordered_edges - - -def es_aggsop(es, frum, query): - query = query.copy() # WE WILL MARK UP THIS QUERY - schema = frum.schema - select = listwrap(query.select) - - es_query = Data() - new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING - formula = [] - for s in select: - if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".": - if schema.query_path == ".": - s.pull = jx_expression_to_function("doc_count") - else: - s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]}) - elif isinstance(s.value, Variable): - if s.aggregate == "count": - new_select["count_"+literal_field(s.value.var)] += [s] - else: - new_select[literal_field(s.value.var)] += [s] - elif s.aggregate: - formula.append(s) - - for canonical_name, many in new_select.items(): - for s in many: - columns = frum.schema.values(s.value.var) - - if s.aggregate == "count": - canonical_names = [] - for column in columns: - cn = literal_field(column.es_column + "_count") - if column.jx_type == EXISTS: - canonical_names.append(cn + ".doc_count") - es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}} - else: - canonical_names.append(cn+ ".value") - es_query.aggs[cn].value_count.field = column.es_column - if len(canonical_names) == 1: - s.pull = jx_expression_to_function(canonical_names[0]) - else: - s.pull = jx_expression_to_function({"add": canonical_names}) - elif s.aggregate == "median": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # ES USES DIFFERENT METHOD FOR PERCENTILES - key = literal_field(canonical_name + " percentile") - - es_query.aggs[key].percentiles.field = columns[0].es_column - es_query.aggs[key].percentiles.percents += [50] - s.pull = jx_expression_to_function(key + ".values.50\\.0") - elif s.aggregate == "percentile": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # ES USES DIFFERENT METHOD FOR PERCENTILES - key = literal_field(canonical_name + " percentile") - if isinstance(s.percentile, text_type) or s.percetile < 0 or 1 < s.percentile: - Log.error("Expecting percentile to be a float from 0.0 to 1.0") - percent = Math.round(s.percentile * 100, decimal=6) - - es_query.aggs[key].percentiles.field = 
columns[0].es_column - es_query.aggs[key].percentiles.percents += [percent] - es_query.aggs[key].percentiles.tdigest.compression = 2 - s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) - elif s.aggregate == "cardinality": - canonical_names = [] - for column in columns: - cn = literal_field(column.es_column + "_cardinality") - canonical_names.append(cn) - es_query.aggs[cn].cardinality.field = column.es_column - if len(columns) == 1: - s.pull = jx_expression_to_function(canonical_names[0] + ".value") - else: - s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0}) - elif s.aggregate == "stats": - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - # REGULAR STATS - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].extended_stats.field = columns[0].es_column - - # GET MEDIAN TOO! - median_name = literal_field(canonical_name + "_percentile") - es_query.aggs[median_name].percentiles.field = columns[0].es_column - es_query.aggs[median_name].percentiles.percents += [50] - - s.pull = get_pull_stats(stats_name, median_name) - elif s.aggregate == "union": - pulls = [] - for column in columns: - script = {"scripted_metric": { - 'init_script': 'params._agg.terms = new HashSet()', - 'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);', - 'combine_script': 'return params._agg.terms.toArray()', - 'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()', - }} - stats_name = encode_property(column.es_column) - if column.nested_path[0] == ".": - es_query.aggs[stats_name] = script - pulls.append(jx_expression_to_function(stats_name + ".value")) - else: - es_query.aggs[stats_name] = { - "nested": {"path": column.nested_path[0]}, - "aggs": {"_nested": script} - } - pulls.append(jx_expression_to_function(stats_name + "._nested.value")) - - if len(pulls) == 0: - s.pull = NULL - elif len(pulls) == 1: - s.pull = pulls[0] - else: - s.pull = lambda row: UNION(p(row) for p in pulls) - else: - if len(columns) > 1: - Log.error("Do not know how to count columns with more than one type (script probably)") - elif len(columns) <1: - # PULL VALUE OUT OF THE stats AGGREGATE - s.pull = jx_expression_to_function({"null":{}}) - else: - # PULL VALUE OUT OF THE stats AGGREGATE - es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column - s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." 
+ aggregates[s.aggregate], s.default]}) - - for i, s in enumerate(formula): - canonical_name = literal_field(s.name) - - if isinstance(s.value, TupleOp): - if s.aggregate == "count": - # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY - s.pull = "doc_count" - elif s.aggregate in ('max', 'maximum', 'min', 'minimum'): - if s.aggregate in ('max', 'maximum'): - dir = 1 - op = "max" - else: - dir = -1 - op = 'min' - - nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr - selfy = s.value.partial_eval().to_es_script(schema).expr - - script = {"scripted_metric": { - 'init_script': 'params._agg.best = ' + nully + ';', - 'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";", - 'combine_script': 'return params._agg.best', - 'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()', - }} - if schema.query_path[0] == ".": - es_query.aggs[canonical_name] = script - s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value") - else: - es_query.aggs[canonical_name] = { - "nested": {"path": schema.query_path[0]}, - "aggs": {"_nested": script} - } - s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value") - else: - Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate) - elif s.aggregate == "count": - es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value") - elif s.aggregate == "median": - # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT - key = literal_field(canonical_name + " percentile") - - es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[key].percentiles.percents += [50] - s.pull = jx_expression_to_function(key + ".values.50\\.0") - elif s.aggregate == "percentile": - # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT - key = literal_field(canonical_name + " percentile") - percent = Math.round(s.percentile * 100, decimal=6) - - es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[key].percentiles.percents += [percent] - s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) - elif s.aggregate == "cardinality": - # ES USES DIFFERENT METHOD FOR CARDINALITY - key = canonical_name + " cardinality" - - es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(key + ".value") - elif s.aggregate == "stats": - # REGULAR STATS - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema) - - # GET MEDIAN TOO! 
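Both this removed implementation and its `agg_op_field` replacement fetch the median through a second `percentiles` aggregation pinned at 50, alongside `extended_stats`. The request body ES ends up receiving looks roughly like this sketch ("score" and "my_field" are invented placeholders):

```python
import json

# Approximate ES body for a "stats" select with median; names are placeholders.
es_body = {
    "size": 0,
    "aggs": {
        "score": {"extended_stats": {"field": "my_field"}},
        "score_percentile": {"percentiles": {"field": "my_field", "percents": [50]}},
    },
}
print(json.dumps(es_body, indent=2))
```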
- median_name = literal_field(canonical_name + " percentile") - es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema) - es_query.aggs[median_name].percentiles.percents += [50] - - s.pull = get_pull_stats(stats_name, median_name) - elif s.aggregate == "union": - # USE TERMS AGGREGATE TO SIMULATE union - stats_name = literal_field(canonical_name) - es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema) - s.pull = jx_expression_to_function(stats_name + ".buckets.key") - else: - # PULL VALUE OUT OF THE stats AGGREGATE - s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate]) - es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema) - - decoders = get_decoders_by_depth(query) - start = 0 - - # THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested - split_where = split_expression_by_depth(query.where, schema=frum.schema) - - if len(split_field(frum.name)) > 1: - if any(split_where[2::]): - Log.error("Where clause is too deep") - - for d in decoders[1]: - es_query = d.append_query(es_query, start) - start += d.num_columns - - if split_where[1]: - #TODO: INCLUDE FILTERS ON EDGES - filter_ = AndOp("and", split_where[1]).to_esfilter(schema) - es_query = Data( - aggs={"_filter": set_default({"filter": filter_}, es_query)} - ) - - es_query = wrap({ - "aggs": {"_nested": set_default( - {"nested": {"path": schema.query_path[0]}}, - es_query - )} - }) - else: - if any(split_where[1::]): - Log.error("Where clause is too deep") - - if decoders: - for d in jx.reverse(decoders[0]): - es_query = d.append_query(es_query, start) - start += d.num_columns - - if split_where[0]: - #TODO: INCLUDE FILTERS ON EDGES - filter = AndOp("and", split_where[0]).to_esfilter(schema) - es_query = Data( - aggs={"_filter": set_default({"filter": filter}, es_query)} - ) - # - - if not es_query: - es_query = wrap({"query": {"match_all": {}}}) - - es_query.size = 0 - - with Timer("ES query time") as es_duration: - result = es_post(es, es_query, query.limit) - - try: - format_time = Timer("formatting") - with format_time: - decoders = [d for ds in decoders for d in ds] - result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE - - formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format] - if query.edges: - output = formatter(decoders, result.aggregations, start, query, select) - elif query.groupby: - output = groupby_formatter(decoders, result.aggregations, start, query, select) - else: - output = aggop_formatter(decoders, result.aggregations, start, query, select) - - output.meta.timing.formatting = format_time.duration - output.meta.timing.es_search = es_duration.duration - output.meta.content_type = mime_type - output.meta.es_query = es_query - return output - except Exception as e: - if query.format not in format_dispatch: - Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e) - Log.error("Some problem", cause=e) - - -EMPTY = {} -EMPTY_LIST = [] - - -def drill(agg): - deeper = agg.get("_filter") or agg.get("_nested") - while deeper: - agg = deeper - deeper = agg.get("_filter") or agg.get("_nested") - return agg - - -def aggs_iterator(aggs, decoders, coord=True): - """ - DIG INTO ES'S RECURSIVE aggs DATA-STRUCTURE: - RETURN AN ITERATOR OVER THE EFFECTIVE ROWS OF THE RESULTS - - :param aggs: ES AGGREGATE OBJECT - :param decoders: - :param coord: TURN ON 
LOCAL COORDINATE LOOKUP - """ - depth = max(d.start + d.num_columns for d in decoders) - - def _aggs_iterator(agg, d): - agg = drill(agg) - - if d > 0: - for k, v in agg.items(): - if k == "_match": - v = drill(v) - for i, b in enumerate(v.get("buckets", EMPTY_LIST)): - b["_index"] = i - for a, parts in _aggs_iterator(b, d - 1): - yield a, parts + (b,) - elif k == "_other": - for b in v.get("buckets", EMPTY_LIST): - for a, parts in _aggs_iterator(b, d - 1): - yield a, parts + (Null,) - elif k == "_missing": - b = drill(v) - for a, parts in _aggs_iterator(b, d - 1): - yield a, parts + (b,) - elif k.startswith("_join_"): - v["key"] = int(k[6:]) - for a, parts in _aggs_iterator(v, d - 1): - yield a, parts + (v,) - else: - for k, v in agg.items(): - if k == "_match": - v = drill(v) - for i, b in enumerate(v.get("buckets", EMPTY_LIST)): - b["_index"] = i - yield b, (b,) - elif k == "_other": - for b in v.get("buckets", EMPTY_LIST): - yield b, (Null,) - elif k == "_missing": - b = drill(v,) - yield b, (v,) - elif k.startswith("_join_"): - v["_index"] = int(k[6:]) - yield v, (v,) - - if coord: - for a, parts in _aggs_iterator(unwrap(aggs), depth - 1): - coord = tuple(d.get_index(parts) for d in decoders) - if any(c is None for c in coord): - continue - yield parts, coord, a - else: - for a, parts in _aggs_iterator(unwrap(aggs), depth - 1): - yield parts, None, a - - -def count_dim(aggs, decoders): - if any(isinstance(d, (DefaultDecoder, DimFieldListDecoder, ObjectDecoder)) for d in decoders): - # ENUMERATE THE DOMAINS, IF UNKNOWN AT QUERY TIME - for row, coord, agg in aggs_iterator(aggs, decoders, coord=False): - for d in decoders: - d.count(row) - for d in decoders: - d.done_count() - new_edges = wrap([d.edge for d in decoders]) - return new_edges - - -format_dispatch = {} -from jx_elasticsearch.es52.format import format_cube - -_ = format_cube - diff --git a/vendor/jx_elasticsearch/es52/decoders.py b/vendor/jx_elasticsearch/es52/decoders.py index d959fa9..6aacb11 100644 --- a/vendor/jx_elasticsearch/es52/decoders.py +++ b/vendor/jx_elasticsearch/es52/decoders.py @@ -5,28 +5,30 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
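`AggsDecoder.__new__` below acts as a factory: it inspects the edge expression and its domain, then returns whichever decoder subclass can translate that edge into an ES aggregation. A condensed model of that dispatch (conditions heavily simplified and the `edge` dict invented; the real checks consult the schema and column metadata):

```python
# Simplified dispatch model; `edge` is a hypothetical dict of pre-computed facts.
def pick_decoder(edge):
    if edge.get("is_leaves"):
        return "ObjectDecoder"        # ONE COLUMN PER LEAF FIELD
    if edge.get("is_tuple"):
        return "DimFieldListDecoder"  # PLAIN LIST OF FIELDS
    if edge.get("multi", 0) > 1:
        return "MultivalueDecoder"    # MULTIVALUED COLUMN
    if edge.get("has_partitions"):
        return "SetDecoder"           # KNOWN PARTS: terms AGG OVER A FIXED SET
    if edge.get("type") == "time":
        return "TimeDecoder"          # range AGG OVER TIME PARTITIONS
    return "DefaultDecoder"           # UNKNOWN PARTS: ENUMERATE FROM RESULTS

print(pick_decoder({"has_partitions": True}))   # SetDecoder
```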
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping +from __future__ import absolute_import, division, unicode_literals from jx_base.dimensions import Dimension -from jx_base.domains import SimpleSetDomain, DefaultDomain, PARTITION -from jx_base.expressions import TupleOp, TRUE -from jx_base.query import MAX_LIMIT, DEFAULT_LIMIT -from jx_elasticsearch.es52.expressions import Variable, NotOp, InOp, Literal, AndOp, InequalityOp, LeavesOp, LIST_TO_PIPE -from jx_elasticsearch.es52.util import es_missing +from jx_base.domains import DefaultDomain, PARTITION, SimpleSetDomain +from jx_base.expressions import ExistsOp, FirstOp, GtOp, GteOp, LeavesOp, LtOp, LteOp, MissingOp, TupleOp, Variable +from jx_base.language import is_op +from jx_base.query import DEFAULT_LIMIT +from jx_elasticsearch.es52.es_query import Aggs, FilterAggs, FiltersAggs, NestedAggs, RangeAggs, TermsAggs +from jx_elasticsearch.es52.expressions import AndOp, InOp, Literal, NotOp +from jx_elasticsearch.es52.painless import LIST_TO_PIPE, Painless +from jx_elasticsearch.es52.util import pull_functions, temper_limit +from jx_elasticsearch.meta import KNOWN_MULTITYPES from jx_python import jx -from mo_dots import wrap, set_default, coalesce, literal_field, Data, relative_field, unwraplist -from mo_future import text_type, transpose -from mo_json.typed_encoder import untype_path, STRING, NUMBER, BOOLEAN +from mo_dots import Data, coalesce, concat_field, is_data, literal_field, relative_field, set_default, wrap +from mo_future import first, is_text, text, transpose +from mo_json import EXISTS, OBJECT, STRING +from mo_json.typed_encoder import EXISTS_TYPE, NESTED_TYPE, untype_path, unnest_path from mo_logs import Log -from mo_logs.strings import quote, expand_template -from mo_math import MAX, MIN, Math -from pyLibrary.convert import string2boolean +from mo_logs.strings import expand_template, quote +from mo_math import MAX, MIN + +DEBUG = False class AggsDecoder(object): @@ -37,91 +39,107 @@ class AggsDecoder(object): # if query.groupby: # return object.__new__(DefaultDecoder, e) - if isinstance(e.value, text_type): + if is_text(e.value): Log.error("Expecting Variable or Expression, not plain string") - if isinstance(e.value, LeavesOp): - return object.__new__(ObjectDecoder, e) - elif isinstance(e.value, TupleOp): + if is_op(e.value, LeavesOp): + return object.__new__(ObjectDecoder) + elif is_op(e.value, TupleOp): # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields # JUST PULL THE FIELDS - if not all(isinstance(t, Variable) for t in e.value.terms): + if not all(is_op(t, Variable) for t in e.value.terms): Log.error("Can only handle variables in tuples") e.domain = Data( dimension={"fields": e.value.terms} ) - return object.__new__(DimFieldListDecoder, e) + return object.__new__(DimFieldListDecoder) - elif isinstance(e.value, Variable): + elif is_op(e.value, Variable): schema = query.frum.schema cols = schema.leaves(e.value.var) if not cols: - return object.__new__(DefaultDecoder, e) + return object.__new__(DefaultDecoder) if len(cols) != 1: - return object.__new__(ObjectDecoder, e) - col = cols[0] + return object.__new__(ObjectDecoder) + col = first(cols) limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT) - if col.partitions != None: - if col.multi > 1 and len(col.partitions) < 6: + if col.cardinality == None: + DEBUG 
and Log.warning( + "metadata for column {{name|quote}} (id={{id}}) is not ready", + name=concat_field(col.es_index, col.es_column), + id=id(col) + ) + if unnest_path(e.value.var) in KNOWN_MULTITYPES: + Log.warning("{{var}} is multivalued", var = e.value.var) + return object.__new__(MultivalueDecoder) + + e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__()) + return object.__new__(DefaultDecoder) + elif col.multi <= 1 and col.partitions == None: + if unnest_path(e.value.var) in KNOWN_MULTITYPES: + Log.warning("{{var}} is multivalued", var = e.value.var) + return object.__new__(MultivalueDecoder) + e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__()) + return object.__new__(DefaultDecoder) + else: + DEBUG and Log.note("id={{id}} has parts!!!", id=id(col)) + if col.multi > 1: + return object.__new__(MultivalueDecoder) + partitions = col.partitions[:limit:] + if e.domain.sort == -1: + partitions = list(reversed(sorted(partitions))) + else: + partitions = sorted(partitions) + e.domain = SimpleSetDomain(partitions=partitions, limit=limit) - else: - e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__()) - return object.__new__(DefaultDecoder, e) else: - return object.__new__(DefaultDecoder, e) + return object.__new__(DefaultDecoder) if e.value and e.domain.type in PARTITION: - return object.__new__(SetDecoder, e) + return object.__new__(SetDecoder) if isinstance(e.domain.dimension, Dimension): e.domain = e.domain.dimension.getDomain() - return object.__new__(SetDecoder, e) + return object.__new__(SetDecoder) if e.value and e.domain.type == "time": - return object.__new__(TimeDecoder, e) + return object.__new__(TimeDecoder) if e.range: - return object.__new__(GeneralRangeDecoder, e) + return object.__new__(GeneralRangeDecoder) if e.value and e.domain.type == "duration": - return object.__new__(DurationDecoder, e) + return object.__new__(DurationDecoder) elif e.value and e.domain.type == "range": - return object.__new__(RangeDecoder, e) + return object.__new__(RangeDecoder) elif not e.value and e.domain.dimension.fields: # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields # JUST PULL THE FIELDS fields = e.domain.dimension.fields - if isinstance(fields, Mapping): + if is_data(fields): Log.error("No longer allowed: All objects are expressions") else: - return object.__new__(DimFieldListDecoder, e) + return object.__new__(DimFieldListDecoder) elif not e.value and all(e.domain.partitions.where): - return object.__new__(GeneralSetDecoder, e) + return object.__new__(GeneralSetDecoder) else: Log.error("domain type of {{type}} is not supported yet", type=e.domain.type) def __init__(self, edge, query, limit): - self.start = None self.edge = edge self.name = literal_field(self.edge.name) self.query = query self.limit = limit self.schema = self.query.frum.schema - def append_query(self, es_query, start): + def append_query(self, query_path, es_query): Log.error("Not supported") def count(self, row): pass - def done_count(self): - pass + # DO NOT IMPLEMENT IF domain HAS KNOWN PARTITIONS + # def done_count(self): + # pass def get_value_from_row(self, row): raise NotImplementedError() @@ -129,7 +147,7 @@ class AggsDecoder(object): def get_value(self, index): raise NotImplementedError() - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): raise NotImplementedError() @property @@ -155,70 +173,74 @@ class SetDecoder(AggsDecoder): parts = jx.sort(domain.partitions, {"value": domain.key, "sort":
s.sort}) edge.domain = self.domain = SimpleSetDomain(key=domain.key, label=domain.label, partitions=parts) - def append_query(self, es_query, start): - self.start = start + def append_query(self, query_path, es_query): domain = self.domain - domain_key = domain.key - include, text_include = transpose(*( - ( - float(v) if isinstance(v, (int, float)) else v, - text_type(float(v)) if isinstance(v, (int, float)) else v - ) - for v in (p[domain_key] for p in domain.partitions) - )) - value = self.edge.value - exists = AndOp("and", [ - value.exists(), - InOp("in", [value, Literal("literal", include)]) - ]).partial_eval() + value = Painless[self.edge.value] + cnv = pull_functions[value.type] + include = tuple(cnv(p[domain_key]) for p in domain.partitions) + + exists = Painless[AndOp([ + InOp([value, Literal(include)]) + ])].partial_eval() limit = coalesce(self.limit, len(domain.partitions)) - if isinstance(value, Variable): - es_field = self.query.frum.schema.leaves(value.var)[0].es_column # ALREADY CHECKED THERE IS ONLY ONE - terms = set_default({"terms": { - "field": es_field, - "size": limit, - "order": {"_term": self.sorted} if self.sorted else None - }}, es_query) - else: - terms = set_default({"terms": { - "script": { - "lang": "painless", - "inline": value.to_es_script(self.schema).script(self.schema) + if is_op(value, Variable): + es_field = first(self.query.frum.schema.leaves(value.var)).es_column # ALREADY CHECKED THERE IS ONLY ONE + match = TermsAggs( + "_match", + { + "field": es_field, + "size": limit, + "order": {"_term": self.sorted} if self.sorted else None }, - "size": limit - }}, es_query) - - if self.edge.allowNulls: - missing = set_default( - {"filter": NotOp("not", exists).to_esfilter(self.schema)}, - es_query + self ) else: - missing = None + match = TermsAggs( + "_match", + { + "script": text(value.to_es_script(self.schema)), + "size": limit + }, + self + ) + output = Aggs().add(FilterAggs("_filter", exists, None).add(match.add(es_query))) - return wrap({"aggs": { - "_match": { - "filter": exists.to_esfilter(self.schema), - "aggs": { - "_filter": terms - } - }, - "_missing": missing - }}) + if self.edge.allowNulls: + # FIND NULLS AT EACH NESTED LEVEL + for p in self.schema.query_path: + if p == query_path: + # MISSING AT THE QUERY DEPTH + output.add( + NestedAggs(p).add(FilterAggs("_missing0", NotOp(exists), self).add(es_query)) + ) + else: + # PARENT HAS NO CHILDREN, SO MISSING + column = first(self.schema.values(query_path, (OBJECT, EXISTS))) + output.add( + NestedAggs(column.nested_path[0]).add( + FilterAggs( + "_missing1", + NotOp(ExistsOp(Variable(column.es_column.replace(NESTED_TYPE, EXISTS_TYPE)))), + self + ).add(es_query) + ) + ) + return output def get_value(self, index): return self.domain.getKeyByIndex(index) - def get_value_from_row(self, row): - return self.pull(row[self.start].get('key')) + def get_value_from_row(self, parts): + key = parts[0].get('key') + return self.pull(key) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): try: - part = row[self.start] - return self.domain.getIndexByKey(part.get('key')) + key = row[0].get('key') + return self.domain.getIndexByKey(key) except Exception as e: Log.error("problem", cause=e) @@ -227,52 +249,43 @@ class SetDecoder(AggsDecoder): return 1 -def _range_composer(edge, domain, es_query, to_float, schema): +def _range_composer(self, edge, domain, es_query, to_float, schema): # USE RANGES _min = coalesce(domain.min, MIN(domain.partitions.min)) _max = coalesce(domain.max, 
MAX(domain.partitions.max)) + output = Aggs() if edge.allowNulls: - missing_filter = set_default( - { - "filter": NotOp("not", AndOp("and", [ - edge.value.exists(), - InequalityOp("gte", [edge.value, Literal(None, to_float(_min))]), - InequalityOp("lt", [edge.value, Literal(None, to_float(_max))]) - ]).partial_eval()).to_esfilter(schema) - }, - es_query - ) - else: - missing_filter = None + output.add(FilterAggs( + "_missing", + NotOp(AndOp([ + edge.value.exists(), + GteOp([edge.value, Literal(to_float(_min))]), + LtOp([edge.value, Literal(to_float(_max))]) + ]).partial_eval()), + self + ).add(es_query)) - if isinstance(edge.value, Variable): - calc = {"field": schema.leaves(edge.value.var)[0].es_column} + if is_op(edge.value, Variable): + calc = {"field": first(schema.leaves(edge.value.var)).es_column} else: - calc = {"script": edge.value.to_es_script(schema).script(schema)} + calc = {"script": text(Painless[edge.value].to_es_script(schema))} + calc['ranges'] = [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions] - return wrap({"aggs": { - "_match": set_default( - {"range": calc}, - {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}}, - es_query - ), - "_missing": missing_filter - }}) + return output.add(RangeAggs("_match", calc, self).add(es_query)) class TimeDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start + def append_query(self, query_path, es_query): schema = self.query.frum.schema - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.unix, schema) + return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.unix, schema) def get_value(self, index): return self.edge.domain.getKeyByIndex(index) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): domain = self.edge.domain - part = row[self.start] + part = row[0] if part == None: return len(domain.partitions) @@ -309,35 +322,31 @@ class GeneralRangeDecoder(AggsDecoder): else: Log.error("Unknown domain of type {{type}} for range edge", type=edge.domain.type) - def append_query(self, es_query, start): - self.start = start - + def append_query(self, query_path, es_query): edge = self.edge range = edge.range domain = edge.domain - aggs = {} + aggs = Aggs() for i, p in enumerate(domain.partitions): - filter_ = AndOp("and", [ - InequalityOp("lte", [range.min, Literal("literal", self.to_float(p.min))]), - InequalityOp("gt", [range.max, Literal("literal", self.to_float(p.min))]) + filter_ = AndOp([ + LteOp([range.min, Literal(self.to_float(p.min))]), + GtOp([range.max, Literal(self.to_float(p.min))]) ]) - aggs["_join_" + text_type(i)] = set_default( - {"filter": filter_.to_esfilter(self.schema)}, - es_query - ) + aggs.add(FilterAggs("_match" + text(i), filter_, self).add(es_query)) - return wrap({"aggs": aggs}) + return aggs def get_value(self, index): return self.edge.domain.getKeyByIndex(index) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): domain = self.edge.domain - part = row[self.start] + part = row[0] if part == None: return len(domain.partitions) - return part["_index"] + index = int(es_query.name[6:]) + return index @property def num_columns(self): @@ -349,42 +358,30 @@ class GeneralSetDecoder(AggsDecoder): EXPECTING ALL PARTS IN partitions TO HAVE A where CLAUSE """ - def append_query(self, es_query, start): - self.start = start - + def append_query(self, query_path, es_query): parts = self.edge.domain.partitions 
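# THE LOOP BELOW BUILDS ONE FILTER PER PARTITION, EACH ANDed WITH THE
# NEGATION OF ALL EARLIER PARTITION FILTERS, SO A DOCUMENT LANDS IN AT MOST
# ONE BUCKET EVEN WHEN THE PARTITION where CLAUSES OVERLAP, e.g.:
#   filters[0] = w0
#   filters[1] = w1 AND NOT w0
#   filters[2] = w2 AND NOT w1 AND NOT w0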
filters = [] notty = [] - for p in parts: w = p.where - filters.append(AndOp("and", [w] + notty).to_esfilter(self.schema)) - notty.append(NotOp("not", w)) + filters.append(AndOp([w] + notty)) + notty.append(NotOp(w)) - missing_filter = None + output = Aggs().add(FiltersAggs("_match", filters, self).add(es_query)) if self.edge.allowNulls: # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER - missing_filter = set_default( - {"filter": AndOp("and", notty).to_esfilter(self.schema)}, - es_query - ) + output.add(FilterAggs("_missing", AndOp(notty), self).add(es_query)) - return wrap({"aggs": { - "_match": set_default( - {"filters": {"filters": filters}}, - es_query - ), - "_missing": missing_filter - }}) + return output def get_value(self, index): return self.edge.domain.getKeyByIndex(index) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): domain = self.edge.domain - part = row[self.start] - # if part == None: - # return len(domain.partitions) - return part.get("_index", len(domain.partitions)) + if index == None: + return len(domain.partitions) + else: + return index @property def num_columns(self): @@ -392,16 +389,15 @@ class GeneralSetDecoder(AggsDecoder): class DurationDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema) + def append_query(self, query_path, es_query): + return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x.seconds, self.schema) def get_value(self, index): return self.edge.domain.getKeyByIndex(index) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): domain = self.edge.domain - part = row[self.start] + part = row[0] if part == None: return len(domain.partitions) @@ -423,16 +419,15 @@ class DurationDecoder(AggsDecoder): class RangeDecoder(AggsDecoder): - def append_query(self, es_query, start): - self.start = start - return _range_composer(self.edge, self.edge.domain, es_query, lambda x: x, self.schema) + def append_query(self, query_path, es_query): + return _range_composer(self, self.edge, self.edge.domain, es_query, lambda x: x, self.schema) def get_value(self, index): return self.edge.domain.getKeyByIndex(index) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): domain = self.edge.domain - part = row[self.start] + part = row[0] if part == None: return len(domain.partitions) @@ -455,36 +450,45 @@ class RangeDecoder(AggsDecoder): class MultivalueDecoder(SetDecoder): def __init__(self, edge, query, limit): - AggsDecoder.__init__(self, edge, query, limit) + SetDecoder.__init__(self, edge, query, limit) self.var = edge.value.var - self.values = query.frum.schema[edge.value.var][0].partitions self.parts = [] - def append_query(self, es_query, start): - self.start = start + def append_query(self, query_path, es_query): + es_field = first(self.query.frum.schema.leaves(self.var)).es_column - es_field = self.query.frum.schema.leaves(self.var)[0].es_column - es_query = wrap({"aggs": { - "_match": set_default({"terms": { - "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'}) - }}, es_query) - }}) - - return es_query + return Aggs().add(TermsAggs("_match", { + "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'}), + "size": self.limit + }, self).add(es_query)) def get_value_from_row(self, row): - values = row[self.start]['key'].replace("||", 
"\b").split("|") + values = row[0]['key'].replace("||", "\b").split("|") if len(values) == 2: return None - return unwraplist([v.replace("\b", "|") for v in values[1:-1]]) + t = tuple(v.replace("\b", "|") for v in sorted(values[1:-1])) - def get_index(self, row): + if len(t) == 0: + return None + elif len(t) == 1: + return t[0] + else: + return t + + def get_index(self, row, es_query=None, index=None): find = self.get_value_from_row(row) - try: - return self.parts.index(find) - except Exception: - self.parts.append(find) - return len(self.parts)-1 + return self.domain.getIndexByKey(find) + + def count(self, row): + value = self.get_value_from_row(row) + self.parts.append(value) + + def done_count(self): + self.edge.allowNulls = False + self.edge.domain = self.domain = SimpleSetDomain( + partitions=jx.sort(set(self.parts)) + ) + self.parts = None @property def num_columns(self): @@ -494,7 +498,7 @@ class MultivalueDecoder(SetDecoder): class ObjectDecoder(AggsDecoder): def __init__(self, edge, query, limit): AggsDecoder.__init__(self, edge, query, limit) - if isinstance(edge.value, LeavesOp): + if is_op(edge.value, LeavesOp): prefix = edge.value.term.var flatter = lambda k: literal_field(relative_field(k, prefix)) else: @@ -502,30 +506,29 @@ class ObjectDecoder(AggsDecoder): flatter = lambda k: relative_field(k, prefix) self.put, self.fields = transpose(*[ - (flatter(untype_path(c.names["."])), c.es_column) + (flatter(untype_path(c.name)), c.es_column) for c in query.frum.schema.leaves(prefix) ]) self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}}) - self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) + self.domain.limit = temper_limit(self.domain.limit, query) self.parts = list() self.key2index = {} self.computed_domain = False - def append_query(self, es_query, start): - self.start = start + def append_query(self, query_path, es_query): + decoder = self for i, v in enumerate(self.fields): - nest = wrap({"aggs": { - "_match": set_default({"terms": { + nest = Aggs().add( + TermsAggs("_match", { "field": v, "size": self.domain.limit - }}, es_query), - "_missing": set_default( - {"filter": es_missing(v)}, - es_query - ) - }}) + }, decoder).add(es_query) + ).add( + FilterAggs("_missing", MissingOp(Variable(v)), decoder).add(es_query) + ) es_query = nest + decoder = None return es_query def count(self, row): @@ -542,7 +545,7 @@ class ObjectDecoder(AggsDecoder): partitions=[{"value": p, "dataIndex": i} for i, p in enumerate(self.parts)] ) - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): value = self.get_value_from_row(row) if self.computed_domain: return self.domain.getIndexByKey(value) @@ -555,16 +558,20 @@ class ObjectDecoder(AggsDecoder): self.parts.append(value) return i - def get_value_from_row(self, row): - part = row[self.start:self.start + self.num_columns:] - if not part[0]['doc_count']: + def get_value_from_row(self, parts): + if not parts[0]['doc_count']: return None output = Data() - for k, v in transpose(self.put, part): - output[k] = v.get('key') + for k, v in transpose(self.put, parts): + v_key = v.get('key') + if v_key != None: + output[k] = v_key return output + def get_value(self, index): + return self.parts[index] + @property def num_columns(self): return len(self.fields) @@ -576,81 +583,61 @@ class DefaultDecoder(SetDecoder): def __init__(self, edge, query, limit): AggsDecoder.__init__(self, edge, query, limit) self.domain = edge.domain - self.domain.limit = 
Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) + self.domain.limit = temper_limit(self.domain.limit, query) self.parts = list() self.key2index = {} self.computed_domain = False - self.script = self.edge.value.partial_eval().to_es_script(self.schema) + self.script = Painless[self.edge.value].partial_eval().to_es_script(self.schema) self.pull = pull_functions[self.script.data_type] self.missing = self.script.miss.partial_eval() - self.exists = NotOp("not", self.missing).partial_eval() + self.exists = NotOp(self.missing).partial_eval() # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM - sort_candidates = [s for s in self.query.sort if s.value == self.edge.value] + sort_candidates = [s for s in query.sort if s.value == edge.value] if sort_candidates: self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]} else: self.es_order = None - def append_query(self, es_query, start): - self.start = start - - if not isinstance(self.edge.value, Variable): - if self.exists is TRUE: - # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH) - output = wrap({"aggs": { - "_match": set_default( - {"terms": { - "script": {"lang": "painless", "inline": self.script.expr}, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ) - }}) - else: - output = wrap({"aggs": { - "_match": { # _match AND _filter REVERSED SO _match LINES UP WITH _missing - "filter": self.exists.to_esfilter(self.schema), - "aggs": { - "_filter": set_default( - {"terms": { - "script": {"lang": "painless", "inline": self.script.expr}, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ) - } - }, - "_missing": set_default( - {"filter": self.missing.to_esfilter(self.schema)}, - es_query - ) - }}) - return output + def append_query(self, query_path, es_query): + if is_op(self.edge.value, FirstOp) and is_op(self.edge.value.term, Variable): + self.edge.value = self.edge.value.term # ES USES THE FIRST TERM FOR {"terms": } AGGREGATION + output = Aggs() + if not is_op(self.edge.value, Variable): + terms = TermsAggs( + "_match", + { + "script": {"lang": "painless", "inline": self.script.expr}, + "size": self.domain.limit, + "order": self.es_order + }, + self + ) + output.add(FilterAggs("_filter", self.exists, None).add(terms.add(es_query))) else: - output = wrap({"aggs": { - "_match": set_default( - {"terms": { - "field": self.schema.leaves(self.edge.value.var)[0].es_column, - "size": self.domain.limit, - "order": self.es_order - }}, - es_query - ), - "_missing": set_default( - {"filter": self.missing.to_esfilter(self.schema)}, - es_query - ) - }}) - return output + terms = TermsAggs( + "_match", { + "field": first(self.schema.leaves(self.edge.value.var)).es_column, + "size": self.domain.limit, + "order": self.es_order + }, + self + ) + output.add(terms.add(es_query)) + + if self.edge.allowNulls: + output.add(FilterAggs("_missing", self.missing, self).add(es_query)) + return output def count(self, row): - part = row[self.start] + part = row[0] if part['doc_count']: - if part.get('key') != None: - self.parts.append(self.pull(part.get('key'))) + key = part.get('key') + if key != None: + try: + self.parts.append(self.pull(key)) + except Exception as e: + pass else: self.edge.allowNulls = True # OK! 
WE WILL ALLOW NULLS @@ -661,16 +648,16 @@ class DefaultDecoder(SetDecoder): self.parts = None self.computed_domain = True - def get_index(self, row): + def get_index(self, row, es_query=None, index=None): if self.computed_domain: try: - part = row[self.start] + part = row[0] return self.domain.getIndexByKey(self.pull(part.get('key'))) except Exception as e: Log.error("problem", cause=e) else: try: - part = row[self.start] + part = row[0] key = self.pull(part.get('key')) i = self.key2index.get(key) if i is None: @@ -693,42 +680,35 @@ class DimFieldListDecoder(SetDecoder): edge.allowNulls = False self.fields = edge.domain.dimension.fields self.domain = self.edge.domain - self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT) + self.domain.limit = temper_limit(self.domain.limit, query) self.parts = list() - def append_query(self, es_query, start): - # TODO: USE "reverse_nested" QUERY TO PULL THESE - self.start = start + def append_query(self, query_path, es_query): + decoder = self for i, v in enumerate(self.fields): exists = v.exists().partial_eval() - nest = wrap({"aggs": {"_match": { - "filter": exists.to_esfilter(self.schema), - "aggs": {"_filter": set_default({"terms": { - "field": self.schema.leaves(v.var)[0].es_column, - "size": self.domain.limit - }}, es_query)} - }}}) - nest.aggs._missing = set_default( - {"filter": NotOp("not", exists).to_esfilter(self.schema)}, - es_query - ) + nest = Aggs() + nest.add(TermsAggs("_match", { + "field": first(self.schema.leaves(v.var)).es_column, + "size": self.domain.limit + }, decoder).add(es_query)) + nest.add(FilterAggs("_missing", NotOp(exists), decoder).add(es_query)) es_query = nest + decoder = None if self.domain.where: - filter_ = self.domain.where.partial_eval().to_esfilter(self.schema) - es_query = {"aggs": {"_filter": set_default({"filter": filter_}, es_query)}} + es_query = FilterAggs("_filter", self.domain.where, None).add(es_query) return es_query - def count(self, row): - part = row[self.start:self.start + len(self.fields):] - if part[0]['doc_count']: - value = tuple(p.get("key") for p in part) + def count(self, parts): + if parts[0]['doc_count']: + value = tuple(p.get("key") for p, f in zip(parts, self.fields)) self.parts.append(value) def done_count(self): - columns = map(text_type, range(len(self.fields))) - parts = wrap([{text_type(i): p for i, p in enumerate(part)} for part in set(self.parts)]) + columns = list(map(text, range(len(self.fields)))) + parts = wrap([{text(i): p for i, p in enumerate(part)} for part in set(self.parts)]) self.parts = None sorted_parts = jx.sort(parts, columns) @@ -737,20 +717,14 @@ class DimFieldListDecoder(SetDecoder): partitions=[{"value": tuple(v[k] for k in columns), "dataIndex": i} for i, v in enumerate(sorted_parts)] ) - def get_index(self, row): - part = row[self.start:self.start + len(self.fields):] - if part[0]['doc_count']==0: - return None - find = tuple(p.get("key") for p in part) - output = self.domain.getIndexByKey(find) - return output + def get_index(self, row, es_query=None, index=None): + if row[0]['doc_count']: + find = tuple(p.get("key") for p, f in zip(row, self.fields)) + output = self.domain.getIndexByKey(find) + return output + @property def num_columns(self): return len(self.fields) -pull_functions = { - STRING: lambda x: x, - NUMBER: lambda x: float(x) if x !=None else None, - BOOLEAN: string2boolean -} diff --git a/vendor/jx_elasticsearch/es52/deep.py b/vendor/jx_elasticsearch/es52/deep.py index 524c2e6..a1b47c2 100644 --- 
a/vendor/jx_elasticsearch/es52/deep.py +++ b/vendor/jx_elasticsearch/es52/deep.py @@ -5,38 +5,36 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from jx_base.expressions import NULL +from jx_base.expressions import LeavesOp, NULL, Variable +from jx_base.language import is_op from jx_base.query import DEFAULT_LIMIT -from jx_elasticsearch import post as es_post -from jx_elasticsearch.es52.expressions import split_expression_by_depth, AndOp, Variable, LeavesOp -from jx_elasticsearch.es52.setop import format_dispatch, get_pull_function, get_pull -from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template -from jx_python.expressions import compile_expression, jx_expression_to_function -from mo_dots import split_field, FlatList, listwrap, literal_field, coalesce, Data, concat_field, set_default, relative_field, startswith_field -from mo_json.typed_encoder import NESTED -from mo_json.typed_encoder import untype_path +from jx_elasticsearch.es52.expressions import AndOp, ES52, split_expression_by_depth, MATCH_ALL +from jx_elasticsearch.es52.set_op import set_formatters, get_pull, get_pull_function +from jx_elasticsearch.es52.util import es_query_template, jx_sort_to_es_sort +from jx_python.expressions import jx_expression_to_function +from mo_dots import Data, FlatList, coalesce, concat_field, is_list as is_list_, listwrap, literal_field, \ + relative_field, set_default, split_field, startswith_field, unwrap, wrap +from mo_future import zip_longest +from mo_json import NESTED +from mo_json.typed_encoder import untype_path, untyped from mo_logs import Log from mo_threads import Thread from mo_times.timer import Timer -from pyLibrary import convert EXPRESSION_PREFIX = "_expr." 
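# EDITOR'S NOTE: a minimal sketch (hypothetical values, not part of the patch) of
# how es_deepop, below, pairs per-depth where-clauses with the filter slots built
# by es_query_template: wheres[0] is applied at the document level, wheres[1] at
# the nested level, and zip_longest pads a missing nested clause with None so the
# shallow slot is still filled.
from itertools import zip_longest  # plain-Python stand-in for the patch's mo_future.zip_longest

es_filters = [{}, {}]                # [shallow filter slot, nested filter slot]
wheres = [[{"eq": {"a": 1}}], None]  # hypothetical result of split_expression_by_depth
for f, w in zip_longest(es_filters, wheres):
    if w:
        f["filter"] = w              # stand-in for set_default(f, ES52[AndOp(w)].partial_eval().to_esfilter(schema))
# es_filters -> [{"filter": [{"eq": {"a": 1}}]}, {}]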
-_ = convert - def is_deepop(es, query): if query.edges or query.groupby: return False if all(s.aggregate not in (None, "none") for s in listwrap(query.select)): return False - if len(split_field(query.frum.name)) > 1: + # THE schema.name SHOWS THE REAL NESTED DEPTH + if len(split_field(query.frum.schema.name)) > 1: return True # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS @@ -60,20 +58,19 @@ def es_deepop(es, query): # SPLIT WHERE CLAUSE BY DEPTH wheres = split_expression_by_depth(query.where, schema) - for i, f in enumerate(es_filters): - script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema) + for f, w in zip_longest(es_filters, wheres): + script = ES52[AndOp(w)].partial_eval().to_esfilter(schema) set_default(f, script) if not wheres[1]: + # INCLUDE DOCS WITH NO NESTED DOCS more_filter = { "bool": { - "filter": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)], + "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)], "must_not": { "nested": { "path": query_path, - "query": { - "match_all": {} - } + "query": MATCH_ALL } } } @@ -85,80 +82,84 @@ def es_deepop(es, query): # es_query.sort = jx_sort_to_es_sort(query.sort) map_to_es_columns = schema.map_to_es() - # {c.names["."]: c.es_column for c in schema.leaves(".")} + # {c.name: c.es_column for c in schema.leaves(".")} query_for_es = query.map(map_to_es_columns) es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema) es_query.stored_fields = [] - is_list = isinstance(query.select, list) + is_list = is_list_(query.select) + selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)]) new_select = FlatList() - i = 0 - for s in listwrap(query.select): - if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable): + put_index = 0 + for select in selects: + if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable): # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS - leaves = schema.leaves(s.value.term.var) + leaves = schema.leaves(select.value.term.var) col_names = set() for c in leaves: if c.nested_path[0] == ".": if c.jx_type == NESTED: continue es_query.stored_fields += [c.es_column] - c_name = untype_path(c.names[query_path]) + c_name = untype_path(relative_field(c.name, query_path)) col_names.add(c_name) new_select.append({ - "name": concat_field(s.name, c_name), + "name": concat_field(select.name, c_name), "nested_path": c.nested_path[0], - "put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."}, + "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."}, "pull": get_pull_function(c) }) - i += 1 + put_index += 1 # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS for n in new_select: if n.name.startswith("..") and n.name.lstrip(".") not in col_names: n.put.name = n.name = n.name.lstrip(".") col_names.add(n.name) - elif isinstance(s.value, Variable): - net_columns = schema.leaves(s.value.var) + elif is_op(select.value, Variable): + net_columns = schema.leaves(select.value.var) if not net_columns: new_select.append({ - "name": s.name, + "name": select.name, "nested_path": ".", - "put": {"name": s.name, "index": i, "child": "."}, + "put": {"name": select.name, "index": put_index, "child": "."}, "pull": NULL }) else: for n in net_columns: - pull = get_pull_function(n) if n.nested_path[0] == ".": if n.jx_type == NESTED: continue es_query.stored_fields += [n.es_column] - # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child + if len(n.nested_path[0]) > 
len(query_path): + # SELECTING INNER PROPERTIES IS NOT ALLOWED + continue + # WE MUST FIGURE OUT WHICH NAMESPACE s.value.var IS USING SO WE CAN EXTRACT THE child for np in n.nested_path: - c_name = untype_path(n.names[np]) - if startswith_field(c_name, s.value.var): - child = relative_field(c_name, s.value.var) + c_name = untype_path(relative_field(n.name, np)) + if startswith_field(c_name, select.value.var): + child = relative_field(c_name, select.value.var) break else: - child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var) + raise Log.error("Not expected") + pull = get_pull_function(n) new_select.append({ - "name": s.name, + "name": select.name, "pull": pull, "nested_path": n.nested_path[0], "put": { - "name": s.name, - "index": i, + "name": select.name, + "index": put_index, "child": child } }) - i += 1 + put_index += 1 else: - expr = s.value + expr = select.value for v in expr.vars(): for c in schema[v.var]: if c.nested_path[0] == ".": @@ -166,35 +167,33 @@ def es_deepop(es, query): # else: # Log.error("deep field not expected") - pull_name = EXPRESSION_PREFIX + s.name + pull_name = EXPRESSION_PREFIX + select.name map_to_local = MapToLocal(schema) pull = jx_expression_to_function(pull_name) - post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python()) + post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local)) new_select.append({ - "name": s.name if is_list else ".", + "name": select.name if is_list else ".", "pull": pull, "value": expr.__data__(), - "put": {"name": s.name, "index": i, "child": "."} + "put": {"name": select.name, "index": put_index, "child": "."} }) - i += 1 + put_index += 1 # ES needs two calls to get all documents more = [] def get_more(please_stop): - more.append(es_post( - es, + more.append(es.search( Data( query=more_filter, stored_fields=es_query.stored_fields - ), - query.limit + ) )) if more_filter: need_more = Thread.run("get more", target=get_more) with Timer("call to ES") as call_timer: - data = es_post(es, es_query, query.limit) + data = es.search(es_query) # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED def inners(): @@ -208,10 +207,10 @@ def es_deepop(es, query): Thread.join(need_more) for t in more[0].hits.hits: yield t - # + # try: - formatter, groupby_formatter, mime_type = format_dispatch[query.format] + formatter, row_formatter, mime_type = set_formatters[query.format] output = formatter(inners(), new_select, query) output.meta.timing.es = call_timer.duration diff --git a/vendor/jx_elasticsearch/es52/es_query.py b/vendor/jx_elasticsearch/es52/es_query.py new file mode 100644 index 0000000..5b52474 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/es_query.py @@ -0,0 +1,340 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +from __future__ import absolute_import, division, unicode_literals + +from jx_elasticsearch.es52.expressions import ES52 +from mo_dots import is_data, is_list, startswith_field +from mo_future import text +from mo_json import value2json +from mo_logs import Log + +_new = object.__new__ + + +class Aggs(object): + + def __init__(self, name=None): + self.name = name + self.children = [] + self.decoders = [] + self.selects = [] + + def to_es(self, schema, query_path="."): + if self.children: + return {"aggs": { + name: t.to_es(schema, query_path) + for i, t in enumerate(self.children) + for name in [t.name if t.name else "_" + text(i)] + }} + else: + return {} + + def add(self, child): + self.children.append(child) + return self + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, Aggs) and self.name == other.name + + def merge(self, other): + if self != other: + return False + self.children.extend(other.children) + self.decoders.extend(other.decoders) + return True + + def __str__(self): + return value2json(self.to_es) + + def copy(self): + output = _new(self.__class__) + output.name = self.name + output.children = self.children[:] + output.decoders = self.decoders[:] + output.selects = self.selects[:] + return output + + +class ExprAggs(Aggs): + + def __init__(self, name, expr, select): + Aggs.__init__(self, name) + self.expr = expr + if not select: + Log.error("Expecting a select") + + self.selects = [select] + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, ExprAggs) and self.name == other.name and self.expr == other.expr + + def merge(self, other): + if self != other: + return False + self.expr += other.expr + self.children.extend(other.children) + self.decoders.extend(other.decoders) + self.selects.extend(other.selects) + return True + + def to_es(self, schema, query_path="."): + self.expr['aggs'] = Aggs.to_es(self, schema, query_path).get('aggs') + return self.expr + + def copy(self): + output = Aggs.copy(self) + output.expr = self.expr + return output + + +class CountAggs(Aggs): + # DO A DOC COUNT + + def __init__(self, select): + Aggs.__init__(self, None) + if not select: + Log.error("Expecting a select") + self.selects = [select] + + def __eq__(self, other): + if self is other: + return True + return all(s is t for s, t in zip(self.selects, other.selects)) + + def to_es(self, schema, query_path="."): + return None # NO NEED TO WRITE ANYTHING + + +class FilterAggs(Aggs): + def __init__(self, name, filter, decoder): + Aggs.__init__(self, name) + self.filter = filter + if is_data(filter): + Log.error("programming error") + self.decoders = [decoder] if decoder else [] + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, FilterAggs) and self.name == other.name and self.filter == other.filter + + def merge(self, other): + if self != other: + return False + self.children.extend(other.children) + self.decoders.extend(other.decoders) + return True + + def to_es(self, schema, query_path="."): + output = Aggs.to_es(self, schema, query_path) + output['filter'] = ES52[self.filter].partial_eval().to_esfilter(schema) + return output + + def copy(self): + output = Aggs.copy(self) + output.filter = self.filter + return output + + +class FiltersAggs(Aggs): + def __init__(self, name, filters, decoder): + Aggs.__init__(self, name) + self.filters = filters + self.decoders = [decoder] if decoder else [] + if not is_list(filters): + Log.error("expecting a list") + + def __eq__(self, other): 
+ if self is other: + return True + return isinstance(other, FiltersAggs) and self.name == other.name and self.filters == other.filters + + def merge(self, other): + if self != other: + return False + self.children.extend(other.children) + self.decoders.extend(other.decoders) + return True + + def to_es(self, schema, query_path="."): + output = Aggs.to_es(self, schema, query_path) + output['filters'] = {"filters": [f.partial_eval().to_esfilter(schema) for f in self.filters]} + return output + + def copy(self): + output = Aggs.copy(self) + output.filters = self.filters + return output + + +class NestedAggs(Aggs): + def __init__(self, path): + Aggs.__init__(self, "_nested") + self.path = path + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, NestedAggs) and self.path == other.path + + def to_es(self, schema, query_path="."): + output = Aggs.to_es(self, schema, self.path) + if query_path == self.path: + Log.error("this should have been cancelled out") + elif startswith_field(self.path, query_path): + output['nested'] = {"path": self.path} + else: + output["reverse_nested"] = {"path": None if self.path == "." else self.path} + return output + + def copy(self): + output = Aggs.copy(self) + output.path = self.path + return output + + +class TermsAggs(Aggs): + def __init__(self, name, terms, decoder): + Aggs.__init__(self, name) + self.terms = terms + self.decoders = [decoder] if decoder else [] + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, TermsAggs) and self.name == other.name and self.terms == other.terms + + def to_es(self, schema, query_path="."): + output = Aggs.to_es(self, schema, query_path) + output['terms'] = self.terms + return output + + def copy(self): + output = Aggs.copy(self) + output.terms = self.terms + return output + + +class RangeAggs(Aggs): + def __init__(self, name, expr, decoder): + Aggs.__init__(self, name) + self.expr = expr + self.decoders = [decoder] if decoder else [] + + def __eq__(self, other): + if self is other: + return True + return isinstance(other, RangeAggs) and self.name == other.name and self.expr == other.expr + + def to_es(self, schema, query_path="."): + output = Aggs.to_es(self, schema, query_path) + output['range'] = self.expr + return output + + def copy(self): + output = Aggs.copy(self) + output.expr = self.expr + return output + + +def simplify(aggs): + # CONVERT FROM TREE TO UNION OF SEQUENCES + def depth_first(aggr): + if aggr.__class__ == Aggs: + # BASE CLASS Aggs IS ONLY A PLACEHOLDER + if not aggr.children: + yield tuple() + return + for c in aggr.children: + for path in depth_first(c): + yield path + elif not aggr.children: + yield (aggr,) + else: + for c in aggr.children: + for path in depth_first(c): + yield (aggr,) + path + + # CANCEL OUT REDUNDANT NESTED AGGS + combined = [] + for path in depth_first(aggs): + current_nested = NestedAggs(".") + prev = None + remove = [] + for step in path: + if isinstance(step, NestedAggs): + if prev is not None: + remove.append(prev) + prev = None + if current_nested is not None: + if current_nested.path == step.path: + remove.append(step) + continue + else: + pass + prev = step + else: + current_nested = prev if prev else current_nested + prev = None + + combined.append(tuple(p for p in path if not any(p is r for r in remove))) + + # COMMON FACTOR, CONVERT BACK TO TREE + def merge(aggregations):
+ output = [] + while True: + common_children = [] + first_found = None + common = None + for i, terms in enumerate(aggregations): + if not terms: + continue + term, rest = terms[0], terms[1:] + if first_found is None: + first_found = term + common_children.append(rest) + common = first_found.copy() + aggregations[i] = None + elif term == first_found: + common_children.append(rest) + common.selects.extend([t for t in term.selects if not any(t is s for s in common.selects)]) + common.decoders.extend([t for t in term.decoders if not any(t is d for d in common.decoders)]) + aggregations[i] = None + + if first_found is None: + return output + else: + common.children = merge(common_children) + output.append(common) + + merged = [trim_root(o) for o in merge(combined)] + + output = Aggs() + output.children = merged + return output + + +def trim_root(agg): + if isinstance(agg, NestedAggs) and agg.path == '.': + if len(agg.children) == 1: + return agg.children[0] + else: + output = Aggs() + output.children = agg.children + return output + else: + return agg diff --git a/vendor/jx_elasticsearch/es52/expressions.py b/vendor/jx_elasticsearch/es52/expressions.py deleted file mode 100644 index 98200b8..0000000 --- a/vendor/jx_elasticsearch/es52/expressions.py +++ /dev/null @@ -1,1442 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import itertools - -from jx_base.expressions import Variable, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \ - WhenOp, InequalityOp, extend, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \ - EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \ - PrefixOp, NotLeftOp, InOp, CaseOp, AndOp, \ - ConcatOp, IsNumberOp, Expression, BasicIndexOfOp, MaxOp, MinOp, BasicEqOp, BooleanOp, IntegerOp, BasicSubstringOp, ZERO, NULL, FirstOp, FALSE, TRUE, SuffixOp, simplified, ONE -from jx_elasticsearch.es52.util import es_not, es_script, es_or, es_and, es_missing -from mo_dots import coalesce, wrap, Null, set_default, literal_field -from mo_future import text_type -from mo_json.typed_encoder import NUMBER, STRING, BOOLEAN, OBJECT, INTEGER -from mo_logs import Log, suppress_exception -from mo_logs.strings import expand_template, quote -from mo_math import MAX, OR -from mo_times import Date -from pyLibrary.convert import string2regexp - -NUMBER_TO_STRING = """ -Optional.of({{expr}}).map( - value -> { - String output = String.valueOf(value); - if (output.endsWith(".0")) output = output.substring(0, output.length() - 2); - return output; - } -).orElse(null) -""" - -LIST_TO_PIPE = """ -StringBuffer output=new StringBuffer(); -for(String s : {{expr}}){ - output.append("|"); - String sep2=""; - StringTokenizer parts = new StringTokenizer(s, "|"); - while (parts.hasMoreTokens()){ - output.append(sep2); - output.append(parts.nextToken()); - sep2="||"; - }//for -}//for -output.append("|"); -return output.toString() -""" - - - -class EsScript(Expression): - __slots__ = ("miss", "data_type", "expr", "many") - - def __init__(self, type, expr, frum, miss=None, many=False): - self.miss = coalesce(miss, FALSE) # Expression that will return true/false to indicate missing result - self.data_type = 
type - self.expr = expr - self.many = many # True if script returns multi-value - self.frum = frum - - @property - def type(self): - return self.data_type - - def script(self, schema): - """ - RETURN A SCRIPT SUITABLE FOR CODE OUTSIDE THIS MODULE (NO KNOWLEDGE OF Painless) - :param schema: - :return: - """ - missing = self.miss.partial_eval() - if missing is FALSE: - return self.partial_eval().to_es_script(schema).expr - elif missing is TRUE: - return "null" - - return "(" + missing.to_es_script(schema).expr + ")?null:(" + self.expr + ")" - - def to_esfilter(self, schema): - return {"script": es_script(self.script(schema))} - - def to_es_script(self, schema): - return self - - def missing(self): - return self.miss - - def __data__(self): - return {"script": self.script} - - def __eq__(self, other): - if not isinstance(other, EsScript): - return False - elif self.expr==other.expr: - return True - else: - return False - - -@extend(BinaryOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval().to_es_script(schema).expr - rhs = NumberOp("number", self.rhs).partial_eval().to_es_script(schema).expr - script = "(" + lhs + ") " + BinaryOp.operators[self.op] + " (" + rhs + ")" - missing = OrOp("or", [self.lhs.missing(), self.rhs.missing()]) - - return WhenOp( - "when", - missing, - **{ - "then": self.default, - "else": - EsScript(type=NUMBER, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - - -@extend(BinaryOp) -def to_esfilter(self, schema): - if not isinstance(self.lhs, Variable) or not isinstance(self.rhs, Literal) or self.op in BinaryOp.operators: - return self.to_es_script(schema).to_esfilter(schema) - - if self.op in ["eq", "term"]: - return {"term": {self.lhs.var: self.rhs.to_esfilter(schema)}} - elif self.op in ["ne", "neq"]: - return es_not({"term": {self.lhs.var: self.rhs.to_esfilter(schema)}}) - elif self.op in BinaryOp.ineq_ops: - return {"range": {self.lhs.var: {self.op: self.rhs.value}}} - else: - Log.error("Logic error") - - -@extend(CaseOp) -def to_es_script(self, schema): - acc = self.whens[-1].partial_eval().to_es_script(schema) - for w in reversed(self.whens[0:-1]): - acc = WhenOp( - "when", - w.when, - **{"then": w.then, "else": acc} - ).partial_eval().to_es_script(schema) - return acc - - -@extend(CaseOp) -def to_esfilter(self, schema): - if self.type == BOOLEAN: - return OrOp( - "or", - [ - AndOp("and", [w.when, w.then]) - for w in self.whens[:-1] - ] + - self.whens[-1:] - ).partial_eval().to_esfilter(schema) - else: - Log.error("do not know how to handle") - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ConcatOp) -def to_esfilter(self, schema): - if isinstance(self.value, Variable) and isinstance(self.find, Literal): - return {"regexp": {self.value.var: ".*" + string2regexp(self.find.value) + ".*"}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ConcatOp) -def to_es_script(self, schema): - if len(self.terms) == 0: - return self.default.to_es_script(schema) - - acc = [] - separator = StringOp("string", self.separator).partial_eval() - sep = separator.to_es_script(schema).expr - for t in self.terms: - val = WhenOp( - "when", - t.missing(), - **{ - "then": Literal("literal", ""), - "else": EsScript(type=STRING, expr=sep + "+" + StringOp(None, t).partial_eval().to_es_script(schema).expr, frum=t) - # "else": ConcatOp("concat", [sep, t]) - } - ) - acc.append("(" + val.partial_eval().to_es_script(schema).expr + 
")") - expr_ = "(" + "+".join(acc) + ").substring(" + LengthOp("length", separator).to_es_script(schema).expr + ")" - - if isinstance(self.default, NullOp): - return EsScript( - miss=self.missing(), - type=STRING, - expr=expr_, - frum=self - ) - else: - return EsScript( - miss=self.missing(), - type=STRING, - expr="((" + expr_ + ").length==0) ? (" + self.default.to_es_script(schema).expr + ") : (" + expr_ + ")", - frum=self - ) - - -@extend(Literal) -def to_es_script(self, schema): - def _convert(v): - if v is None: - return NULL.to_es_script(schema) - if v is True: - return EsScript( - type=BOOLEAN, - expr="true", - frum=self - ) - if v is False: - return EsScript( - type=BOOLEAN, - expr="false", - frum=self - ) - if isinstance(v, text_type): - return EsScript( - type=STRING, - expr=quote(v), - frum=self - ) - if isinstance(v, int): - return EsScript( - type=INTEGER, - expr=text_type(v), - frum=self - ) - if isinstance(v, float): - return EsScript( - type=NUMBER, - expr=text_type(v), - frum=self - ) - if isinstance(v, dict): - return EsScript( - type=OBJECT, - expr="[" + ", ".join(quote(k) + ": " + _convert(vv) for k, vv in v.items()) + "]", - frum=self - ) - if isinstance(v, (list, tuple)): - return EsScript( - type=OBJECT, - expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]", - frum=self - ) - if isinstance(v, Date): - return EsScript( - type=NUMBER, - expr=text_type(v.unix), - frum=self - ) - - return _convert(self.term) - - -@extend(CoalesceOp) -def to_es_script(self, schema): - if not self.terms: - return NULL.to_es_script(schema) - - v = self.terms[-1] - acc = FirstOp("first", v).partial_eval().to_es_script(schema) - for v in reversed(self.terms[:-1]): - m = v.missing().partial_eval() - e = NotOp("not", m).partial_eval().to_es_script(schema) - r = FirstOp("first", v).partial_eval().to_es_script(schema) - - if r.miss is TRUE: - continue - elif r.miss is FALSE: - acc = r - continue - elif acc.type == r.type: - new_type = r.type - elif acc.type == NUMBER and r.type == INTEGER: - new_type = NUMBER - elif acc.type == INTEGER and r.type == NUMBER: - new_type = NUMBER - else: - new_type = OBJECT - - acc = EsScript( - miss=AndOp("and", [acc.miss, m]).partial_eval(), - type=new_type, - expr="(" + e.expr + ") ? 
(" + r.expr + ") : (" + acc.expr + ")", - frum=self - ) - return acc - - -@extend(CoalesceOp) -def to_esfilter(self, schema): - return {"bool": {"should": [{"exists": {"field": v}} for v in self.terms]}} - - -@extend(ExistsOp) -def to_es_script(self, schema): - return self.field.exists().partial_eval().to_es_script(schema) - - -@extend(ExistsOp) -def to_esfilter(self, schema): - return self.field.exists().partial_eval().to_esfilter(schema) - - -@extend(Literal) -def to_esfilter(self, schema): - return self.json - - -@extend(NullOp) -def to_es_script(self, schema): - return EsScript( - miss=TRUE, - type=OBJECT, - expr="null", - frum=self - ) - -@extend(NullOp) -def to_esfilter(self, schema): - return es_not({"match_all": {}}) - - -@extend(FalseOp) -def to_es_script(self, schema): - return EsScript(type=BOOLEAN, expr="false", frum=self) - - -@extend(FalseOp) -def to_esfilter(self, schema): - return MATCH_NONE - - -@extend(TupleOp) -def to_esfilter(self, schema): - Log.error("not supported") - - -@extend(TupleOp) -def to_es_script(self, schema): - terms = [FirstOp("first", t).partial_eval().to_es_script(schema) for t in self.terms] - expr = 'new Object[]{'+','.join(t.expr for t in terms)+'}' - return EsScript( - type=OBJECT, - expr=expr, - miss=FALSE, - many=FALSE, - frum=self - ) - - -@extend(LeavesOp) -def to_es_script(self, schema): - Log.error("not supported") - - -@extend(LeavesOp) -def to_esfilter(self, schema): - Log.error("not supported") - - -@extend(InequalityOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval().to_es_script(schema).expr - rhs = NumberOp("number", self.rhs).partial_eval().to_es_script(schema).expr - script = "(" + lhs + ") " + InequalityOp.operators[self.op] + " (" + rhs + ")" - - output = WhenOp( - "when", - OrOp("or", [self.lhs.missing(), self.rhs.missing()]), - **{ - "then": FALSE, - "else": - EsScript(type=BOOLEAN, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - return output - - -@extend(InequalityOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - cols = schema.leaves(self.lhs.var) - if not cols: - lhs = self.lhs.var # HAPPENS DURING DEBUGGING, AND MAYBE IN REAL LIFE TOO - elif len(cols) == 1: - lhs = schema.leaves(self.lhs.var)[0].es_column - else: - Log.error("operator {{op|quote}} does not work on objects", op=self.op) - return {"range": {lhs: {self.op: self.rhs.value}}} - else: - script = self.to_es_script(schema) - if script.miss is not FALSE: - Log.error("inequality must be decisive") - return {"script": es_script(script.expr)} - - -@extend(DivOp) -def to_es_script(self, schema): - lhs = NumberOp("number", self.lhs).partial_eval() - rhs = NumberOp("number", self.rhs).partial_eval() - script = "(" + lhs.to_es_script(schema).expr + ") / (" + rhs.to_es_script(schema).expr + ")" - - output = WhenOp( - "when", - OrOp("or", [self.lhs.missing(), self.rhs.missing(), EqOp("eq", [self.rhs, ZERO])]), - **{ - "then": self.default, - "else": EsScript(type=NUMBER, expr=script, frum=self) - } - ).partial_eval().to_es_script(schema) - - return output - - -@extend(DivOp) -def to_esfilter(self, schema): - return NotOp("not", self.missing()).partial_eval().to_esfilter(schema) - - -@extend(FloorOp) -def to_es_script(self, schema): - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if rhs.frum is ONE: - script = "(int)Math.floor(" + lhs.expr + ")" - else: - script = "Math.floor((" + lhs.expr + ") / (" + 
rhs.expr + "))*(" + rhs.expr + ")" - - output = WhenOp( - "when", - OrOp("or", [lhs.miss, rhs.miss, EqOp("eq", [self.rhs, ZERO])]), - **{ - "then": self.default, - "else": - EsScript( - type=NUMBER, - expr=script, - frum=self, - miss=FALSE - ) - } - ).to_es_script(schema) - return output - - -@extend(FloorOp) -def to_esfilter(self, schema): - Log.error("Logic error") - - -@simplified -@extend(EqOp) -def partial_eval(self): - lhs = self.lhs.partial_eval() - rhs = self.rhs.partial_eval() - return EqOp("eq", [lhs, rhs]) - - -@extend(EqOp) -def to_es_script(self, schema): - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": self.rhs.missing()}), - WhenOp("when", self.rhs.missing(), **{"then": FALSE}), - BasicEqOp("eq", [self.lhs, self.rhs]) - ]).partial_eval().to_es_script(schema) - - -@extend(EqOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - rhs = self.rhs.value - lhs = self.lhs.var - cols = schema.leaves(lhs) - if cols: - lhs = cols[0].es_column - - if isinstance(rhs, list): - if len(rhs) == 1: - return {"term": {lhs: rhs[0]}} - else: - return {"terms": {lhs: rhs}} - else: - return {"term": {lhs: rhs}} - - else: - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": self.rhs.missing()}), - WhenOp("when", self.rhs.missing(), **{"then": FALSE}), - BasicEqOp("eq", [self.lhs, self.rhs]) - ]).partial_eval().to_esfilter(schema) - - -@extend(BasicEqOp) -def to_es_script(self, schema): - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if lhs.many: - if rhs.many: - return AndOp("and", [ - EsScript(type=BOOLEAN, expr="(" + lhs.expr + ").size()==(" + rhs.expr + ").size()", frum=self), - EsScript(type=BOOLEAN, expr="(" + rhs.expr + ").containsAll(" + lhs.expr + ")", frum=self) - ]).to_es_script(schema) - else: - return EsScript(type=BOOLEAN, expr="(" + lhs.expr + ").contains(" + rhs.expr + ")",frum=self) - elif rhs.many: - return EsScript( - type=BOOLEAN, - expr="(" + rhs.expr + ").contains(" + lhs.expr + ")", - frum=self - ) - else: - return EsScript( - type=BOOLEAN, - expr="(" + lhs.expr + "==" + rhs.expr + ")", - frum=self - ) - - -@extend(BasicEqOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - lhs = self.lhs.var - cols = schema.leaves(lhs) - if cols: - lhs = cols[0].es_column - rhs = self.rhs.value - if isinstance(rhs, list): - if len(rhs) == 1: - return {"term": {lhs: rhs[0]}} - else: - return {"terms": {lhs: rhs}} - else: - return {"term": {lhs: rhs}} - else: - return self.to_es_script(schema).to_esfilter(schema) - - - -@extend(MissingOp) -def to_es_script(self, schema, not_null=False, boolean=True): - if isinstance(self.expr, Variable): - if self.expr.var == "_id": - return EsScript(type=BOOLEAN, expr="false", frum=self) - else: - columns = schema.leaves(self.expr.var) - if len(columns) == 1: - return EsScript(type=BOOLEAN, expr="doc[" + quote(columns[0].es_column) + "].empty", frum=self) - else: - return AndOp("and", [ - EsScript( - type=BOOLEAN, - expr="doc[" + quote(c.es_column) + "].empty", - frum=self - ) - for c in columns - ]).partial_eval().to_es_script(schema) - elif isinstance(self.expr, Literal): - return self.expr.missing().to_es_script(schema) - else: - return self.expr.missing().partial_eval().to_es_script(schema) - - -@extend(MissingOp) -def to_esfilter(self, schema): - if isinstance(self.expr, Variable): - cols = schema.leaves(self.expr.var) - if not cols: - 
return {"match_all": {}} - elif len(cols) == 1: - return es_missing(cols[0].es_column) - else: - return es_and([ - es_missing(c.es_column) for c in cols - ]) - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(NotLeftOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).partial_eval().to_es_script(schema).expr - l = NumberOp("number", self.length).partial_eval().to_es_script(schema).expr - - expr = "(" + v + ").substring((int)Math.max(0, (int)Math.min(" + v + ".length(), " + l + ")))" - return EsScript( - miss=OrOp("or", [self.value.missing(), self.length.missing()]), - type=STRING, - expr=expr, - frum=self - ) - - -@extend(NeOp) -def to_es_script(self, schema): - return CaseOp("case", [ - WhenOp("when", self.lhs.missing(), **{"then": NotOp("not", self.rhs.missing())}), - WhenOp("when", self.rhs.missing(), **{"then": NotOp("not", self.lhs.missing())}), - NotOp("not", BasicEqOp("eq", [self.lhs, self.rhs])) - ]).partial_eval().to_es_script(schema) - - -@extend(NeOp) -def to_esfilter(self, schema): - if isinstance(self.lhs, Variable) and isinstance(self.rhs, Literal): - columns = schema.values(self.lhs.var) - if len(columns) == 0: - return {"match_all": {}} - elif len(columns) == 1: - return es_not({"term": {columns[0].es_column: self.rhs.value}}) - else: - Log.error("column split to multiple, not handled") - else: - lhs = self.lhs.partial_eval().to_es_script(schema) - rhs = self.rhs.partial_eval().to_es_script(schema) - - if lhs.many: - if rhs.many: - return es_not( - ScriptOp( - "script", - ( - "(" + lhs.expr + ").size()==(" + rhs.expr + ").size() && " + - "(" + rhs.expr + ").containsAll(" + lhs.expr + ")" - ) - ).to_esfilter(schema) - ) - else: - return es_not( - ScriptOp("script", "(" + lhs.expr + ").contains(" + rhs.expr + ")").to_esfilter(schema) - ) - else: - if rhs.many: - return es_not( - ScriptOp("script", "(" + rhs.expr + ").contains(" + lhs.expr + ")").to_esfilter(schema) - ) - else: - return es_not( - ScriptOp("script", "(" + lhs.expr + ") != (" + rhs.expr + ")").to_esfilter(schema) - ) - -@extend(NotOp) -def to_es_script(self, schema): - return EsScript( - type=BOOLEAN, - expr="!(" + self.term.to_es_script(schema).expr + ")", - frum=self - ) - - -@extend(NotOp) -def to_esfilter(self, schema): - if isinstance(self.term, MissingOp) and isinstance(self.term.expr, Variable): - v = self.term.expr.var - cols = schema.leaves(v) - if cols: - v = cols[0].es_column - return {"exists": {"field": v}} - else: - operand = self.term.to_esfilter(schema) - return es_not(operand) - - -@extend(AndOp) -def to_es_script(self, schema): - if not self.terms: - return TRUE.to_es_script() - else: - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr=" && ".join("(" + t.to_es_script(schema).expr + ")" for t in self.terms), - frum=self - ) - - -@extend(AndOp) -def to_esfilter(self, schema): - if not len(self.terms): - return {"match_all": {}} - else: - return es_and([t.to_esfilter(schema) for t in self.terms]) - - -@extend(OrOp) -def to_es_script(self, schema): - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr=" || ".join("(" + t.to_es_script(schema).expr + ")" for t in self.terms if t), - frum=self - ) - - -@extend(OrOp) -def to_esfilter(self, schema): - # TODO: REPLICATE THIS WHOLE expression.py SO IT IS CLEAR ES5 QUERIES ARE A BIT DIFFERENT - if schema.snowflake.namespace.es_cluster.version.startswith("5."): - # VERSION 5.2.x - # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION - # 
{"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL - # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL - - # OR(x) == NOT(AND(NOT(xi) for xi in x)) - output = es_not(es_and([ - NotOp("not", t).partial_eval().to_esfilter(schema) - for t in self.terms - ])) - return output - else: - # VERSION 6.2 - return es_or([t.partial_eval().to_esfilter(schema) for t in self.terms]) - - -@extend(LengthOp) -def to_es_script(self, schema): - value = StringOp("string", self.term).to_es_script(schema) - missing = self.term.missing().partial_eval() - return EsScript( - miss=missing, - type=INTEGER, - expr="(" + value.expr + ").length()", - frum=self - ) - - -@extend(FirstOp) -def to_es_script(self, schema): - if isinstance(self.term, Variable): - columns = schema.values(self.term.var) - if len(columns) == 1: - return self.term.to_es_script(schema, many=False) - - term = self.term.to_es_script(schema) - - if isinstance(term.frum, CoalesceOp): - return CoalesceOp("coalesce", [FirstOp("first", t.partial_eval().to_es_script(schema)) for t in term.frum.terms]).to_es_script(schema) - - if term.many: - return EsScript( - miss=term.miss, - type=term.type, - expr="(" + term.expr + ")[0]", - frum=term.frum - ).to_es_script(schema) - else: - return term - - - -@extend(BooleanOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.many: - return BooleanOp("boolean", EsScript( - miss=value.miss, - type=value.type, - expr="(" + value.expr + ")[0]", - frum=value.frum - )).to_es_script(schema) - elif value.type == BOOLEAN: - miss = value.miss - value.miss = FALSE - return WhenOp("when", miss, **{"then": FALSE, "else": value}).partial_eval().to_es_script(schema) - else: - return NotOp("not", value.miss).partial_eval().to_es_script(schema) - -@extend(BooleanOp) -def to_esfilter(self, schema): - if isinstance(self.term, Variable): - return {"term": {self.term.var: True}} - else: - return self.to_es_script(schema).to_esfilter(schema) - -@extend(IntegerOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.many: - return IntegerOp("integer", EsScript( - miss=value.missing, - type=value.type, - expr="(" + value.expr + ")[0]", - frum=value.frum - )).to_es_script(schema) - elif value.type == BOOLEAN: - return EsScript( - miss=value.missing, - type=INTEGER, - expr=value.expr + " ? 1 : 0", - frum=self - ) - elif value.type == INTEGER: - return value - elif value.type == NUMBER: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="(int)(" + value.expr + ")", - frum=self - ) - elif value.type == STRING: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="Integer.parseInt(" + value.expr + ")", - frum=self - ) - else: - return EsScript( - miss=value.missing, - type=INTEGER, - expr="((" + value.expr + ") instanceof String) ? Integer.parseInt(" + value.expr + ") : (int)(" + value.expr + ")", - frum=self - ) - -@extend(NumberOp) -def to_es_script(self, schema): - term = FirstOp("first", self.term).partial_eval() - value = term.to_es_script(schema) - - if isinstance(value.frum, CoalesceOp): - return CoalesceOp("coalesce", [NumberOp("number", t).partial_eval().to_es_script(schema) for t in value.frum.terms]).to_es_script(schema) - - if value.type == BOOLEAN: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr + " ? 
1 : 0", - frum=self - ) - elif value.type == INTEGER: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr, - frum=self - ) - elif value.type == NUMBER: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr=value.expr, - frum=self - ) - elif value.type == STRING: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr="Double.parseDouble(" + value.expr + ")", - frum=self - ) - elif value.type == OBJECT: - return EsScript( - miss=term.missing().partial_eval(), - type=NUMBER, - expr="((" + value.expr + ") instanceof String) ? Double.parseDouble(" + value.expr + ") : (" + value.expr + ")", - frum=self - ) - - -@extend(IsNumberOp) -def to_es_script(self, schema): - value = self.term.to_es_script(schema) - if value.expr or value.i: - return TRUE.to_es_script(schema) - else: - return EsScript( - miss=FALSE, - type=BOOLEAN, - expr="(" + value.expr + ") instanceof java.lang.Double", - frum=self - ) - -@extend(CountOp) -def to_es_script(self, schema): - return EsScript( - miss=FALSE, - type=INTEGER, - expr="+".join("((" + t.missing().partial_eval().to_es_script(schema).expr + ") ? 0 : 1)" for t in self.terms), - frum=self - ) - - -@extend(LengthOp) -def to_esfilter(self, schema): - return {"regexp": {self.var.var: self.pattern.value}} - - -@extend(MaxOp) -def to_es_script(self, schema): - acc = NumberOp("number", self.terms[-1]).partial_eval().to_es_script(schema).expr - for t in reversed(self.terms[0:-1]): - acc = "Math.max(" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + " , " + acc + ")" - return EsScript( - miss=AndOp("or", [t.missing() for t in self.terms]), - type=NUMBER, - expr=acc, - frum=self - ) - - -@extend(MinOp) -def to_es_script(self, schema): - acc = NumberOp("number", self.terms[-1]).partial_eval().to_es_script(schema).expr - for t in reversed(self.terms[0:-1]): - acc = "Math.min(" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + " , " + acc + ")" - return EsScript( - miss=AndOp("or", [t.missing() for t in self.terms]), - type=NUMBER, - expr=acc, - frum=self - ) - - -_painless_operators = { - "add": (" + ", "0"), # (operator, zero-array default value) PAIR - "sum": (" + ", "0"), - "mul": (" * ", "1"), - "mult": (" * ", "1"), - "multiply": (" * ", "1") -} - - -@extend(MultiOp) -def to_es_script(self, schema): - op, unit = _painless_operators[self.op] - if self.nulls: - calc = op.join( - "((" + t.missing().to_es_script(schema).expr + ") ? 
" + unit + " : (" + NumberOp("number", t).partial_eval().to_es_script(schema).expr + "))" - for t in self.terms - ) - return WhenOp( - "when", - AndOp("and", [t.missing() for t in self.terms]), - **{"then": self.default, "else": EsScript(type=NUMBER, expr=calc, frum=self)} - ).partial_eval().to_es_script(schema) - else: - calc = op.join( - "(" + NumberOp("number", t).to_es_script(schema).expr + ")" - for t in self.terms - ) - return WhenOp( - "when", - OrOp("or", [t.missing() for t in self.terms]), - **{"then": self.default, "else": EsScript(type=NUMBER, expr=calc, frum=self)} - ).partial_eval().to_es_script(schema) - - -@extend(RegExpOp) -def to_esfilter(self, schema): - if isinstance(self.pattern, Literal) and isinstance(self.var, Variable): - cols = schema.leaves(self.var.var) - if len(cols) == 0: - return MATCH_NONE - elif len(cols) == 1: - return {"regexp": {cols[0].es_column: self.pattern.value}} - else: - Log.error("regex on not supported ") - else: - Log.error("regex only accepts a variable and literal pattern") - - -@extend(StringOp) -def to_es_script(self, schema): - term = FirstOp("first", self.term).partial_eval() - value = term.to_es_script(schema) - - if isinstance(value.frum, CoalesceOp): - return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_es_script(schema) - - if value.type == BOOLEAN: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=value.expr + ' ? "T" : "F"', - frum=self - ) - elif value.type == INTEGER: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr="String.valueOf(" + value.expr + ")", - frum=self - ) - elif value.type == NUMBER: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=expand_template(NUMBER_TO_STRING, {"expr":value.expr}), - frum=self - ) - elif value.type == STRING: - return value - else: - return EsScript( - miss=self.term.missing().partial_eval(), - type=STRING, - expr=expand_template(NUMBER_TO_STRING, {"expr":value.expr}), - frum=self - ) - - # ((Runnable)(() -> {int a=2; int b=3; System.out.println(a+b);})).run(); - # "((Runnable)((value) -> {String output=String.valueOf(value); if (output.endsWith('.0')) {return output.substring(0, output.length-2);} else return output;})).run(" + value.expr + ")" - - -@extend(TrueOp) -def to_es_script(self, schema): - return EsScript(type=BOOLEAN, expr="true", frum=self) - - -@extend(TrueOp) -def to_esfilter(self, schema): - return {"match_all": {}} - - -@extend(PrefixOp) -def to_es_script(self, schema): - if not self.field: - return "true" - else: - return "(" + self.field.to_es_script(schema) + ").startsWith(" + self.prefix.to_es_script(schema) + ")" - - -@extend(PrefixOp) -def to_esfilter(self, schema): - if not self.expr: - return {"match_all": {}} - elif isinstance(self.expr, Variable) and isinstance(self.prefix, Literal): - var = schema.leaves(self.expr.var)[0].es_column - return {"prefix": {var: self.prefix.value}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - -@extend(SuffixOp) -def to_es_script(self, schema): - if not self.suffix: - return "true" - else: - return "(" + self.expr.to_es_script(schema) + ").endsWith(" + self.suffix.to_es_script(schema) + ")" - - -@extend(SuffixOp) -def to_esfilter(self, schema): - if not self.suffix: - return {"match_all": {}} - elif isinstance(self.expr, Variable) and isinstance(self.suffix, Literal): - var = schema.leaves(self.expr.var)[0].es_column - return {"regexp": 
{var: ".*"+string2regexp(self.suffix.value)}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(InOp) -def to_es_script(self, schema): - superset = self.superset.to_es_script(schema) - value = self.value.to_es_script(schema) - return EsScript( - type=BOOLEAN, - expr="(" + superset.expr + ").contains(" + value.expr + ")", - frum=self - ) - - -@extend(InOp) -def to_esfilter(self, schema): - if isinstance(self.value, Variable): - var = self.value.var - cols = schema.leaves(var) - if cols: - var = cols[0].es_column - return {"terms": {var: self.superset.value}} - else: - return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(ScriptOp) -def to_es_script(self, schema): - return EsScript(type=self.data_type, expr=self.script, frum=self) - - -@extend(ScriptOp) -def to_esfilter(self, schema): - return {"script": es_script(self.script)} - - -@extend(Variable) -def to_es_script(self, schema, many=True): - if self.var == ".": - return "_source" - else: - if self.var == "_id": - return EsScript(type=STRING, expr='doc["_uid"].value.substring(doc["_uid"].value.indexOf(\'#\')+1)', frum=self) - - columns = schema.values(self.var) - acc = [] - for c in columns: - varname = c.es_column - frum = Variable(c.es_column) - q = quote(varname) - if many: - acc.append(EsScript( - miss=frum.missing(), - type=c.jx_type, - expr="doc[" + q + "].values" if c.jx_type != BOOLEAN else "doc[" + q + "].value", - frum=frum, - many=True - )) - else: - acc.append(EsScript( - miss=frum.missing(), - type=c.jx_type, - expr="doc[" + q + "].value" if c.jx_type != BOOLEAN else "doc[" + q + "].value", - frum=frum, - many=True - )) - - if len(acc) == 0: - return NULL.to_es_script(schema) - elif len(acc) == 1: - return acc[0] - else: - return CoalesceOp("coalesce", acc).to_es_script(schema) - - -@extend(WhenOp) -def to_es_script(self, schema): - if self.simplified: - when = self.when.to_es_script(schema) - then = self.then.to_es_script(schema) - els_ = self.els_.to_es_script(schema) - - if when is TRUE: - return then - elif when is FALSE: - return els_ - elif then.miss is TRUE: - return EsScript( - miss=self.missing(), - type=els_.type, - expr=els_.expr, - frum=self - ) - elif els_.miss is TRUE: - return EsScript( - miss=self.missing(), - type=then.type, - expr=then.expr, - frum=self - ) - - elif then.type == els_.type: - return EsScript( - miss=self.missing(), - type=then.type, - expr="(" + when.expr + ") ? (" + then.expr + ") : (" + els_.expr + ")", - frum=self - ) - elif then.type in (INTEGER, NUMBER) and els_.type in (INTEGER, NUMBER): - return EsScript( - miss=self.missing(), - type=NUMBER, - expr="(" + when.expr + ") ? 
(" + then.expr + ") : (" + els_.expr + ")", - frum=self - ) - else: - Log.error("do not know how to handle: {{self}}", self=self.__data__()) - else: - return self.partial_eval().to_es_script(schema) - - -@extend(WhenOp) -def to_esfilter(self, schema): - output = OrOp("or", [ - AndOp("and", [self.when, BooleanOp("boolean", self.then)]), - AndOp("and", [NotOp("not", self.when), BooleanOp("boolean", self.els_)]) - ]).partial_eval() - - return output.to_esfilter(schema) - - -@extend(BasicIndexOfOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).to_es_script(schema).expr - find = StringOp("string", self.find).to_es_script(schema).expr - start = IntegerOp("integer", self.start).to_es_script(schema).expr - - return EsScript( - miss=FALSE, - type=INTEGER, - expr="(" + v + ").indexOf(" + find + ", " + start + ")", - frum=self - ) - - -@extend(BasicIndexOfOp) -def to_esfilter(self, schema): - return ScriptOp("", self.to_es_script(schema).script(schema)).to_esfilter(schema) - - -@extend(BasicSubstringOp) -def to_es_script(self, schema): - v = StringOp("string", self.value).partial_eval().to_es_script(schema).expr - start = IntegerOp("string", self.start).partial_eval().to_es_script(schema).expr - end = IntegerOp("integer", self.end).partial_eval().to_es_script(schema).expr - - return EsScript( - miss=FALSE, - type=STRING, - expr="(" + v + ").substring(" + start + ", " + end + ")", - frum=self - ) - - - -MATCH_ALL = wrap({"match_all": {}}) -MATCH_NONE = es_not({"match_all": {}}) - - -def simplify_esfilter(esfilter): - try: - output = wrap(_normalize(wrap(esfilter))) - output.isNormal = None - return output - except Exception as e: - from mo_logs import Log - - Log.unexpected("programmer error", cause=e) - - -def _normalize(esfilter): - """ - TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING - REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS - """ - if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal: - return esfilter - - # Log.note("from: " + convert.value2json(esfilter)) - isDiff = True - - while isDiff: - isDiff = False - - if esfilter.bool.filter: - terms = esfilter.bool.filter - for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)): - if i0 == i1: - continue # SAME, IGNORE - # TERM FILTER ALREADY ASSUMES EXISTENCE - with suppress_exception: - if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]: - terms[i0] = MATCH_ALL - continue - - # IDENTICAL CAN BE REMOVED - with suppress_exception: - if t0 == t1: - terms[i0] = MATCH_ALL - continue - - # MERGE range FILTER WITH SAME FIELD - if i0 > i1: - continue # SAME, IGNORE - with suppress_exception: - f0, tt0 = t0.range.items()[0] - f1, tt1 = t1.range.items()[0] - if f0 == f1: - set_default(terms[i0].range[literal_field(f1)], tt1) - terms[i1] = MATCH_ALL - - output = [] - for a in terms: - if isinstance(a, (list, set)): - from mo_logs import Log - - Log.error("and clause is not allowed a list inside a list") - a_ = _normalize(a) - if a_ is not a: - isDiff = True - a = a_ - if a == MATCH_ALL: - isDiff = True - continue - if a == MATCH_NONE: - return MATCH_NONE - if a.bool.filter: - isDiff = True - a.isNormal = None - output.extend(a.bool.filter) - else: - a.isNormal = None - output.append(a) - if not output: - return MATCH_ALL - elif len(output) == 1: - # output[0].isNormal = True - esfilter = output[0] - break - elif isDiff: - esfilter = es_and(output) - continue - - if esfilter.bool.should: - output = [] - for a in 
esfilter.bool.should: - a_ = _normalize(a) - if a_ is not a: - isDiff = True - a = a_ - - if a.bool.should: - a.isNormal = None - isDiff = True - output.extend(a.bool.should) - else: - a.isNormal = None - output.append(a) - if not output: - return MATCH_NONE - elif len(output) == 1: - esfilter = output[0] - break - elif isDiff: - esfilter = wrap({"bool": {"should": output}}) - continue - - if esfilter.term != None: - if esfilter.term.keys(): - esfilter.isNormal = True - return esfilter - else: - return MATCH_ALL - - if esfilter.terms: - for k, v in esfilter.terms.items(): - if len(v) > 0: - if OR(vv == None for vv in v): - rest = [vv for vv in v if vv != None] - if len(rest) > 0: - output = es_or([ - es_missing(k), - {"terms": {k: rest}} - ]) - else: - output = es_missing(k) - output.isNormal = True - return output - else: - esfilter.isNormal = True - return esfilter - return MATCH_NONE - - if esfilter.bool.must_not: - _sub = esfilter.bool.must_not - sub = _normalize(_sub) - if sub == MATCH_NONE: - return MATCH_ALL - elif sub == MATCH_ALL: - return MATCH_NONE - elif sub is not _sub: - sub.isNormal = None - return wrap({"bool": {"must_not": sub, "isNormal": True}}) - else: - sub.isNormal = None - - esfilter.isNormal = True - return esfilter - - -def split_expression_by_depth(where, schema, output=None, var_to_depth=None): - """ - :param where: EXPRESSION TO INSPECT - :param schema: THE SCHEMA - :param output: - :param var_to_depth: MAP FROM EACH VARIABLE NAME TO THE DEPTH - :return: - """ - """ - It is unfortunate that ES can not handle expressions that - span nested indexes. This will split your where clause - returning {"and": [filter_depth0, filter_depth1, ...]} - """ - vars_ = where.vars() - - if var_to_depth is None: - if not vars_: - return Null - # MAP VARIABLE NAMES TO HOW DEEP THEY ARE - var_to_depth = {v.var: max(len(c.nested_path) - 1, 0) for v in vars_ for c in schema[v.var]} - all_depths = set(var_to_depth.values()) - # if -1 in all_depths: - # Log.error( - # "Can not find column with name {{column|quote}}", - # column=unwraplist([k for k, v in var_to_depth.items() if v == -1]) - # ) - if len(all_depths) == 0: - all_depths = {0} - output = wrap([[] for _ in range(MAX(all_depths) + 1)]) - else: - all_depths = set(var_to_depth[v.var] for v in vars_) - - if len(all_depths) == 1: - output[list(all_depths)[0]] += [where] - elif isinstance(where, AndOp): - for a in where.terms: - split_expression_by_depth(a, schema, output, var_to_depth) - else: - Log.error("Can not handle complex where clause") - - return output - - -def get_type(var_name): - type_ = var_name.split(".$")[1:] - if not type_: - return "j" - return json_type_to_es_script_type.get(type_[0], "j") - - -json_type_to_es_script_type = { - "string": "s", - "boolean": "b", - "number": "n" -} diff --git a/vendor/jx_elasticsearch/es52/expressions/__init__.py b/vendor/jx_elasticsearch/es52/expressions/__init__.py new file mode 100644 index 0000000..79de423 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/__init__.py @@ -0,0 +1,34 @@ +from jx_elasticsearch.es52.expressions._utils import ES52, split_expression_by_path, split_expression_by_depth +from jx_elasticsearch.es52.expressions.and_op import AndOp, es_and +from jx_elasticsearch.es52.expressions.basic_eq_op import BasicEqOp +from jx_elasticsearch.es52.expressions.basic_starts_with_op import BasicStartsWithOp +from jx_elasticsearch.es52.expressions.boolean_op import BooleanOp +from jx_elasticsearch.es52.expressions.case_op import CaseOp +from 
jx_elasticsearch.es52.expressions.coalesce_op import CoalesceOp +from jx_elasticsearch.es52.expressions.concat_op import ConcatOp +from jx_elasticsearch.es52.expressions.div_op import DivOp +from jx_elasticsearch.es52.expressions.eq_op import EqOp +from jx_elasticsearch.es52.expressions.es_nested_op import EsNestedOp +from jx_elasticsearch.es52.expressions.exists_op import ExistsOp +from jx_elasticsearch.es52.expressions.false_op import FalseOp, MATCH_NONE +from jx_elasticsearch.es52.expressions.find_op import FindOp +from jx_elasticsearch.es52.expressions.gt_op import GtOp +from jx_elasticsearch.es52.expressions.gte_op import GteOp +from jx_elasticsearch.es52.expressions.in_op import InOp +from jx_elasticsearch.es52.expressions.length_op import LengthOp +from jx_elasticsearch.es52.expressions.literal import Literal +from jx_elasticsearch.es52.expressions.lt_op import LtOp +from jx_elasticsearch.es52.expressions.lte_op import LteOp +from jx_elasticsearch.es52.expressions.missing_op import MissingOp +from jx_elasticsearch.es52.expressions.ne_op import NeOp +from jx_elasticsearch.es52.expressions.not_op import NotOp, es_not +from jx_elasticsearch.es52.expressions.or_op import OrOp, es_or +from jx_elasticsearch.es52.expressions.prefix_op import PrefixOp +from jx_elasticsearch.es52.expressions.reg_exp_op import RegExpOp +from jx_elasticsearch.es52.expressions.script_op import ScriptOp +from jx_elasticsearch.es52.expressions.suffix_op import SuffixOp +from jx_elasticsearch.es52.expressions.true_op import TrueOp, MATCH_ALL +from jx_elasticsearch.es52.expressions.variable import Variable +from jx_elasticsearch.es52.expressions.when_op import WhenOp + +ES52.register_ops(vars()) diff --git a/vendor/jx_elasticsearch/es52/expressions/_utils.py b/vendor/jx_elasticsearch/es52/expressions/_utils.py new file mode 100644 index 0000000..6d80cc5 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/_utils.py @@ -0,0 +1,131 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + AndOp as AndOp_, + FALSE, + Variable as Variable_, +) +from jx_base.expressions.literal import is_literal +from jx_base.language import Language, is_op +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.painless.es_script import es_script +from mo_dots import Null, wrap +from mo_future import first +from mo_logs import Log +from mo_math import MAX + +MATCH_NONE, MATCH_ALL, Painlesss = [None] * 3 + + +def _inequality_to_esfilter(self, schema): + if is_op(self.lhs, Variable_) and is_literal(self.rhs): + cols = schema.leaves(self.lhs.var) + if not cols: + lhs = self.lhs.var # HAPPENS DURING DEBUGGING, AND MAYBE IN REAL LIFE TOO + elif len(cols) == 1: + lhs = first(cols).es_column + else: + raise Log.error("operator {{op|quote}} does not work on objects", op=self.op) + return {"range": {lhs: {self.op: self.rhs.value}}} + else: + script = Painless[self].to_es_script(schema) + if script.miss is not FALSE: + Log.error("inequality must be decisive") + return {"script": es_script(script.expr)} + + +def split_expression_by_depth(where, schema, output=None, var_to_depth=None): + """ + :param where: EXPRESSION TO INSPECT + :param schema: THE SCHEMA + :param output: + :param var_to_depth: MAP FROM EACH VARIABLE NAME TO THE DEPTH + :return: + """ + """ + It is unfortunate that ES can not handle expressions that + span nested indexes. This will split your where clause + returning {"and": [filter_depth0, filter_depth1, ...]} + """ + vars_ = where.vars() + + if var_to_depth is None: + if not vars_: + return Null + # MAP VARIABLE NAMES TO HOW DEEP THEY ARE + var_to_depth = { + v.var: max(len(c.nested_path) - 1, 0) for v in vars_ for c in schema[v.var] + } + all_depths = set(var_to_depth.values()) + if len(all_depths) == 0: + all_depths = {0} + output = wrap([[] for _ in range(MAX(all_depths) + 1)]) + else: + all_depths = set(var_to_depth[v.var] for v in vars_) + + if len(all_depths) == 1: + output[first(all_depths)] += [where] + elif is_op(where, AndOp_): + for a in where.terms: + split_expression_by_depth(a, schema, output, var_to_depth) + else: + Log.error("Can not handle complex where clause") + + return output + + +def split_expression_by_path( + expr, schema, output=None, var_to_columns=None, lang=Language +): + """ + :param expr: EXPRESSION TO INSPECT + :param schema: THE SCHEMA + :param output: THE MAP FROM PATH TO EXPRESSION WE WANT UPDATED + :param var_to_columns: MAP FROM EACH VARIABLE NAME TO THE DEPTH + :return: output: A MAP FROM PATH TO EXPRESSION + """ + where_vars = expr.vars() + if var_to_columns is None: + var_to_columns = {v.var: schema.leaves(v.var) for v in where_vars} + output = {schema.query_path[0]: []} + if not var_to_columns: + output.setdefault(".", []).append( + expr + ) # LEGIT EXPRESSIONS OF ZERO VARIABLES + return output + + all_paths = set(c.nested_path[0] for v in where_vars for c in var_to_columns[v.var]) + + if len(all_paths) == 0: + output.setdefault(".", []).append(expr) + elif len(all_paths) == 1: + output.setdefault(first(all_paths), []).append(expr) + elif is_op(expr, AndOp_): + for w in expr.terms: + split_expression_by_path(w, schema, output, var_to_columns, lang=lang) + else: + Log.error("Can not handle complex expression clause") + + return output + + +def get_type(var_name): + type_ = var_name.split(".$")[1:] + if not type_: + return "j" + return 
json_type_to_es_script_type.get(type_[0], "j") + + +json_type_to_es_script_type = {"string": "s", "boolean": "b", "number": "n"} + + +ES52 = Language("ES52") diff --git a/vendor/jx_elasticsearch/es52/expressions/and_op.py b/vendor/jx_elasticsearch/es52/expressions/and_op.py new file mode 100644 index 0000000..7bc9217 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/and_op.py @@ -0,0 +1,28 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from mo_dots import wrap + +from jx_base.expressions import AndOp as AndOp_ +from jx_elasticsearch.es52.expressions._utils import ES52 + + +class AndOp(AndOp_): + def to_esfilter(self, schema): + if not len(self.terms): + return MATCH_ALL + else: + return es_and([ES52[t].to_esfilter(schema) for t in self.terms]) + + +def es_and(terms): + return wrap({"bool": {"filter": terms}}) diff --git a/vendor/jx_elasticsearch/es52/expressions/basic_eq_op.py b/vendor/jx_elasticsearch/es52/expressions/basic_eq_op.py new file mode 100644 index 0000000..77f7827 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/basic_eq_op.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + BasicEqOp as BasicEqOp_, + Variable as Variable_, + is_literal, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.painless import Painless +from mo_dots import is_many +from mo_future import first + + +class BasicEqOp(BasicEqOp_): + def to_esfilter(self, schema): + if is_op(self.lhs, Variable_) and is_literal(self.rhs): + lhs = self.lhs.var + cols = schema.leaves(lhs) + if cols: + lhs = first(cols).es_column + rhs = self.rhs.value + if is_many(rhs): + if len(rhs) == 1: + return {"term": {lhs: first(rhs)}} + else: + return {"terms": {lhs: rhs}} + else: + return {"term": {lhs: rhs}} + else: + return Painless[self].to_es_script(schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/basic_starts_with_op.py b/vendor/jx_elasticsearch/es52/expressions/basic_starts_with_op.py new file mode 100644 index 0000000..ea420d2 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/basic_starts_with_op.py @@ -0,0 +1,36 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + BasicStartsWithOp as BasicStartsWithOp_, + Variable as Variable_, + is_literal, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from jx_elasticsearch.es52.painless import false_script +from mo_future import first +from jx_elasticsearch.es52.painless import BasicStartsWithOp as PainlessBasicStartsWithOp + + +class BasicStartsWithOp(BasicStartsWithOp_): + def to_esfilter(self, schema): + if not self.value: + return MATCH_ALL + elif is_op(self.value, Variable_) and is_literal(self.prefix): + var = first(schema.leaves(self.value.var)).es_column + return {"prefix": {var: self.prefix.value}} + else: + output = PainlessBasicStartsWithOp.to_es_script(self, schema) + if output is false_script: + return MATCH_NONE + return output diff --git a/vendor/jx_elasticsearch/es52/expressions/boolean_op.py b/vendor/jx_elasticsearch/es52/expressions/boolean_op.py new file mode 100644 index 0000000..e05fef0 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/boolean_op.py @@ -0,0 +1,31 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BooleanOp as BooleanOp_, Variable as Variable_ +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.exists_op import es_exists +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.expressions._utils import ES52 +from jx_elasticsearch.es52.expressions import find_op +from jx_elasticsearch.es52.expressions.find_op import FindOp + + +class BooleanOp(BooleanOp_): + def to_esfilter(self, schema): + if is_op(self.term, Variable_): + return es_exists(self.term.var) + elif is_op(self.term, FindOp): + return ES52[self.term].to_esfilter(schema) + else: + return Painless[self].to_es_script(schema).to_esfilter(schema) + + +find_op.BooleanOp = BooleanOp diff --git a/vendor/jx_elasticsearch/es52/expressions/case_op.py b/vendor/jx_elasticsearch/es52/expressions/case_op.py new file mode 100644 index 0000000..d59ca9f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/case_op.py @@ -0,0 +1,31 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CaseOp as CaseOp_ +from jx_elasticsearch.es52.expressions.and_op import AndOp +from jx_elasticsearch.es52.expressions.or_op import OrOp +from mo_json import BOOLEAN +from mo_logs import Log + + +class CaseOp(CaseOp_): + def to_esfilter(self, schema): + if self.type == BOOLEAN: + return ( + OrOp( + [AndOp([w.when, w.then]) for w in self.whens[:-1]] + self.whens[-1:] + ) + .partial_eval() + .to_esfilter(schema) + ) + else: + Log.error("do not know how to handle") + return self.to_es_script(schema).script(schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/coalesce_op.py b/vendor/jx_elasticsearch/es52/expressions/coalesce_op.py new file mode 100644 index 0000000..d004424 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/coalesce_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CoalesceOp as CoalesceOp_ + + +class CoalesceOp(CoalesceOp_): + def to_esfilter(self, schema): + return {"bool": {"should": [{"exists": {"field": v}} for v in self.terms]}} diff --git a/vendor/jx_elasticsearch/es52/expressions/concat_op.py b/vendor/jx_elasticsearch/es52/expressions/concat_op.py new file mode 100644 index 0000000..d7f8535 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/concat_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ConcatOp as ConcatOp_, Variable as Variable_, is_literal +from jx_base.language import is_op +from pyLibrary.convert import string2regexp + + +class ConcatOp(ConcatOp_): + def to_esfilter(self, schema): + if is_op(self.value, Variable_) and is_literal(self.find): + return { + "regexp": {self.value.var: ".*" + string2regexp(self.find.value) + ".*"} + } + else: + return self.to_es_script(schema).script(schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/div_op.py b/vendor/jx_elasticsearch/es52/expressions/div_op.py new file mode 100644 index 0000000..bdf9610 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/div_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import DivOp as DivOp_ +from jx_elasticsearch.es52.expressions.not_op import NotOp + + +class DivOp(DivOp_): + def to_esfilter(self, schema): + return NotOp(self.missing()).partial_eval().to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/eq_op.py b/vendor/jx_elasticsearch/es52/expressions/eq_op.py new file mode 100644 index 0000000..9b5b29c --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/eq_op.py @@ -0,0 +1,105 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + EqOp as EqOp_, + FALSE, + TRUE, + Variable as Variable_, + is_literal, + simplified, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions import BasicEqOp +from jx_elasticsearch.es52.expressions._utils import ES52 +from jx_elasticsearch.es52.expressions.case_op import CaseOp +from jx_elasticsearch.es52.expressions.or_op import OrOp +from jx_elasticsearch.es52.expressions.when_op import WhenOp +from jx_elasticsearch.es52.util import pull_functions +from jx_python.jx import value_compare +from mo_dots import Data, is_container +from mo_future import first +from mo_json import BOOLEAN, python_type_to_json_type, NUMBER_TYPES +from mo_logs import Log + + +class EqOp(EqOp_): + @simplified + def partial_eval(self): + lhs = ES52[self.lhs].partial_eval() + rhs = ES52[self.rhs].partial_eval() + + if is_literal(lhs): + if is_literal(rhs): + return FALSE if value_compare(lhs.value, rhs.value) else TRUE + else: + return EqOp([rhs, lhs]) # FLIP SO WE CAN USE TERMS FILTER + + return EqOp([lhs, rhs]) + + def to_esfilter(self, schema): + if is_op(self.lhs, Variable_) and is_literal(self.rhs): + rhs = self.rhs.value + lhs = self.lhs.var + cols = schema.leaves(lhs) + if not cols: + Log.warning( + "{{col}} does not exist while processing {{expr}}", + col=lhs, + expr=self.__data__(), + ) + + if is_container(rhs): + if len(rhs) == 1: + rhs = rhs[0] + else: + types = Data() # MAP JSON TYPE TO LIST OF LITERALS + for r in rhs: + types[python_type_to_json_type[r.__class__]] += [r] + if len(types) == 1: + jx_type, values = first(types.items()) + for c in cols: + if jx_type == c.jx_type or (jx_type in NUMBER_TYPES and c.jx_type in NUMBER_TYPES): + return {"terms": {c.es_column: values}} + return FALSE.to_esfilter(schema) + else: + return ( + OrOp( + [ + EqOp([self.lhs, values]) + for t, values in types.items() + ] + ) + .partial_eval() + .to_esfilter(schema) + ) + + for c in cols: + if c.jx_type == BOOLEAN: + rhs = pull_functions[c.jx_type](rhs) + rhs_type = python_type_to_json_type[rhs.__class__] + if rhs_type == c.jx_type or (rhs_type in NUMBER_TYPES and c.jx_type in NUMBER_TYPES): + return {"term": {c.es_column: rhs}} + return FALSE.to_esfilter(schema) + else: + return ( + ES52[ + CaseOp( + [ + WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}), + WhenOp(self.rhs.missing(), **{"then": FALSE}), + BasicEqOp([self.lhs, self.rhs]), + ] + ) + .partial_eval() + ] + .to_esfilter(schema) + ) diff --git a/vendor/jx_elasticsearch/es52/expressions/es_nested_op.py 
b/vendor/jx_elasticsearch/es52/expressions/es_nested_op.py new file mode 100644 index 0000000..db69079 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/es_nested_op.py @@ -0,0 +1,25 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import EsNestedOp as EsNestedOp_ + + +class EsNestedOp(EsNestedOp_): + def to_esfilter(self, schema): + if self.path.var == ".": + return {"query": self.query.to_esfilter(schema)} + else: + return { + "nested": { + "path": self.path.var, + "query": self.query.to_esfilter(schema), + } + } diff --git a/vendor/jx_elasticsearch/es52/expressions/exists_op.py b/vendor/jx_elasticsearch/es52/expressions/exists_op.py new file mode 100644 index 0000000..86c3719 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/exists_op.py @@ -0,0 +1,21 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ExistsOp as ExistsOp_ + + +class ExistsOp(ExistsOp_): + def to_esfilter(self, schema): + return self.field.exists().partial_eval().to_esfilter(schema) + + +def es_exists(term): + return {"exists": {"field": term}} diff --git a/vendor/jx_elasticsearch/es52/expressions/false_op.py b/vendor/jx_elasticsearch/es52/expressions/false_op.py new file mode 100644 index 0000000..b8a8ce5 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/false_op.py @@ -0,0 +1,21 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FalseOp, extend + + +@extend(FalseOp) +def to_esfilter(self, schema): + return MATCH_NONE + + +MATCH_NONE = {"bool": {"must_not": {"match_all": {}}}} diff --git a/vendor/jx_elasticsearch/es52/expressions/find_op.py b/vendor/jx_elasticsearch/es52/expressions/find_op.py new file mode 100644 index 0000000..1729b07 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/find_op.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +import re + +from jx_base.expressions import ( + FindOp as FindOp_, + NULL, + Variable as Variable_, + is_literal, + simplified, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.expressions._utils import ES52 +from jx_elasticsearch.es52.expressions.not_op import NotOp +from mo_json import STRING + +BooleanOp = None + +class FindOp(FindOp_): + def to_esfilter(self, schema): + if ( + is_op(self.value, Variable_) + and is_literal(self.find) + and self.default is NULL + and is_literal(self.start) + and self.start.value == 0 + ): + columns = [c for c in schema.leaves(self.value.var) if c.jx_type == STRING] + if len(columns) == 1: + return { + "regexp": { + columns[0].es_column: ".*" + re.escape(self.find.value) + ".*" + } + } + # CONVERT TO SCRIPT, SIMPLIFY, AND THEN BACK TO FILTER + self.simplified = False + return ES52[Painless[self].partial_eval()].to_esfilter(schema) + + @simplified + def partial_eval(self): + value = self.value.partial_eval() + find = self.find.partial_eval() + default = self.default.partial_eval() + start = self.start.partial_eval() + + return FindOp([value, find], default=default, start=start) + + def missing(self): + return NotOp(self) + + def exists(self): + return BooleanOp(self) diff --git a/vendor/jx_elasticsearch/es52/expressions/gt_op.py b/vendor/jx_elasticsearch/es52/expressions/gt_op.py new file mode 100644 index 0000000..2a12da7 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/gt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GtOp as GtOp_ +from jx_elasticsearch.es52.expressions._utils import _inequality_to_esfilter + + +class GtOp(GtOp_): + to_esfilter = _inequality_to_esfilter diff --git a/vendor/jx_elasticsearch/es52/expressions/gte_op.py b/vendor/jx_elasticsearch/es52/expressions/gte_op.py new file mode 100644 index 0000000..2521ccf --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/gte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GteOp as GteOp_ +from jx_elasticsearch.es52.expressions._utils import _inequality_to_esfilter + + +class GteOp(GteOp_): + to_esfilter = _inequality_to_esfilter diff --git a/vendor/jx_elasticsearch/es52/expressions/in_op.py b/vendor/jx_elasticsearch/es52/expressions/in_op.py new file mode 100644 index 0000000..45cb192 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/in_op.py @@ -0,0 +1,57 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + InOp as InOp_, + TupleOp, + Variable as Variable_, + is_literal, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.expressions.eq_op import EqOp +from jx_elasticsearch.es52.expressions.or_op import OrOp +from mo_dots import is_many +from mo_future import first +from mo_json import BOOLEAN +from pyLibrary.convert import value2boolean + + +class InOp(InOp_): + def to_esfilter(self, schema): + if is_op(self.value, Variable_): + var = self.value.var + cols = schema.leaves(var) + if not cols: + return MATCH_NONE + col = first(cols) + var = col.es_column + + if is_literal(self.superset): + if col.jx_type == BOOLEAN: + if is_literal(self.superset) and not is_many(self.superset.value): + return {"term": {var: value2boolean(self.superset.value)}} + else: + return {"terms": {var: list(map(value2boolean, self.superset.value))}} + else: + if is_literal(self.superset) and not is_many(self.superset.value): + return {"term": {var: self.superset.value}} + else: + return {"terms": {var: self.superset.value}} + elif is_op(self.superset, TupleOp): + return ( + OrOp([EqOp([self.value, s]) for s in self.superset.terms]) + .partial_eval() + .to_esfilter(schema) + ) + # THE HARD WAY + return Painless[self].to_es_script(schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/length_op.py b/vendor/jx_elasticsearch/es52/expressions/length_op.py new file mode 100644 index 0000000..048a81e --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/length_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LengthOp as LengthOp_ + + +class LengthOp(LengthOp_): + def to_esfilter(self, schema): + return {"regexp": {self.var.var: self.pattern.value}} diff --git a/vendor/jx_elasticsearch/es52/expressions/literal.py b/vendor/jx_elasticsearch/es52/expressions/literal.py new file mode 100644 index 0000000..bf07536 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/literal.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import Literal as Literal_ + + +class Literal(Literal_): + def to_esfilter(self, schema): + return self.json diff --git a/vendor/jx_elasticsearch/es52/expressions/lt_op.py b/vendor/jx_elasticsearch/es52/expressions/lt_op.py new file mode 100644 index 0000000..0b89f15 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/lt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LtOp as LtOp_ +from jx_elasticsearch.es52.expressions._utils import _inequality_to_esfilter + + +class LtOp(LtOp_): + to_esfilter = _inequality_to_esfilter diff --git a/vendor/jx_elasticsearch/es52/expressions/lte_op.py b/vendor/jx_elasticsearch/es52/expressions/lte_op.py new file mode 100644 index 0000000..baedc62 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/lte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LteOp as LteOp_ +from jx_elasticsearch.es52.expressions._utils import _inequality_to_esfilter + + +class LteOp(LteOp_): + to_esfilter = _inequality_to_esfilter diff --git a/vendor/jx_elasticsearch/es52/expressions/missing_op.py b/vendor/jx_elasticsearch/es52/expressions/missing_op.py new file mode 100644 index 0000000..6c30eef --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/missing_op.py @@ -0,0 +1,35 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MissingOp as MissingOp_, Variable as Variable_ +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.and_op import es_and +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from jx_elasticsearch.es52.painless import MissingOp as PainlessMissingOp +from mo_future import first + + +class MissingOp(MissingOp_): + def to_esfilter(self, schema): + if is_op(self.expr, Variable_): + cols = schema.leaves(self.expr.var) + if not cols: + return MATCH_ALL + elif len(cols) == 1: + return es_missing(first(cols).es_column) + else: + return es_and([es_missing(c.es_column) for c in cols]) + else: + return PainlessMissingOp.to_es_script(self, schema).to_esfilter(schema) + +def es_missing(term): + return {"bool": {"must_not": {"exists": {"field": term}}}} + diff --git a/vendor/jx_elasticsearch/es52/expressions/ne_op.py b/vendor/jx_elasticsearch/es52/expressions/ne_op.py new file mode 100644 index 0000000..747b108 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/ne_op.py @@ -0,0 +1,71 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NeOp as NeOp_, Variable as Variable_, is_literal +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.not_op import es_not +from jx_elasticsearch.es52.expressions.script_op import ScriptOp +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from mo_future import first +from mo_logs import Log + + +class NeOp(NeOp_): + def to_esfilter(self, schema): + if is_op(self.lhs, Variable_) and is_literal(self.rhs): + columns = schema.values(self.lhs.var) + if len(columns) == 0: + return MATCH_ALL + elif len(columns) == 1: + return es_not({"term": {first(columns).es_column: self.rhs.value}}) + else: + Log.error("column split to multiple, not handled") + else: + lhs = self.lhs.partial_eval().to_es_script(schema) + rhs = self.rhs.partial_eval().to_es_script(schema) + + if lhs.many: + if rhs.many: + return es_not( + ScriptOp( + ( + "(" + + lhs.expr + + ").size()==(" + + rhs.expr + + ").size() && " + + "(" + + rhs.expr + + ").containsAll(" + + lhs.expr + + ")" + ) + ).to_esfilter(schema) + ) + else: + return es_not( + ScriptOp( + "(" + lhs.expr + ").contains(" + rhs.expr + ")" + ).to_esfilter(schema) + ) + else: + if rhs.many: + return es_not( + ScriptOp( + "(" + rhs.expr + ").contains(" + lhs.expr + ")" + ).to_esfilter(schema) + ) + else: + return es_not( + ScriptOp( + "(" + lhs.expr + ") != (" + rhs.expr + ")" + ).to_esfilter(schema) + ) diff --git a/vendor/jx_elasticsearch/es52/expressions/not_op.py b/vendor/jx_elasticsearch/es52/expressions/not_op.py new file mode 100644 index 0000000..fdb1a47 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/not_op.py @@ -0,0 +1,51 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_elasticsearch.es52.expressions import literal, or_op +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from mo_dots import wrap + +from jx_base.expressions import ( + MissingOp as MissingOp_, + NotOp as NotOp_, + Variable as Variable_, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions._utils import ES52 +from jx_elasticsearch.es52.expressions.or_op import es_or +from mo_future import first +from mo_json import NESTED, OBJECT + + +class NotOp(NotOp_): + def to_esfilter(self, schema): + if is_op(self.term, MissingOp_) and is_op(self.term.expr, Variable_): + # PREVENT RECURSIVE LOOP + v = self.term.expr.var + cols = schema.values(v, (OBJECT, NESTED)) + if len(cols) == 0: + return MATCH_NONE + elif len(cols) == 1: + return {"exists": {"field": first(cols).es_column}} + else: + return es_or([{"exists": {"field": c.es_column}} for c in cols]) + else: + operand = ES52[self.term].to_esfilter(schema) + return es_not(operand) + + +def es_not(term): + return wrap({"bool": {"must_not": term}}) + + +literal.es_not = es_not +or_op.es_not = es_not +or_op.NotOp = NotOp diff --git a/vendor/jx_elasticsearch/es52/expressions/or_op.py b/vendor/jx_elasticsearch/es52/expressions/or_op.py new file mode 100644 index 0000000..6eef073 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/or_op.py @@ -0,0 +1,44 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import OrOp as OrOp_ +from jx_elasticsearch.es52.expressions._utils import ES52 +from jx_elasticsearch.es52.expressions.and_op import es_and +from mo_dots import wrap + +NotOp, es_not = [None] * 2 + + +class OrOp(OrOp_): + def to_esfilter(self, schema): + + if schema.snowflake.namespace.es_cluster.version.startswith("5."): + # VERSION 5.2.x + # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION + # {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL + # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL + + # OR(x) == NOT(AND(NOT(xi) for xi in x)) + output = es_not( + es_and( + [NotOp(t).partial_eval().to_esfilter(schema) for t in self.terms] + ) + ) + return output + else: + # VERSION 6.2+ + return es_or( + [ES52[t].partial_eval().to_esfilter(schema) for t in self.terms] + ) + + +def es_or(terms): + return wrap({"bool": {"should": terms}}) diff --git a/vendor/jx_elasticsearch/es52/expressions/prefix_op.py b/vendor/jx_elasticsearch/es52/expressions/prefix_op.py new file mode 100644 index 0000000..76e8066 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/prefix_op.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + FALSE, + NULL, + PrefixOp as PrefixOp_, + StringOp as StringOp_, + TRUE, + Variable as Variable_, + is_literal, + simplified, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from jx_elasticsearch.es52.painless import StringOp as PainlessStringOp, PrefixOp as PainlessPrefixOp +from mo_future import first + + +class PrefixOp(PrefixOp_): + @simplified + def partial_eval(self): + expr = PainlessStringOp(self.expr).partial_eval() + prefix = PainlessStringOp(self.prefix).partial_eval() + + if prefix is NULL: + return TRUE + if expr is NULL: + return FALSE + + return PrefixOp([expr, prefix]) + + def to_esfilter(self, schema): + if is_literal(self.prefix) and not self.prefix.value: + return MATCH_ALL + + expr = self.expr + + if expr is NULL: + return MATCH_NONE + elif not expr: + return MATCH_ALL + + if is_op(expr, StringOp_): + expr = expr.term + + if is_op(expr, Variable_) and is_literal(self.prefix): + col = first(schema.leaves(expr.var)) + if not col: + return MATCH_NONE + return {"prefix": {col.es_column: self.prefix.value}} + else: + return PainlessPrefixOp.to_es_script(self, schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/reg_exp_op.py b/vendor/jx_elasticsearch/es52/expressions/reg_exp_op.py new file mode 100644 index 0000000..21e9c75 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/reg_exp_op.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import RegExpOp as RegExpOp_, Variable as Variable_, is_literal +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from mo_future import first +from mo_logs import Log + + +class RegExpOp(RegExpOp_): + def to_esfilter(self, schema): + if is_literal(self.pattern) and is_op(self.var, Variable_): + cols = schema.leaves(self.var.var) + if len(cols) == 0: + return MATCH_NONE + elif len(cols) == 1: + return {"regexp": {first(cols).es_column: self.pattern.value}} + else: + Log.error("regexp on multiple columns is not supported") + else: + Log.error("regex only accepts a variable and literal pattern") diff --git a/vendor/jx_elasticsearch/es52/expressions/script_op.py b/vendor/jx_elasticsearch/es52/expressions/script_op.py new file mode 100644 index 0000000..6fcb0e0 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/script_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ScriptOp as ScriptOp_ +from jx_elasticsearch.es52.painless.es_script import es_script + + +class ScriptOp(ScriptOp_): + def to_esfilter(self, schema): + return {"script": es_script(self.script)} diff --git a/vendor/jx_elasticsearch/es52/expressions/suffix_op.py b/vendor/jx_elasticsearch/es52/expressions/suffix_op.py new file mode 100644 index 0000000..3d15797 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/suffix_op.py @@ -0,0 +1,27 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import SuffixOp as SuffixOp_, Variable as Variable_, is_literal +from jx_base.language import is_op +from jx_elasticsearch.es52.expressions.true_op import MATCH_ALL +from mo_future import first +from pyLibrary.convert import string2regexp +from jx_elasticsearch.es52.painless import SuffixOp as PainlessSuffixOp + +class SuffixOp(SuffixOp_): + def to_esfilter(self, schema): + if not self.suffix: + return MATCH_ALL + elif is_op(self.expr, Variable_) and is_literal(self.suffix): + var = first(schema.leaves(self.expr.var)).es_column + return {"regexp": {var: ".*" + string2regexp(self.suffix.value)}} + else: + return PainlessSuffixOp.to_es_script(self, schema).to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/expressions/true_op.py b/vendor/jx_elasticsearch/es52/expressions/true_op.py new file mode 100644 index 0000000..651a825 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/true_op.py @@ -0,0 +1,22 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import TrueOp, extend +from mo_dots import wrap + + +@extend(TrueOp) +def to_esfilter(self, schema): + return MATCH_ALL + + +MATCH_ALL = wrap({"match_all": {}}) diff --git a/vendor/jx_elasticsearch/es52/expressions/variable.py b/vendor/jx_elasticsearch/es52/expressions/variable.py new file mode 100644 index 0000000..4dd6955 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/variable.py @@ -0,0 +1,41 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import Variable as Variable_ +from jx_elasticsearch.es52.expressions.and_op import es_and +from jx_elasticsearch.es52.expressions.exists_op import es_exists +from jx_elasticsearch.es52.expressions.false_op import MATCH_NONE +from mo_future import first +from mo_json import BOOLEAN, NESTED, OBJECT + + +class Variable(Variable_): + def to_esfilter(self, schema): + v = self.var + cols = schema.values(v, (OBJECT, NESTED)) + if len(cols) == 0: + return MATCH_NONE + elif len(cols) == 1: + c = first(cols) + return ( + {"term": {c.es_column: True}} + if c.es_type == BOOLEAN + else es_exists(c.es_column) + ) + else: + return es_and( + [ + {"term": {c.es_column: True}} + if c.es_type == BOOLEAN + else es_exists(c.es_column) + for c in cols + ] + ) diff --git a/vendor/jx_elasticsearch/es52/expressions/when_op.py b/vendor/jx_elasticsearch/es52/expressions/when_op.py new file mode 100644 index 0000000..9676f39 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/expressions/when_op.py @@ -0,0 +1,28 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import WhenOp as WhenOp_ +from jx_elasticsearch.es52.expressions.and_op import AndOp +from jx_elasticsearch.es52.expressions.boolean_op import BooleanOp +from jx_elasticsearch.es52.expressions.not_op import NotOp +from jx_elasticsearch.es52.expressions.or_op import OrOp + + +class WhenOp(WhenOp_): + def to_esfilter(self, schema): + output = OrOp( + [ + AndOp([self.when, BooleanOp(self.then)]), + AndOp([NotOp(self.when), BooleanOp(self.els_)]), + ] + ).partial_eval() + + return output.to_esfilter(schema) diff --git a/vendor/jx_elasticsearch/es52/format.py b/vendor/jx_elasticsearch/es52/format.py deleted file mode 100644 index b63fc88..0000000 --- a/vendor/jx_elasticsearch/es52/format.py +++ /dev/null @@ -1,316 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from jx_base.expressions import TupleOp -from jx_elasticsearch.es52.aggs import count_dim, aggs_iterator, format_dispatch, drill -from jx_python.containers.cube import Cube -from mo_collections.matrix import Matrix -from mo_dots import Data, set_default, wrap, split_field, coalesce -from mo_future import sort_using_key -from mo_logs import Log -from mo_logs.strings import quote -from pyLibrary import convert - -FunctionType = type(lambda: 1) - -def format_cube(decoders, aggs, start, query, select): - # decoders = sorted(decoders, key=lambda d: -d.edge.dim) # REVERSE DECODER ORDER, BECAUSE ES QUERY WAS BUILT IN REVERSE ORDER - new_edges = count_dim(aggs, decoders) - - dims = [] - for e in new_edges: - if isinstance(e.value, TupleOp): - e.allowNulls = False - - extra = 0 if e.allowNulls is False else 1 - dims.append(len(e.domain.partitions) + extra) - - dims = tuple(dims) - matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select] - for row, coord, agg in aggs_iterator(aggs, decoders): - for s, m in matricies: - try: - v = s.pull(agg) - m[coord] = v - except Exception as e: - # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS - if agg.get('doc_count') != 0: - Log.error("Programmer error", cause=e) - - cube = Cube( - query.select, - sort_using_key(new_edges, key=lambda e: e.dim), # ENSURE EDGES ARE IN SAME ORDER AS QUERY - {s.name: m for s, m in matricies} - ) - cube.frum = query - return cube - - -def format_cube_from_aggop(decoders, aggs, start, query, select): - agg = drill(aggs) - matricies = [(s, Matrix(dims=[], zeros=s.default)) for s in select] - for s, m in matricies: - m[tuple()] = s.pull(agg) - cube = Cube(query.select, [], {s.name: m for s, m in matricies}) - cube.frum = query - return cube - - -def format_table(decoders, aggs, start, query, select): - new_edges = count_dim(aggs, decoders) - header = new_edges.name + select.name - - def data(): - dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) - is_sent = Matrix(dims=dims, zeros=0) - - if query.sort and not query.groupby: - all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS - for row, coord, agg in aggs_iterator(aggs, decoders): - missing_coord = all_coord.next() - while coord != missing_coord: - record = [d.get_value(missing_coord[i]) for i, d in enumerate(decoders)] - for s in select: - if s.aggregate == "count": - record.append(0) - else: - record.append(None) - yield record - missing_coord = all_coord.next() - - output = [d.get_value(c) for c, d in zip(coord, decoders)] - for s in select: - output.append(s.pull(agg)) - yield output - else: - for row, coord, agg in aggs_iterator(aggs, decoders): - is_sent[coord] = 1 - - output = [d.get_value(c) for c, d in zip(coord, decoders)] - for s in select: - output.append(s.pull(agg)) - yield output - - # EMIT THE MISSING CELLS IN THE CUBE - if not query.groupby: - for c, v in is_sent: - if not v: - record = [d.get_value(c[i]) for i, d in enumerate(decoders)] - for s in select: - if s.aggregate == "count": - record.append(0) - else: - record.append(None) - yield record - - return Data( - meta={"format": "table"}, - header=header, - data=list(data()) - ) - - -def format_table_from_groupby(decoders, aggs, start, query, select): - header = [d.edge.name.replace("\\.", ".") for d in decoders] + select.name - - def data(): - for 
row, coord, agg in aggs_iterator(aggs, decoders): - if agg.get('doc_count', 0) == 0: - continue - output = [d.get_value_from_row(row) for d in decoders] - for s in select: - output.append(s.pull(agg)) - yield output - - return Data( - meta={"format": "table"}, - header=header, - data=list(data()) - ) - - -def format_table_from_aggop(decoders, aggs, start, query, select): - header = select.name - agg = drill(aggs) - row = [] - for s in select: - row.append(s.pull(agg)) - - return Data( - meta={"format": "table"}, - header=header, - data=[row] - ) - - -def format_tab(decoders, aggs, start, query, select): - table = format_table(decoders, aggs, start, query, select) - - def data(): - yield "\t".join(map(quote, table.header)) - for d in table.data: - yield "\t".join(map(quote, d)) - - return data() - - -def format_csv(decoders, aggs, start, query, select): - table = format_table(decoders, aggs, start, query, select) - - def data(): - yield ", ".join(map(quote, table.header)) - for d in table.data: - yield ", ".join(map(quote, d)) - - return data() - - -def format_list_from_groupby(decoders, aggs, start, query, select): - def data(): - for row, coord, agg in aggs_iterator(aggs, decoders): - if agg.get('doc_count', 0) == 0: - continue - output = Data() - for g, d in zip(query.groupby, decoders): - output[coalesce(g.put.name, g.name)] = d.get_value_from_row(row) - - for s in select: - output[s.name] = s.pull(agg) - yield output - - for g in query.groupby: - g.put.name = coalesce(g.put.name, g.name) - - output = Data( - meta={"format": "list"}, - data=list(data()) - ) - return output - - -def format_list(decoders, aggs, start, query, select): - new_edges = count_dim(aggs, decoders) - - def data(): - dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges) - - is_sent = Matrix(dims=dims, zeros=0) - if query.sort and not query.groupby: - # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE - all_coord = is_sent._all_combos() # TRACK THE EXPECTED COMBINATIONS - for _, coord, agg in aggs_iterator(aggs, decoders): - missing_coord = all_coord.next() - while coord != missing_coord: - # INSERT THE MISSING COORDINATE INTO THE GENERATION - output = Data() - for i, d in enumerate(decoders): - output[query.edges[i].name] = d.get_value(missing_coord[i]) - - for s in select: - if s.aggregate == "count": - output[s.name] = 0 - yield output - missing_coord = all_coord.next() - - output = Data() - for e, c, d in zip(query.edges, coord, decoders): - output[e.name] = d.get_value(c) - - for s in select: - output[s.name] = s.pull(agg) - yield output - else: - - for row, coord, agg in aggs_iterator(aggs, decoders): - is_sent[coord] = 1 - - output = Data() - for e, c, d in zip(query.edges, coord, decoders): - output[e.name] = d.get_value(c) - - for s in select: - output[s.name] = s.pull(agg) - yield output - - # EMIT THE MISSING CELLS IN THE CUBE - if not query.groupby: - for c, v in is_sent: - if not v: - output = Data() - for i, d in enumerate(decoders): - output[query.edges[i].name] = d.get_value(c[i]) - - for s in select: - if s.aggregate == "count": - output[s.name] = 0 - yield output - - output = Data( - meta={"format": "list"}, - data=list(data()) - ) - return output - - -def format_list_from_aggop(decoders, aggs, start, query, select): - agg = drill(aggs) - - if isinstance(query.select, list): - item = Data() - for s in select: - item[s.name] = s.pull(agg) - else: - item = select[0].pull(agg) - - if query.edges or query.groupby: - 
return wrap({ - "meta": {"format": "list"}, - "data": [item] - }) - else: - return wrap({ - "meta": {"format": "value"}, - "data": item - }) - - -def format_line(decoders, aggs, start, query, select): - list = format_list(decoders, aggs, start, query, select) - - def data(): - for d in list.data: - yield convert.value2json(d) - - return data() - - -set_default(format_dispatch, { - None: (format_cube, format_table_from_groupby, format_cube_from_aggop, "application/json"), - "cube": (format_cube, format_cube, format_cube_from_aggop, "application/json"), - "table": (format_table, format_table_from_groupby, format_table_from_aggop, "application/json"), - "list": (format_list, format_list_from_groupby, format_list_from_aggop, "application/json"), - # "csv": (format_csv, format_csv_from_groupby, "text/csv"), - # "tab": (format_tab, format_tab_from_groupby, "text/tab-separated-values"), - # "line": (format_line, format_line_from_groupby, "application/json") -}) - - -def _get(v, k, d): - for p in split_field(k): - try: - v = v.get(p) - if v is None: - return d - except Exception: - v = [vv.get(p) for vv in v] - return v diff --git a/vendor/jx_elasticsearch/es52/painless/__init__.py b/vendor/jx_elasticsearch/es52/painless/__init__.py new file mode 100644 index 0000000..9d63ebb --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/__init__.py @@ -0,0 +1,59 @@ +from jx_elasticsearch.es52.painless._utils import Painless, LIST_TO_PIPE +from jx_elasticsearch.es52.painless.add_op import AddOp +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.basic_add_op import BasicAddOp +from jx_elasticsearch.es52.painless.basic_eq_op import BasicEqOp +from jx_elasticsearch.es52.painless.basic_index_of_op import BasicIndexOfOp +from jx_elasticsearch.es52.painless.basic_mul_op import BasicMulOp +from jx_elasticsearch.es52.painless.basic_starts_with_op import BasicStartsWithOp +from jx_elasticsearch.es52.painless.basic_substring_op import BasicSubstringOp +from jx_elasticsearch.es52.painless.boolean_op import BooleanOp +from jx_elasticsearch.es52.painless.case_op import CaseOp +from jx_elasticsearch.es52.painless.coalesce_op import CoalesceOp +from jx_elasticsearch.es52.painless.concat_op import ConcatOp +from jx_elasticsearch.es52.painless.count_op import CountOp +from jx_elasticsearch.es52.painless.date_op import DateOp +from jx_elasticsearch.es52.painless.div_op import DivOp +from jx_elasticsearch.es52.painless.eq_op import EqOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.exists_op import ExistsOp +from jx_elasticsearch.es52.painless.exp_op import ExpOp +from jx_elasticsearch.es52.painless.find_op import FindOp +from jx_elasticsearch.es52.painless.first_op import FirstOp +from jx_elasticsearch.es52.painless.floor_op import FloorOp +from jx_elasticsearch.es52.painless.gt_op import GtOp +from jx_elasticsearch.es52.painless.gte_op import GteOp +from jx_elasticsearch.es52.painless.in_op import InOp +from jx_elasticsearch.es52.painless.integer_op import IntegerOp +from jx_elasticsearch.es52.painless.is_number_op import IsNumberOp +from jx_elasticsearch.es52.painless.leaves_op import LeavesOp +from jx_elasticsearch.es52.painless.length_op import LengthOp +from jx_elasticsearch.es52.painless.literal import Literal +from jx_elasticsearch.es52.painless.lt_op import LtOp +from jx_elasticsearch.es52.painless.lte_op import LteOp +from jx_elasticsearch.es52.painless.max_op import MaxOp +from jx_elasticsearch.es52.painless.min_op 
import MinOp +from jx_elasticsearch.es52.painless.missing_op import MissingOp +from jx_elasticsearch.es52.painless.mod_op import ModOp +from jx_elasticsearch.es52.painless.mul_op import MulOp +from jx_elasticsearch.es52.painless.ne_op import NeOp +from jx_elasticsearch.es52.painless.not_left_op import NotLeftOp +from jx_elasticsearch.es52.painless.not_op import NotOp +from jx_elasticsearch.es52.painless.number_op import NumberOp +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.prefix_op import PrefixOp +from jx_elasticsearch.es52.painless.string_op import StringOp +from jx_elasticsearch.es52.painless.sub_op import SubOp +from jx_elasticsearch.es52.painless.suffix_op import SuffixOp +from jx_elasticsearch.es52.painless.tuple_op import TupleOp +from jx_elasticsearch.es52.painless.union_op import UnionOp +from jx_elasticsearch.es52.painless.variable import Variable +from jx_elasticsearch.es52.painless.when_op import WhenOp +from jx_elasticsearch.es52.painless.false_op import FalseOp, false_script +from jx_elasticsearch.es52.painless.true_op import TrueOp, true_script +from jx_elasticsearch.es52.painless.null_op import NullOp, null_script + + +Painless.register_ops(vars()) + + diff --git a/vendor/jx_elasticsearch/es52/painless/_utils.py b/vendor/jx_elasticsearch/es52/painless/_utils.py new file mode 100644 index 0000000..f4e92c0 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/_utils.py @@ -0,0 +1,185 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + FALSE, + NULL, + TRUE, +) +from jx_base.language import Language +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_dots import Null +from mo_json import BOOLEAN, NUMBER, STRING + +AndOp, Literal, NumberOp, OrOp, WhenOp = [None]*5 + + +MAX_INT32 = 2147483647 +MIN_INT32 = -2147483648 + + +NUMBER_TO_STRING = """ +Optional.of({{expr}}).map( + value -> { + String output = String.valueOf(value); + if (output.endsWith(".0")) output = output.substring(0, output.length() - 2); + return output; + } +).orElse(null) +""" + +LIST_TO_PIPE = """ +StringBuffer output=new StringBuffer(); +for(String s : {{expr}}){ + output.append("|"); + String sep2=""; + StringTokenizer parts = new StringTokenizer(s, "|"); + while (parts.hasMoreTokens()){ + output.append(sep2); + output.append(parts.nextToken()); + sep2="||"; + }//for +}//for +output.append("|"); +return output.toString() +""" + + + +def _binary_to_es_script(self, schema, not_null=False, boolean=False, many=True): + op, identity = _painless_operators[self.op] + lhs = NumberOp(self.lhs).partial_eval().to_es_script(schema) + rhs = NumberOp(self.rhs).partial_eval().to_es_script(schema) + script = "(" + lhs.expr + ") " + op + " (" + rhs.expr + ")" + missing = OrOp([self.lhs.missing(), self.rhs.missing()]) + + return EsScript( + type=NUMBER, + miss=missing, + frum=self, + expr=script, + schema=schema, + many=False + ) + + +def _inequality_to_es_script(self, schema, not_null=False, boolean=False, many=True): + op, identity = _painless_operators[self.op] + lhs = NumberOp(self.lhs).partial_eval().to_es_script(schema).expr + rhs = NumberOp(self.rhs).partial_eval().to_es_script(schema).expr + script = "(" + lhs + 
") " + op + " (" + rhs + ")" + + output = ( + WhenOp( + OrOp([self.lhs.missing(), self.rhs.missing()]), + **{ + "then": FALSE, + "else": EsScript(type=BOOLEAN, expr=script, frum=self, schema=schema), + } + ) + .partial_eval() + .to_es_script(schema) + ) + return output + + +def _basic_binary_op_to_es_script( + self, schema, not_null=False, boolean=False, many=True +): + op, identity = _painless_operators[self.op] + if len(self.terms) == 0: + return Literal(identity).to_es_script(schema) + elif len(self.terms) == 1: + return self.terms[0].to_esscript() + else: + return EsScript( + type=NUMBER, + expr=op.join( + "(" + + Painless[t].to_es_script(schema, not_null=True, many=False).expr + + ")" + for t in self.terms + ), + frum=self, + schema=schema, + ) + + +def _multi_to_es_script(self, schema, not_null=False, boolean=False, many=True): + op, unit = _painless_operators[self.op] + if self.nulls: + calc = op.join( + "((" + + Painless[t.missing()].to_es_script(schema).expr + + ") ? " + + unit + + " : (" + + Painless[NumberOp(t)].partial_eval().to_es_script(schema).expr + + "))" + for t in self.terms + ) + return ( + WhenOp( + AndOp([t.missing() for t in self.terms]), + **{ + "then": self.default, + "else": EsScript(type=NUMBER, expr=calc, frum=self, schema=schema), + } + ) + .partial_eval() + .to_es_script(schema) + ) + else: + calc = op.join( + "(" + NumberOp(t).to_es_script(schema).expr + ")" for t in self.terms + ) + return ( + WhenOp( + OrOp([t.missing() for t in self.terms]), + **{ + "then": self.default, + "else": EsScript(type=NUMBER, expr=calc, frum=self, schema=schema), + } + ) + .partial_eval() + .to_es_script(schema) + ) + + + +Painless = Language("Painless") + + +_count_template = ( + "long count=0; for(v in {{expr}}) if (v!=null) count+=1; return count;" +) + +_painless_operators = { + "add": (" + ", "0"), # (operator, zero-array default value) PAIR + "sum": (" + ", "0"), + "mul": (" * ", "1"), + "basic.add": (" + ", "0"), + "basic.mul": (" * ", "1"), + "sub": ("-", None), + "div": ("/", None), + "exp": ("**", None), + "mod": ("%", None), + "gt": (">", None), + "gte": (">=", None), + "lte": ("<=", None), + "lt": ("<", None), +} + + +empty_string_script = EsScript( + miss=TRUE, type=STRING, expr='""', frum=NULL, schema=Null +) + diff --git a/vendor/jx_elasticsearch/es52/painless/add_op.py b/vendor/jx_elasticsearch/es52/painless/add_op.py new file mode 100644 index 0000000..ece0b4a --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/add_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import AddOp as AddOp_ +from jx_elasticsearch.es52.painless._utils import _multi_to_es_script + + +class AddOp(AddOp_): + to_es_script = _multi_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/and_op.py b/vendor/jx_elasticsearch/es52/painless/and_op.py new file mode 100644 index 0000000..f16750e --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/and_op.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import AndOp as AndOp_ +from jx_elasticsearch.es52.painless import _utils +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.true_op import true_script +from jx_elasticsearch.es52.painless.false_op import false_script +from mo_json import BOOLEAN + + +class AndOp(AndOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + ands = [Painless[t].to_es_script(schema) for t in self.terms] + + # TODO: WE SHOULD NOT BE SIMPLIFYING AT THIS POINT + if all(a is true_script for a in ands): + return true_script + elif any(a is false_script for a in ands): + return false_script + + return EsScript( + type=BOOLEAN, + expr=" && ".join("(" + a.expr + ")" for a in ands), + frum=self, + schema=schema, + ) + + +_utils.AndOp = AndOp diff --git a/vendor/jx_elasticsearch/es52/painless/basic_add_op.py b/vendor/jx_elasticsearch/es52/painless/basic_add_op.py new file mode 100644 index 0000000..51c972f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_add_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicAddOp as BasicAddOp_ +from jx_elasticsearch.es52.painless._utils import _basic_binary_op_to_es_script + + +class BasicAddOp(BasicAddOp_): + to_es_script = _basic_binary_op_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/basic_eq_op.py b/vendor/jx_elasticsearch/es52/painless/basic_eq_op.py new file mode 100644 index 0000000..8905966 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_eq_op.py @@ -0,0 +1,113 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/.
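+# NOTE (ILLUSTRATIVE, HYPOTHETICAL COLUMNS): BasicEqOp PICKS ITS PAINLESS BY CARDINALITY, e.g. A MULTIVALUED lhs AGAINST A SCALAR rhs BECOMES (doc['tags'].values).contains('urgent'), WHILE TWO MULTIVALUED SIDES REQUIRE EQUAL size() PLUS containsAll()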
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicEqOp as BasicEqOp_, is_literal, FALSE, TRUE +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.null_op import null_script +from mo_json import BOOLEAN, NUMBER, INTEGER +from mo_logs import Log + + +class BasicEqOp(BasicEqOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + simple_rhs = Painless[self.rhs].partial_eval() + lhs = Painless[self.lhs].partial_eval().to_es_script(schema) + rhs = simple_rhs.to_es_script(schema) + + if lhs.many: + if rhs.many: + return AndOp( + [ + EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ").size()==(" + rhs.expr + ").size()", + frum=self, + schema=schema, + ), + EsScript( + type=BOOLEAN, + expr="(" + rhs.expr + ").containsAll(" + lhs.expr + ")", + frum=self, + schema=schema, + ), + ] + ).to_es_script(schema) + else: + if lhs.type == BOOLEAN: + if is_literal(simple_rhs) and simple_rhs.value in ("F", False): + return EsScript( + type=BOOLEAN, expr="!" + lhs.expr, frum=self, schema=schema + ) + elif is_literal(simple_rhs) and simple_rhs.value in ("T", True): + return EsScript( + type=BOOLEAN, expr=lhs.expr, frum=self, schema=schema + ) + else: + return EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ")==(" + rhs.expr + ")", + frum=self, + schema=schema, + ) + elif lhs.type == rhs.type: + return EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ").contains(" + rhs.expr + ")", + frum=self, + schema=schema, + ) + elif lhs.type == NUMBER and rhs.type == INTEGER: + return EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ").contains((double)" + rhs.expr + ")", + frum=self, + schema=schema, + ) + else: + Log.error("type mismatch not expected while converting to painless") + + elif rhs.many: + return EsScript( + type=BOOLEAN, + expr="(" + rhs.expr + ").contains(" + lhs.expr + ")", + frum=self, + schema=schema, + ) + else: + if lhs is null_script: + if rhs is null_script: + return TRUE.to_es_script(schema) + return FALSE.to_es_script(schema) + elif lhs.type == BOOLEAN: + if is_literal(simple_rhs) and simple_rhs.value in ("F", False): + return EsScript( + type=BOOLEAN, expr="!" + lhs.expr, frum=self, schema=schema + ) + elif is_literal(simple_rhs) and simple_rhs.value in ("T", True): + return EsScript( + type=BOOLEAN, expr=lhs.expr, frum=self, schema=schema + ) + else: + return EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ")==(" + rhs.expr + ")", + frum=self, + schema=schema, + ) + else: + return EsScript( + type=BOOLEAN, + expr="(" + lhs.expr + ")==(" + rhs.expr + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/basic_index_of_op.py b/vendor/jx_elasticsearch/es52/painless/basic_index_of_op.py new file mode 100644 index 0000000..6c5a48d --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_index_of_op.py @@ -0,0 +1,31 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicIndexOfOp as BasicIndexOfOp_, FALSE +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.integer_op import IntegerOp +from jx_elasticsearch.es52.painless.string_op import StringOp +from mo_json import INTEGER + + +class BasicIndexOfOp(BasicIndexOfOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + v = StringOp(self.value).to_es_script(schema).expr + find = StringOp(self.find).to_es_script(schema).expr + start = IntegerOp(self.start).to_es_script(schema).expr + + return EsScript( + miss=FALSE, + type=INTEGER, + expr="(" + v + ").indexOf(" + find + ", " + start + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/basic_mul_op.py b/vendor/jx_elasticsearch/es52/painless/basic_mul_op.py new file mode 100644 index 0000000..3858eb7 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_mul_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicMulOp as BasicMulOp_ +from jx_elasticsearch.es52.painless._utils import _basic_binary_op_to_es_script + + +class BasicMulOp(BasicMulOp_): + to_es_script = _basic_binary_op_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/basic_starts_with_op.py b/vendor/jx_elasticsearch/es52/painless/basic_starts_with_op.py new file mode 100644 index 0000000..d6b34df --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_starts_with_op.py @@ -0,0 +1,36 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicStartsWithOp as BasicStartsWithOp_, FALSE +from jx_elasticsearch.es52.painless.false_op import false_script +from jx_elasticsearch.es52.painless._utils import ( + Painless, + empty_string_script, +) +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from mo_json import BOOLEAN + + +class BasicStartsWithOp(BasicStartsWithOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + expr = Painless[FirstOp(self.value)].partial_eval().to_es_script(schema) + if expr is empty_string_script: + return false_script + + prefix = Painless[self.prefix].to_es_script(schema).partial_eval() + return EsScript( + miss=FALSE, + type=BOOLEAN, + expr="(" + expr.expr + ").startsWith(" + prefix.expr + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/basic_substring_op.py b/vendor/jx_elasticsearch/es52/painless/basic_substring_op.py new file mode 100644 index 0000000..568f09a --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/basic_substring_op.py @@ -0,0 +1,31 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicSubstringOp as BasicSubstringOp_, FALSE +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.integer_op import IntegerOp +from jx_elasticsearch.es52.painless.string_op import StringOp +from mo_json import STRING + + +class BasicSubstringOp(BasicSubstringOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + v = StringOp(self.value).partial_eval().to_es_script(schema).expr + start = IntegerOp(self.start).partial_eval().to_es_script(schema).expr + end = IntegerOp(self.end).partial_eval().to_es_script(schema).expr + + return EsScript( + miss=FALSE, + type=STRING, + expr="(" + v + ").substring(" + start + ", " + end + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/boolean_op.py b/vendor/jx_elasticsearch/es52/painless/boolean_op.py new file mode 100644 index 0000000..b732c98 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/boolean_op.py @@ -0,0 +1,41 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BooleanOp as BooleanOp_, FALSE +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.not_op import NotOp +from jx_elasticsearch.es52.painless.when_op import WhenOp +from mo_json import BOOLEAN + + +class BooleanOp(BooleanOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + value = self.lang[self.term].to_es_script(schema) + if value.many: + return BooleanOp( + EsScript( + miss=value.miss, + type=value.type, + expr="(" + value.expr + ")[0]", + frum=value.frum, + schema=schema, + ) + ).to_es_script(schema) + elif value.type == BOOLEAN: + miss = value.miss + value.miss = FALSE + return ( + WhenOp(miss, **{"then": FALSE, "else": value}) + .partial_eval() + .to_es_script(schema) + ) + else: + return NotOp(value.miss).partial_eval().to_es_script(schema) diff --git a/vendor/jx_elasticsearch/es52/painless/case_op.py b/vendor/jx_elasticsearch/es52/painless/case_op.py new file mode 100644 index 0000000..db21e4c --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/case_op.py @@ -0,0 +1,26 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CaseOp as CaseOp_ +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.when_op import WhenOp + + +class CaseOp(CaseOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + acc = Painless[self.whens[-1]].partial_eval().to_es_script(schema) + for w in reversed(self.whens[0:-1]): + acc = ( + WhenOp(w.when, **{"then": w.then, "else": acc}) + .partial_eval() + .to_es_script(schema) + ) + return acc diff --git a/vendor/jx_elasticsearch/es52/painless/coalesce_op.py b/vendor/jx_elasticsearch/es52/painless/coalesce_op.py new file mode 100644 index 0000000..4921eb9 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/coalesce_op.py @@ -0,0 +1,56 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
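+# NOTE (ILLUSTRATIVE): CoalesceOp BUILDS NESTED TERNARIES RIGHT-TO-LEFT AND ASSUMES THE LAST TERM IS NOT NULL, SO coalesce(a, b) COMPILES ROUGHLY TO (!a_missing) ? (a_expr) : (b_expr)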
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CoalesceOp as CoalesceOp_, FALSE, NULL, TRUE +from jx_elasticsearch.es52.painless import first_op +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from jx_elasticsearch.es52.painless.not_op import NotOp +from mo_json import INTEGER, NUMBER, OBJECT, NUMBER_TYPES + + +class CoalesceOp(CoalesceOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if not self.terms: + return NULL.to_es_script(schema) + # acc.miss WILL SAY IF THIS COALESCE RETURNS NULL, + # acc.expr WILL ASSUMED TO BE A VALUE, SO THE LAST TERM IS ASSUMED NOT NULL + v = self.terms[-1] + acc = FirstOp(v).partial_eval().to_es_script(schema) + for v in reversed(self.terms[:-1]): + m = v.missing().partial_eval() + e = NotOp(m).partial_eval().to_es_script(schema) + r = FirstOp(v).partial_eval().to_es_script(schema) + + if r.miss is TRUE: + continue + elif r.miss is FALSE: + acc = r + continue + elif acc.type == r.type or acc.miss is TRUE: + new_type = r.type + elif acc.type in NUMBER_TYPES and r.type in NUMBER_TYPES: + new_type = NUMBER + else: + new_type = OBJECT + + acc = EsScript( + miss=AndOp([acc.miss, m]).partial_eval(), + type=new_type, + expr="(" + e.expr + ") ? (" + r.expr + ") : (" + acc.expr + ")", + frum=self, + schema=schema, + ) + return acc + + +first_op.CoalesceOp = CoalesceOp diff --git a/vendor/jx_elasticsearch/es52/painless/concat_op.py b/vendor/jx_elasticsearch/es52/painless/concat_op.py new file mode 100644 index 0000000..f374629 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/concat_op.py @@ -0,0 +1,70 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
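+# NOTE (ILLUSTRATIVE): EACH TERM IS EMITTED AS separator+value (OR "" WHEN MISSING) AND THE LEADING SEPARATOR IS STRIPPED WITH substring(); CONCATENATING "a" AND "b" WITH SEPARATOR "," GIVES ((","+"a")+(","+"b")).substring(1), WHICH EVALUATES TO "a,b"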
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ConcatOp as ConcatOp_, NULL +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.length_op import LengthOp +from jx_elasticsearch.es52.painless.literal import Literal +from jx_elasticsearch.es52.painless.string_op import StringOp +from jx_elasticsearch.es52.painless.when_op import WhenOp +from mo_json import STRING + + +class ConcatOp(ConcatOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if len(self.terms) == 0: + return self.default.to_es_script(schema) + + acc = [] + separator = StringOp(self.separator).partial_eval() + sep = separator.to_es_script(schema).expr + for t in self.terms: + val = WhenOp( + t.missing(), + **{ + "then": Literal(""), + "else": EsScript( + type=STRING, + expr=sep + + "+" + + StringOp(t).partial_eval().to_es_script(schema).expr, + frum=t, + schema=schema, + ) + # "else": ConcatOp([sep, t]) + } + ) + acc.append("(" + val.partial_eval().to_es_script(schema).expr + ")") + expr_ = ( + "(" + + "+".join(acc) + + ").substring(" + + LengthOp(separator).to_es_script(schema).expr + + ")" + ) + + if self.default is NULL: + return EsScript( + miss=self.missing(), type=STRING, expr=expr_, frum=self, schema=schema + ) + else: + return EsScript( + miss=self.missing(), + type=STRING, + expr="((" + + expr_ + + ").length==0) ? (" + + self.default.to_es_script(schema).expr + + ") : (" + + expr_ + + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/count_op.py b/vendor/jx_elasticsearch/es52/painless/count_op.py new file mode 100644 index 0000000..ff5509f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/count_op.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CountOp as CountOp_, FALSE +from jx_elasticsearch.es52.painless._utils import Painless, _count_template +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import INTEGER +from mo_logs.strings import expand_template + + +class CountOp(CountOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return EsScript( + miss=FALSE, + type=INTEGER, + expr=expand_template( + _count_template, + {"expr": Painless[self.terms].partial_eval().to_es_script(schema).expr}, + ), + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/date_op.py b/vendor/jx_elasticsearch/es52/painless/date_op.py new file mode 100644 index 0000000..204f232 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/date_op.py @@ -0,0 +1,23 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/.
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import DateOp as DateOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_future import text +from mo_json import NUMBER +from mo_times import Date + + +class DateOp(DateOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return EsScript( + type=NUMBER, expr=text(Date(self.value).unix), frum=self, schema=schema + ) diff --git a/vendor/jx_elasticsearch/es52/painless/div_op.py b/vendor/jx_elasticsearch/es52/painless/div_op.py new file mode 100644 index 0000000..90146e7 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/div_op.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import DivOp as DivOp_, ZERO +from jx_elasticsearch.es52.painless.eq_op import EqOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.number_op import NumberOp +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.when_op import WhenOp +from mo_json import NUMBER + + +class DivOp(DivOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + lhs = NumberOp(self.lhs).partial_eval() + rhs = NumberOp(self.rhs).partial_eval() + script = ( + "(" + + lhs.to_es_script(schema).expr + + ") / (" + + rhs.to_es_script(schema).expr + + ")" + ) + + output = ( + WhenOp( + OrOp([lhs.missing(), rhs.missing(), EqOp([rhs, ZERO])]), + **{ + "then": self.default, + "else": EsScript( + type=NUMBER, expr=script, frum=self, schema=schema + ), + } + ) + .partial_eval() + .to_es_script(schema) + ) + + return output diff --git a/vendor/jx_elasticsearch/es52/painless/eq_op.py b/vendor/jx_elasticsearch/es52/painless/eq_op.py new file mode 100644 index 0000000..5c9cc35 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/eq_op.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import EqOp as EqOp_, FALSE +from jx_elasticsearch.es52.painless.basic_eq_op import BasicEqOp +from jx_elasticsearch.es52.painless.case_op import CaseOp +from jx_elasticsearch.es52.painless.when_op import WhenOp + + +class EqOp(EqOp_):  + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return ( + CaseOp( + [ + WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}), + WhenOp(self.rhs.missing(), **{"then": FALSE}), + BasicEqOp([self.lhs, self.rhs]), + ] + ) + .partial_eval() + .to_es_script(schema) + ) diff --git a/vendor/jx_elasticsearch/es52/painless/es_script.py b/vendor/jx_elasticsearch/es52/painless/es_script.py new file mode 100644 index 0000000..2672508 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/es_script.py @@ -0,0 +1,106 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0.
If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import EsScript as EsScript_, FALSE, NULL, ONE, TRUE, ZERO +from mo_dots import coalesce, wrap +from mo_future import PY2, text +from mo_json import BOOLEAN, INTEGER, NUMBER +from mo_logs import Log + + +class EsScript(EsScript_): + __slots__ = ("simplified", "miss", "data_type", "expr", "many") + + def __init__(self, type, expr, frum, schema, miss=None, many=False): + self.simplified = True + object.__init__(self) + if miss not in [None, NULL, FALSE, TRUE, ONE, ZERO]: + if frum.lang != miss.lang: + Log.error("logic error") + + self.miss = coalesce( + miss, FALSE + ) # Expression that will return true/false to indicate missing result + self.data_type = type + self.expr = expr + self.many = many # True if script returns multi-value + self.frum = frum # THE ORIGINAL EXPRESSION THAT MADE expr + self.schema = schema + + @property + def type(self): + return self.data_type + + def __str__(self): + """ + RETURN A SCRIPT SUITABLE FOR CODE OUTSIDE THIS MODULE (NO KNOWLEDGE OF Painless) + """ + missing = self.miss.partial_eval() + if missing is FALSE: + return self.partial_eval().to_es_script(self.schema).expr + elif missing is TRUE: + return "null" + + return ( + "(" + missing.to_es_script(self.schema).expr + ")?null:(" + box(self) + ")" + ) + + def __add__(self, other): + return text(self) + text(other) + + def __radd__(self, other): + return text(other) + text(self) + + if PY2: + __unicode__ = __str__ + + def to_esfilter(self, schema): + return {"script": es_script(text(self))} + + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return self + + def missing(self): + return self.miss + + def __data__(self): + return {"script": text(self)} + + def __eq__(self, other): + if not isinstance(other, EsScript_): + return False + elif self.expr == other.expr: + return True + else: + return False + + +def box(script): + """ + :param script: + :return: TEXT EXPRESSION WITH NON OBJECTS BOXED + """ + if script.type is BOOLEAN: + return "Boolean.valueOf(" + text(script.expr) + ")" + elif script.type is INTEGER: + return "Integer.valueOf(" + text(script.expr) + ")" + elif script.type is NUMBER: + return "Double.valueOf(" + text(script.expr) + ")" + else: + return script.expr + + +def es_script(term): + return wrap({"script": {"lang": "painless", "source": term}}) + + diff --git a/vendor/jx_elasticsearch/es52/painless/exists_op.py b/vendor/jx_elasticsearch/es52/painless/exists_op.py new file mode 100644 index 0000000..1cb748f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/exists_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/.
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ExistsOp as ExistsOp_ + + +class ExistsOp(ExistsOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return self.field.exists().partial_eval().to_es_script(schema) diff --git a/vendor/jx_elasticsearch/es52/painless/exp_op.py b/vendor/jx_elasticsearch/es52/painless/exp_op.py new file mode 100644 index 0000000..8553cde --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/exp_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ExpOp as ExpOp_ +from jx_elasticsearch.es52.painless._utils import _binary_to_es_script + + +class ExpOp(ExpOp_): + to_es_script = _binary_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/false_op.py b/vendor/jx_elasticsearch/es52/painless/false_op.py new file mode 100644 index 0000000..9cdbbc1 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/false_op.py @@ -0,0 +1,25 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FalseOp, extend, FALSE +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_dots import Null +from mo_json import BOOLEAN + + +@extend(FalseOp) +def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return false_script + + +false_script = EsScript(type=BOOLEAN, expr="false", frum=FALSE, schema=Null) + diff --git a/vendor/jx_elasticsearch/es52/painless/find_op.py b/vendor/jx_elasticsearch/es52/painless/find_op.py new file mode 100644 index 0000000..902964f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/find_op.py @@ -0,0 +1,61 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
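+# NOTE (ILLUSTRATIVE): FindOp LOWERS TO A GUARDED indexOf(); WHEN value OR find IS MISSING, OR (value).indexOf(find, start) RETURNS -1, THE default IS USED, OTHERWISE THE INDEX ITSELF IS RETURNED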
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FindOp as FindOp_, simplified +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.basic_eq_op import BasicEqOp +from jx_elasticsearch.es52.painless.basic_index_of_op import BasicIndexOfOp +from jx_elasticsearch.es52.painless.eq_op import EqOp +from jx_elasticsearch.es52.painless.literal import Literal +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.when_op import WhenOp + + +class FindOp(FindOp_): + @simplified + def partial_eval(self): + index = self.lang[ + BasicIndexOfOp([self.value, self.find, self.start]) + ].partial_eval() + + output = self.lang[ + WhenOp( + OrOp( + [ + self.value.missing(), + self.find.missing(), + BasicEqOp([index, Literal(-1)]), + ] + ), + **{"then": self.default, "else": index} + ) + ].partial_eval() + return output + + def missing(self): + output = AndOp( + [ + self.default.missing(), + OrOp( + [ + self.value.missing(), + self.find.missing(), + EqOp( + [ + BasicIndexOfOp([self.value, self.find, self.start]), + Literal(-1), + ] + ), + ] + ), + ] + ).partial_eval() + return output diff --git a/vendor/jx_elasticsearch/es52/painless/first_op.py b/vendor/jx_elasticsearch/es52/painless/first_op.py new file mode 100644 index 0000000..3de5a61 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/first_op.py @@ -0,0 +1,60 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + CoalesceOp as CoalesceOp_, + FirstOp as FirstOp_, + Variable as Variable_, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.painless.null_op import null_script +from jx_elasticsearch.es52.painless.es_script import EsScript + +CoalesceOp, Variable = [None]*2 + + +class FirstOp(FirstOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if is_op(self.term, Variable_): + columns = schema.values(self.term.var) + if len(columns) == 0: + return null_script + elif len(columns) == 1: + return self.term.to_es_script(schema, many=False) + # else: + # return CoalesceOp( + # [ + # FirstOp(Variable(c.es_column)) + # for c in columns + # ] + # ).to_es_script(schema) + + term = Painless[self.term].to_es_script(schema) + + if is_op(term.frum, CoalesceOp_): + return CoalesceOp( + [ + FirstOp(t.partial_eval().to_es_script(schema)) + for t in term.frum.terms + ] + ).to_es_script(schema) + + if term.many: + return EsScript( + miss=term.miss, + type=term.type, + expr="(" + term.expr + ")[0]", + frum=term.frum, + schema=schema, + ).to_es_script(schema) + else: + return term diff --git a/vendor/jx_elasticsearch/es52/painless/floor_op.py b/vendor/jx_elasticsearch/es52/painless/floor_op.py new file mode 100644 index 0000000..213531f --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/floor_op.py @@ -0,0 +1,53 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
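+# NOTE (ILLUSTRATIVE): FLOORING TO A MULTIPLE OF rhs USES Math.floor(lhs / rhs)*rhs, e.g. lhs=7, rhs=3 GIVES Math.floor(7 / 3) * 3 == 6; WHEN rhs IS THE LITERAL ONE THIS REDUCES TO (int)Math.floor(lhs)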
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FALSE, FloorOp as FloorOp_, ONE, ZERO +from jx_elasticsearch.es52.painless.eq_op import EqOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.when_op import WhenOp +from mo_json import NUMBER + + +class FloorOp(FloorOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + lhs = FirstOp(self.lhs).partial_eval() + rhs = FirstOp(self.rhs).partial_eval() + + if rhs == ONE: + script = "(int)Math.floor(" + lhs.to_es_script(schema).expr + ")" + else: + rhs = rhs.to_es_script(schema) + script = ( + "Math.floor((" + + lhs.to_es_script(schema).expr + + ") / (" + + rhs.expr + + "))*(" + + rhs.expr + + ")" + ) + + output = ( + WhenOp( + OrOp([lhs.missing(), rhs.missing(), EqOp([self.rhs, ZERO])]), + **{ + "then": self.default, + "else": EsScript( + type=NUMBER, expr=script, frum=self, miss=FALSE, schema=schema + ), + } + ) + .partial_eval() + .to_es_script(schema) + ) + return output diff --git a/vendor/jx_elasticsearch/es52/painless/gt_op.py b/vendor/jx_elasticsearch/es52/painless/gt_op.py new file mode 100644 index 0000000..3db0864 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/gt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GtOp as GtOp_ +from jx_elasticsearch.es52.painless._utils import _inequality_to_es_script + + +class GtOp(GtOp_): + to_es_script = _inequality_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/gte_op.py b/vendor/jx_elasticsearch/es52/painless/gte_op.py new file mode 100644 index 0000000..32e23da --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/gte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GteOp as GteOp_ +from jx_elasticsearch.es52.painless._utils import _inequality_to_es_script + + +class GteOp(GteOp_): + to_es_script = _inequality_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/in_op.py b/vendor/jx_elasticsearch/es52/painless/in_op.py new file mode 100644 index 0000000..73b5816 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/in_op.py @@ -0,0 +1,27 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import InOp as InOp_ +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import BOOLEAN + + +class InOp(InOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + superset = Painless[self.superset].to_es_script(schema) + value = Painless[self.value].to_es_script(schema) + return EsScript( + type=BOOLEAN, + expr="(" + superset.expr + ").contains(" + value.expr + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/integer_op.py b/vendor/jx_elasticsearch/es52/painless/integer_op.py new file mode 100644 index 0000000..66d6639 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/integer_op.py @@ -0,0 +1,70 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import IntegerOp as IntegerOp_ +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import BOOLEAN, INTEGER, NUMBER, STRING + + +class IntegerOp(IntegerOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + value = Painless[self.term].to_es_script(schema) + if value.many: + return IntegerOp( + EsScript( + miss=value.missing(), + type=value.type, + expr="(" + value.expr + ")[0]", + frum=value.frum, + schema=schema, + ) + ).to_es_script(schema) + elif value.type == BOOLEAN: + return EsScript( + miss=value.missing(), + type=INTEGER, + expr=value.expr + " ? 1 : 0", + frum=self, + schema=schema, + ) + elif value.type == INTEGER: + return value + elif value.type == NUMBER: + return EsScript( + miss=value.missing(), + type=INTEGER, + expr="(int)(" + value.expr + ")", + frum=self, + schema=schema, + ) + elif value.type == STRING: + return EsScript( + miss=value.missing(), + type=INTEGER, + expr="Integer.parseInt(" + value.expr + ")", + frum=self, + schema=schema, + ) + else: + return EsScript( + miss=value.missing(), + type=INTEGER, + expr="((" + + value.expr + + ") instanceof String) ? Integer.parseInt(" + + value.expr + + ") : (int)(" + + value.expr + + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/is_number_op.py b/vendor/jx_elasticsearch/es52/painless/is_number_op.py new file mode 100644 index 0000000..fd53e4a --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/is_number_op.py @@ -0,0 +1,29 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FALSE, TRUE, IsNumberOp as IsNumberOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import BOOLEAN, NUMBER_TYPES + + +class IsNumberOp(IsNumberOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + value = self.term.to_es_script(schema) + if value.type in NUMBER_TYPES: # ALREADY NUMERIC, SO TRIVIALLY A NUMBER + return TRUE.to_es_script(schema) + else: + return EsScript( + miss=FALSE, + type=BOOLEAN, + expr="(" + value.expr + ") instanceof java.lang.Double", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/leaves_op.py b/vendor/jx_elasticsearch/es52/painless/leaves_op.py new file mode 100644 index 0000000..4eb4630 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/leaves_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LeavesOp as LeavesOp_ +from mo_logs import Log + + +class LeavesOp(LeavesOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + Log.error("not supported") diff --git a/vendor/jx_elasticsearch/es52/painless/length_op.py b/vendor/jx_elasticsearch/es52/painless/length_op.py new file mode 100644 index 0000000..fb15fdb --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/length_op.py @@ -0,0 +1,28 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LengthOp as LengthOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.string_op import StringOp +from mo_json import INTEGER + + +class LengthOp(LengthOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + value = StringOp(self.term).to_es_script(schema) + missing = self.term.missing().partial_eval() + return EsScript( + miss=missing, + type=INTEGER, + expr="(" + value.expr + ").length()", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/literal.py b/vendor/jx_elasticsearch/es52/painless/literal.py new file mode 100644 index 0000000..6ce02ff --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/literal.py @@ -0,0 +1,76 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/.
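+# NOTE (ILLUSTRATIVE): INTEGER LITERALS OUTSIDE [MIN_INT32, MAX_INT32] GET AN L SUFFIX (3000000000 EMITS 3000000000L), FLOATS GET A D SUFFIX (1.5 EMITS 1.5D), STRINGS ARE quote()d, AND Date VALUES EMIT THEIR unix SECONDS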
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import Literal as Literal_ +from jx_elasticsearch.es52.painless import _utils +from jx_elasticsearch.es52.painless.null_op import null_script +from jx_elasticsearch.es52.painless.true_op import true_script +from jx_elasticsearch.es52.painless.false_op import false_script +from jx_elasticsearch.es52.painless._utils import MIN_INT32, MAX_INT32 +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_dots import FlatList, data_types +from mo_future import integer_types, text +from mo_json import INTEGER, NUMBER, OBJECT, STRING +from mo_logs.strings import quote +from mo_times import Date + + +class Literal(Literal_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + def _convert(v): + if v is None: + return null_script + if v is True: + return true_script + if v is False: + return false_script + class_ = v.__class__ + if class_ is text: + return EsScript(type=STRING, expr=quote(v), frum=self, schema=schema) + if class_ in integer_types: + if MIN_INT32 <= v <= MAX_INT32: + return EsScript( + type=INTEGER, expr=text(v), frum=self, schema=schema + ) + else: + return EsScript( + type=INTEGER, expr=text(v) + "L", frum=self, schema=schema + ) + + if class_ is float: + return EsScript( + type=NUMBER, expr=text(v) + "D", frum=self, schema=schema + ) + if class_ in data_types: + return EsScript( + type=OBJECT, + expr="[" + + ", ".join(quote(k) + ": " + _convert(vv).expr for k, vv in v.items()) + + "]", + frum=self, + schema=schema, + ) + if class_ in (FlatList, list, tuple): + return EsScript( + type=OBJECT, + expr="[" + ", ".join(_convert(vv).expr for vv in v) + "]", + frum=self, + schema=schema, + ) + if class_ is Date: + return EsScript( + type=NUMBER, expr=text(v.unix), frum=self, schema=schema + ) + + return _convert(self._value) + + +_utils.Literal = Literal diff --git a/vendor/jx_elasticsearch/es52/painless/lt_op.py b/vendor/jx_elasticsearch/es52/painless/lt_op.py new file mode 100644 index 0000000..d8ae227 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/lt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LtOp as LtOp_ +from jx_elasticsearch.es52.painless._utils import _inequality_to_es_script + + +class LtOp(LtOp_): + to_es_script = _inequality_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/lte_op.py b/vendor/jx_elasticsearch/es52/painless/lte_op.py new file mode 100644 index 0000000..e2317a7 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/lte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/.
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LteOp as LteOp_ +from jx_elasticsearch.es52.painless._utils import _inequality_to_es_script + + +class LteOp(LteOp_): + to_es_script = _inequality_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/max_op.py b/vendor/jx_elasticsearch/es52/painless/max_op.py new file mode 100644 index 0000000..581da2d --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/max_op.py @@ -0,0 +1,36 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MaxOp as MaxOp_ +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.number_op import NumberOp +from mo_json import NUMBER + + +class MaxOp(MaxOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + acc = NumberOp(self.terms[-1]).partial_eval().to_es_script(schema).expr + for t in reversed(self.terms[0:-1]): + acc = ( + "Math.max(" + + NumberOp(t).partial_eval().to_es_script(schema).expr + + " , " + + acc + + ")" + ) + return EsScript( + miss=AndOp([t.missing() for t in self.terms]), + type=NUMBER, + expr=acc, + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/min_op.py b/vendor/jx_elasticsearch/es52/painless/min_op.py new file mode 100644 index 0000000..ab648f8 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/min_op.py @@ -0,0 +1,36 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MinOp as MinOp_ +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.number_op import NumberOp +from mo_json import NUMBER + + +class MinOp(MinOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + acc = NumberOp(self.terms[-1]).partial_eval().to_es_script(schema).expr + for t in reversed(self.terms[0:-1]): + acc = ( + "Math.min(" + + NumberOp(t).partial_eval().to_es_script(schema).expr + + " , " + + acc + + ")" + ) + return EsScript( + miss=AndOp([t.missing() for t in self.terms]), + type=NUMBER, + expr=acc, + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/missing_op.py b/vendor/jx_elasticsearch/es52/painless/missing_op.py new file mode 100644 index 0000000..efe98f5 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/missing_op.py @@ -0,0 +1,49 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
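+#
+# A VARIABLE IS MISSING ONLY WHEN ALL ITS LEAF COLUMNS ARE EMPTY, EXPRESSED
+# AS AN AndOp OVER doc[...].empty TESTS; THE _id COLUMN IS NEVER MISSING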
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ( + MissingOp as MissingOp_, + Variable as Variable_, + is_literal, +) +from jx_base.language import is_op +from jx_elasticsearch.es52.painless.and_op import AndOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import BOOLEAN +from mo_logs.strings import quote + + +class MissingOp(MissingOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if is_op(self.expr, Variable_): + if self.expr.var == "_id": + return EsScript(type=BOOLEAN, expr="false", frum=self, schema=schema) + else: + columns = schema.leaves(self.expr.var) + return ( + AndOp( + [ + EsScript( + type=BOOLEAN, + expr="doc[" + quote(c.es_column) + "].empty", + frum=self, + schema=schema, + ) + for c in columns + ] + ) + .partial_eval() + .to_es_script(schema) + ) + elif is_literal(self.expr): + return self.expr.missing().to_es_script(schema) + else: + return self.expr.missing().partial_eval().to_es_script(schema) diff --git a/vendor/jx_elasticsearch/es52/painless/mod_op.py b/vendor/jx_elasticsearch/es52/painless/mod_op.py new file mode 100644 index 0000000..68f94a2 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/mod_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ModOp as ModOp_ +from jx_elasticsearch.es52.painless._utils import _binary_to_es_script + + +class ModOp(ModOp_): + to_es_script = _binary_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/mul_op.py b/vendor/jx_elasticsearch/es52/painless/mul_op.py new file mode 100644 index 0000000..446623b --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/mul_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MulOp as MulOp_ +from jx_elasticsearch.es52.painless._utils import _multi_to_es_script + + +class MulOp(MulOp_): + to_es_script = _multi_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/ne_op.py b/vendor/jx_elasticsearch/es52/painless/ne_op.py new file mode 100644 index 0000000..c3e0d37 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/ne_op.py @@ -0,0 +1,31 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
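+#
+# THREE-VALUED INEQUALITY: WHEN ONE SIDE IS MISSING, RETURN WHETHER THE OTHER
+# SIDE EXISTS; OTHERWISE RETURN THE NEGATION OF BasicEqOp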
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NeOp as NeOp_ +from jx_elasticsearch.es52.painless.basic_eq_op import BasicEqOp +from jx_elasticsearch.es52.painless.case_op import CaseOp +from jx_elasticsearch.es52.painless.not_op import NotOp +from jx_elasticsearch.es52.painless.when_op import WhenOp + + +class NeOp(NeOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return ( + CaseOp( + [ + WhenOp(self.lhs.missing(), **{"then": NotOp(self.rhs.missing())}), + WhenOp(self.rhs.missing(), **{"then": NotOp(self.lhs.missing())}), + NotOp(BasicEqOp([self.lhs, self.rhs])), + ] + ) + .partial_eval() + .to_es_script(schema) + ) diff --git a/vendor/jx_elasticsearch/es52/painless/not_left_op.py b/vendor/jx_elasticsearch/es52/painless/not_left_op.py new file mode 100644 index 0000000..374769c --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/not_left_op.py @@ -0,0 +1,40 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NotLeftOp as NotLeftOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.number_op import NumberOp +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.string_op import StringOp +from mo_json import STRING + + +class NotLeftOp(NotLeftOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + v = StringOp(self.value).partial_eval().to_es_script(schema).expr + l = NumberOp(self.length).partial_eval().to_es_script(schema).expr + + expr = ( + "(" + + v + + ").substring((int)Math.max(0, (int)Math.min(" + + v + + ".length(), " + + l + + ")))" + ) + return EsScript( + miss=OrOp([self.value.missing(), self.length.missing()]), + type=STRING, + expr=expr, + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/not_op.py b/vendor/jx_elasticsearch/es52/painless/not_op.py new file mode 100644 index 0000000..2290a0c --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/not_op.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
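+#
+# NEGATION WITH CONSTANT FOLDING: NOT OF THE true/false/null SINGLETON
+# SCRIPTS IS RESOLVED HERE, EVERYTHING ELSE IS WRAPPED IN !(...)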
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NotOp as NotOp_ +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.null_op import null_script +from jx_elasticsearch.es52.painless.false_op import false_script +from jx_elasticsearch.es52.painless.true_op import true_script +from mo_json import BOOLEAN + + +class NotOp(NotOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + value = Painless[self.term].partial_eval().to_es_script(schema) + + if value is false_script: + return true_script + elif value is true_script: + return false_script + elif value is null_script: + return null_script + + return EsScript( + type=BOOLEAN, + expr="!(" + + value.expr + + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/null_op.py b/vendor/jx_elasticsearch/es52/painless/null_op.py new file mode 100644 index 0000000..74030e3 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/null_op.py @@ -0,0 +1,25 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import extend, NullOp, TRUE, NULL +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_dots import Null +from mo_json import IS_NULL + + +@extend(NullOp) +def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return null_script + + +null_script = EsScript(type=IS_NULL, expr="null", frum=NULL, miss=TRUE, schema=Null) + diff --git a/vendor/jx_elasticsearch/es52/painless/number_op.py b/vendor/jx_elasticsearch/es52/painless/number_op.py new file mode 100644 index 0000000..7858245 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/number_op.py @@ -0,0 +1,93 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
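+#
+# COERCE A SCRIPT TO A NUMBER: BOOLEANS BECOME (expr) ? 1 : 0, STRINGS GO
+# THROUGH Double.parseDouble(), AND COALESCE IS PUSHED DOWN TO ITS TERMS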
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CoalesceOp as CoalesceOp_, NumberOp as NumberOp_ +from jx_base.language import is_op +from jx_elasticsearch.es52.painless import _utils +from jx_elasticsearch.es52.painless.literal import Literal +from jx_elasticsearch.es52.painless.null_op import null_script +from jx_elasticsearch.es52.painless.false_op import false_script +from jx_elasticsearch.es52.painless.true_op import true_script +from jx_elasticsearch.es52.painless.coalesce_op import CoalesceOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from mo_json import BOOLEAN, INTEGER, NUMBER, OBJECT, STRING + + +class NumberOp(NumberOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + term = FirstOp(self.term).partial_eval() + + value = term.to_es_script(schema) + + if is_op(value.frum, CoalesceOp_): + return CoalesceOp( + [ + NumberOp(t).partial_eval().to_es_script(schema) + for t in value.frum.terms + ] + ).to_es_script(schema) + + if value is null_script: + return Literal(0).to_es_script(schema) + if value is false_script: + return Literal(0).to_es_script(schema) + if value is true_script: + return Literal(1).to_es_script(schema) + elif value.type == BOOLEAN: + return EsScript( + miss=term.missing().partial_eval(), + type=NUMBER, + expr="(" + value.expr + ") ? 1 : 0", + frum=self, + schema=schema, + ) + elif value.type == INTEGER: + return EsScript( + miss=term.missing().partial_eval(), + type=NUMBER, + expr=value.expr, + frum=self, + schema=schema, + ) + elif value.type == NUMBER: + return EsScript( + miss=term.missing().partial_eval(), + type=NUMBER, + expr=value.expr, + frum=self, + schema=schema, + ) + elif value.type == STRING: + return EsScript( + miss=term.missing().partial_eval(), + type=NUMBER, + expr="Double.parseDouble(" + value.expr + ")", + frum=self, + schema=schema, + ) + elif value.type == OBJECT: + return EsScript( + miss=term.missing().partial_eval(), + type=NUMBER, + expr="((" + + value.expr + + ") instanceof String) ? Double.parseDouble(" + + value.expr + + ") : (" + + value.expr + + ")", + frum=self, + schema=schema, + ) + + +_utils.NumberOp=NumberOp diff --git a/vendor/jx_elasticsearch/es52/painless/or_op.py b/vendor/jx_elasticsearch/es52/painless/or_op.py new file mode 100644 index 0000000..645304c --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/or_op.py @@ -0,0 +1,33 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
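+#
+# DISJUNCTION IS A SIMPLE || OVER THE TERMS, EACH PARENTHESIZED TO PRESERVE
+# PRECEDENCE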
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import OrOp as OrOp_ +from jx_elasticsearch.es52.painless import _utils +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_json import BOOLEAN + + +class OrOp(OrOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return EsScript( + type=BOOLEAN, + expr=" || ".join( + "(" + Painless[t].to_es_script(schema).expr + ")" + for t in self.terms + if t + ), + frum=self, + schema=schema, + ) + + +_utils.OrOp=OrOp diff --git a/vendor/jx_elasticsearch/es52/painless/prefix_op.py b/vendor/jx_elasticsearch/es52/painless/prefix_op.py new file mode 100644 index 0000000..e62d523 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/prefix_op.py @@ -0,0 +1,32 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import PrefixOp as PrefixOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.true_op import true_script +from mo_json import BOOLEAN + + +class PrefixOp(PrefixOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if not self.expr: + return true_script + else: + return EsScript( + type=BOOLEAN, + expr="(" + + self.expr.to_es_script(schema).script(schema) + + ").startsWith(" + + self.prefix.to_es_script(schema).script(schema) + + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/string_op.py b/vendor/jx_elasticsearch/es52/painless/string_op.py new file mode 100644 index 0000000..c1f3e41 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/string_op.py @@ -0,0 +1,70 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CoalesceOp as CoalesceOp_, StringOp as StringOp_, TRUE +from jx_base.language import is_op +from jx_elasticsearch.es52.painless._utils import empty_string_script, NUMBER_TO_STRING +from jx_elasticsearch.es52.painless.coalesce_op import CoalesceOp +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from mo_json import BOOLEAN, INTEGER, NUMBER, STRING +from mo_logs.strings import expand_template + + +class StringOp(StringOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + term = FirstOp(self.term).partial_eval() + value = term.to_es_script(schema) + + if is_op(value.frum, CoalesceOp_): + return CoalesceOp( + [StringOp(t).partial_eval() for t in value.frum.terms] + ).to_es_script(schema) + + if value.miss is TRUE: + return empty_string_script + elif value.type == BOOLEAN: + return EsScript( + miss=self.term.missing().partial_eval(), + type=STRING, + expr=value.expr + ' ? 
"T" : "F"', + frum=self, + schema=schema, + ) + elif value.type == INTEGER: + return EsScript( + miss=self.term.missing().partial_eval(), + type=STRING, + expr="String.valueOf(" + value.expr + ")", + frum=self, + schema=schema, + ) + elif value.type == NUMBER: + return EsScript( + miss=self.term.missing().partial_eval(), + type=STRING, + expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}), + frum=self, + schema=schema, + ) + elif value.type == STRING: + return value + else: + return EsScript( + miss=self.term.missing().partial_eval(), + type=STRING, + expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}), + frum=self, + schema=schema, + ) + + # ((Runnable)(() -> {int a=2; int b=3; System.out.println(a+b);})).run(); + # "((Runnable)((value) -> {String output=String.valueOf(value); if (output.endsWith('.0')) {return output.substring(0, output.length-2);} else return output;})).run(" + value.expr + ")" diff --git a/vendor/jx_elasticsearch/es52/painless/sub_op.py b/vendor/jx_elasticsearch/es52/painless/sub_op.py new file mode 100644 index 0000000..4308858 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/sub_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import SubOp as SubOp_ +from jx_elasticsearch.es52.painless._utils import _binary_to_es_script + + +class SubOp(SubOp_): + to_es_script = _binary_to_es_script diff --git a/vendor/jx_elasticsearch/es52/painless/suffix_op.py b/vendor/jx_elasticsearch/es52/painless/suffix_op.py new file mode 100644 index 0000000..430dac7 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/suffix_op.py @@ -0,0 +1,35 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import SuffixOp as SuffixOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.missing_op import MissingOp +from jx_elasticsearch.es52.painless.or_op import OrOp +from jx_elasticsearch.es52.painless.true_op import true_script + + +class SuffixOp(SuffixOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + if not self.suffix: + return true_script + else: + return EsScript( + miss=OrOp( + [MissingOp(self.expr), MissingOp(self.suffix)] + ).partial_eval(), + expr="(" + + self.expr.to_es_script(schema) + + ").endsWith(" + + self.suffix.to_es_script(schema) + + ")", + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/true_op.py b/vendor/jx_elasticsearch/es52/painless/true_op.py new file mode 100644 index 0000000..c6c64b5 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/true_op.py @@ -0,0 +1,25 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import TrueOp, extend, TRUE +from jx_elasticsearch.es52.painless.es_script import EsScript +from mo_dots import Null +from mo_json import BOOLEAN + + +@extend(TrueOp) +def to_es_script(self, schema, not_null=False, boolean=False, many=True): + return true_script + + +true_script = EsScript(type=BOOLEAN, expr="true", frum=TRUE, schema=Null) + diff --git a/vendor/jx_elasticsearch/es52/painless/tuple_op.py b/vendor/jx_elasticsearch/es52/painless/tuple_op.py new file mode 100644 index 0000000..c765777 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/tuple_op.py @@ -0,0 +1,28 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FALSE, TupleOp as TupleOp_ +from jx_elasticsearch.es52.painless.es_script import EsScript +from jx_elasticsearch.es52.painless.first_op import FirstOp +from mo_future import text +from mo_json import OBJECT + + +class TupleOp(TupleOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + expr = ( + "new Object[]{" + + ",".join( + text(FirstOp(t).partial_eval().to_es_script(schema)) for t in self.terms + ) + + "}" + ) + return EsScript(type=OBJECT, expr=expr, many=FALSE, frum=self, schema=schema) diff --git a/vendor/jx_elasticsearch/es52/painless/union_op.py b/vendor/jx_elasticsearch/es52/painless/union_op.py new file mode 100644 index 0000000..bb72c2b --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/union_op.py @@ -0,0 +1,35 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import UnionOp as UnionOp_, merge_types +from jx_elasticsearch.es52.painless._utils import Painless +from jx_elasticsearch.es52.painless.es_script import EsScript + + +class UnionOp(UnionOp_): + def to_es_script(self, schema, not_null=False, boolean=False, many=True): + code = """ + HashSet output = new HashSet(); + {{LOOPS}} + return output.toArray(); + """ + parts = [ + Painless[t].partial_eval().to_es_script(schema, many=True) + for t in self.terms + ] + loops = ["for (v in " + p.expr + ") output.add(v);" for p in parts] + return EsScript( + type=merge_types(p.type for p in parts), + expr=code.replace("{{LOOPS}}", "\n".join(loops)), + many=True, + frum=self, + schema=schema, + ) diff --git a/vendor/jx_elasticsearch/es52/painless/variable.py b/vendor/jx_elasticsearch/es52/painless/variable.py new file mode 100644 index 0000000..aba0479 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/painless/variable.py @@ -0,0 +1,73 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
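+#
+# MAP A VARIABLE TO doc[...].value (OR .values FOR MULTIVALUED COLUMNS);
+# "." MAPS TO _source, _id IS EXTRACTED FROM _uid, AND MULTIPLE CANDIDATE
+# COLUMNS ARE COMBINED WITH CoalesceOp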
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import NULL, Variable as Variable_
+from jx_elasticsearch.es52.painless import first_op
+from jx_elasticsearch.es52.painless.coalesce_op import CoalesceOp
+from jx_elasticsearch.es52.painless.es_script import EsScript
+from mo_json import OBJECT, STRING
+from mo_logs.strings import quote
+
+
+class Variable(Variable_):
+    def __init__(self, var):
+        Variable_.__init__(self, var)
+
+    def to_es_script(self, schema, not_null=False, boolean=False, many=True):
+        if self.var == ".":
+            return EsScript(type=OBJECT, expr="_source", frum=self, schema=schema)
+        else:
+            if self.var == "_id":
+                return EsScript(
+                    type=STRING,
+                    expr='doc["_uid"].value.substring(doc["_uid"].value.indexOf(\'#\')+1)',
+                    frum=self,
+                    schema=schema,
+                )
+
+            columns = schema.values(self.var)
+            acc = []
+            for c in columns:
+                varname = c.es_column
+                frum = Variable(c.es_column)
+                q = quote(varname)
+                if c.multi > 1:
+                    acc.append(
+                        EsScript(
+                            miss=frum.missing(),
+                            type=c.jx_type,
+                            expr="doc[" + q + "].values",
+                            frum=frum,
+                            schema=schema,
+                            many=True
+                        )
+                    )
+                else:
+                    acc.append(
+                        EsScript(
+                            miss=frum.missing(),
+                            type=c.jx_type,
+                            expr="doc[" + q + "].value",
+                            frum=frum,
+                            schema=schema,
+                            many=False
+                        )
+                    )
+
+            if len(acc) == 0:
+                return NULL.to_es_script(schema)
+            elif len(acc) == 1:
+                return acc[0]
+            else:
+                return CoalesceOp(acc).to_es_script(schema)
+
+
+first_op.Variable = Variable
diff --git a/vendor/jx_elasticsearch/es52/painless/when_op.py b/vendor/jx_elasticsearch/es52/painless/when_op.py
new file mode 100644
index 0000000..a61d21f
--- /dev/null
+++ b/vendor/jx_elasticsearch/es52/painless/when_op.py
@@ -0,0 +1,84 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http:# mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import WhenOp as WhenOp_, FALSE, TRUE
+from jx_elasticsearch.es52.painless import _utils
+from jx_elasticsearch.es52.painless._utils import Painless
+from jx_elasticsearch.es52.painless.es_script import EsScript
+from jx_elasticsearch.es52.painless.false_op import false_script
+from jx_elasticsearch.es52.painless.true_op import true_script
+from mo_json import NUMBER, NUMBER_TYPES
+from mo_logs import Log
+
+
+class WhenOp(WhenOp_):
+    def to_es_script(self, schema, not_null=False, boolean=False, many=True):
+        if self.simplified:
+            when = Painless[self.when].to_es_script(schema)
+            then = Painless[self.then].to_es_script(schema)
+            els_ = Painless[self.els_].to_es_script(schema)
+
+            if when is true_script:
+                return then
+            elif when is false_script:
+                return els_
+            elif then.miss is TRUE:
+                return EsScript(
+                    miss=self.missing(),
+                    type=els_.type,
+                    expr=els_.expr,
+                    frum=self,
+                    schema=schema,
+                )
+            elif els_.miss is TRUE:
+                return EsScript(
+                    miss=self.missing(),
+                    type=then.type,
+                    expr=then.expr,
+                    frum=self,
+                    schema=schema,
+                )
+
+            elif then.miss is TRUE or els_.miss is FALSE or then.type == els_.type:
+                return EsScript(
+                    miss=self.missing(),
+                    type=then.type if els_.miss is TRUE else els_.type,
+                    expr="(" + when.expr + ") ? 
(" + + then.expr + + ") : (" + + els_.expr + + ")", + frum=self, + schema=schema, + ) + elif then.type in NUMBER_TYPES and els_.type in NUMBER_TYPES: + return EsScript( + miss=self.missing(), + type=NUMBER, + expr="(" + + when.expr + + ") ? (" + + then.expr + + ") : (" + + els_.expr + + ")", + frum=self, + schema=schema, + ) + else: + Log.error("do not know how to handle: {{self}}", self=self.__data__()) + else: + return self.partial_eval().to_es_script(schema) + + +_utils.WhenOp=WhenOp diff --git a/vendor/jx_elasticsearch/es52/set_bulk.py b/vendor/jx_elasticsearch/es52/set_bulk.py new file mode 100644 index 0000000..32ce808 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/set_bulk.py @@ -0,0 +1,244 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_elasticsearch.es52 import agg_bulk +from jx_elasticsearch.es52.agg_bulk import write_status, upload, URL_PREFIX +from jx_elasticsearch.es52.expressions import split_expression_by_path, ES52 +from jx_elasticsearch.es52.set_format import doc_formatter, row_formatter, format_table_header +from jx_elasticsearch.es52.set_op import get_selects, es_query_proto +from jx_elasticsearch.es52.util import jx_sort_to_es_sort +from mo_dots import wrap, Null +from mo_files import TempFile +from mo_json import value2json +from mo_logs import Log, Except +from mo_math import MIN +from mo_math.randoms import Random +from mo_threads import Thread +from mo_times import Date, Timer + +DEBUG = True +MAX_CHUNK_SIZE = 2000 +MAX_DOCUMENTS = 10 * 1000 * 1000 + + +def is_bulk_set(esq, query): + # ONLY ACCEPTING ONE DIMENSION AT THIS TIME + if not agg_bulk.S3_CONFIG: + return False + if query.destination not in {"s3", "url"}: + return False + if query.format not in {"list", "table"}: + return False + if query.groupby or query.edges: + return False + return True + + +def es_bulksetop(esq, frum, query): + abs_limit = MIN([query.limit, MAX_DOCUMENTS]) + guid = Random.base64(32, extra="-_") + + schema = query.frum.schema + query_path = schema.query_path[0] + new_select, split_select = get_selects(query) + split_wheres = split_expression_by_path(query.where, schema, lang=ES52) + es_query = es_query_proto(query_path, split_select, split_wheres, schema) + es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE]) + es_query.sort = jx_sort_to_es_sort(query.sort, schema) + if not es_query.sort: + es_query.sort = ["_doc"] + + formatter = formatters[query.format](abs_limit, new_select, query) + + Thread.run( + "Download " + guid, + extractor, + guid, + abs_limit, + esq, + es_query, + formatter, + parent_thread=Null, + ).release() + + output = wrap( + { + "url": URL_PREFIX / (guid + ".json"), + "status": URL_PREFIX / (guid + ".status.json"), + "meta": {"format": query.format, "es_query": es_query, "limit": abs_limit}, + } + ) + return output + + +def extractor(guid, abs_limit, esq, es_query, formatter, please_stop): + start_time = Date.now() + total = 0 + write_status( + guid, + { + "status": "starting", + "limit": abs_limit, + "start_time": start_time, + "timestamp": Date.now(), + }, + ) + + try: + with TempFile() as temp_file: + with open(temp_file.abspath, "wb") as output: + result = esq.es.search(es_query, scroll="5m") + + while not please_stop: + scroll_id = 
result._scroll_id + hits = result.hits.hits + chunk_limit = abs_limit - total + hits = hits[:chunk_limit] + if len(hits) == 0: + break + formatter.add(hits) + for b in formatter.bytes(): + if b is DONE: + break + output.write(b) + else: + total += len(hits) + DEBUG and Log.note( + "{{num}} of {{total}} downloaded", + num=total, + total=result.hits.total, + ) + write_status( + guid, + { + "status": "working", + "row": total, + "rows": result.hits.total, + "start_time": start_time, + "timestamp": Date.now(), + }, + ) + with Timer("get more", verbose=DEBUG): + result = esq.es.scroll(scroll_id) + continue + break + if please_stop: + Log.error("Bulk download stopped for shutdown") + for b in formatter.footer(): + output.write(b) + + write_status( + guid, + { + "status": "uploading to s3", + "rows": total, + "start_time": start_time, + "timestamp": Date.now(), + }, + ) + upload(guid + ".json", temp_file) + if please_stop: + Log.error("shutdown requested, did not complete download") + DEBUG and Log.note("Done. {{total}} uploaded", total=total) + write_status( + guid, + { + "ok": True, + "status": "done", + "rows": total, + "start_time": start_time, + "end_time": Date.now(), + "timestamp": Date.now(), + }, + ) + except Exception as e: + e = Except.wrap(e) + write_status( + guid, + { + "ok": False, + "status": "error", + "error": e, + "start_time": start_time, + "end_time": Date.now(), + "timestamp": Date.now(), + }, + ) + Log.warning("Could not extract", cause=e) + + +class ListFormatter(object): + def __init__(self, abs_limit, select, query): + self.header = b"{\"meta\":{\"format\":\"list\"},\"data\":[\n" + self.count = 0 + self.abs_limit = abs_limit + self.formatter = doc_formatter(select, query) + self.rows = None + + def add(self, rows): + self.rows = rows + + def bytes(self): + yield self.header + self.header = b",\n" + + comma = b"" + for r in self.rows: + yield comma + comma = b",\n" + yield value2json(self.formatter(r)).encode('utf8') + self.count += 1 + if self.count >= self.abs_limit: + yield DONE + + def footer(self): + yield b"\n]}" + + +DONE = object() + + +class TableFormatter(object): + def __init__(self, abs_limit, select, query): + self.count = 0 + self.abs_limit = abs_limit + self.formatter = row_formatter(select) + self.rows = None + self.pre = ( + b"{\"meta\":{\"format\":\"table\"},\"header\":" + + value2json(format_table_header(select, query)).encode('utf8') + + b",\n\"data\":[\n" + ) + + def add(self, rows): + self.rows = rows + + def bytes(self): + yield self.pre + self.pre = b",\n" + + comma = b"" + for r in self.rows: + yield comma + comma = b",\n" + yield value2json(self.formatter(r)).encode('utf8') + self.count += 1 + if self.count >= self.abs_limit: + yield DONE + + def footer(self): + yield b"\n]}" + + +formatters = { + "list": ListFormatter, + "table": TableFormatter +} diff --git a/vendor/jx_elasticsearch/es52/set_format.py b/vendor/jx_elasticsearch/es52/set_format.py new file mode 100644 index 0000000..aceaee2 --- /dev/null +++ b/vendor/jx_elasticsearch/es52/set_format.py @@ -0,0 +1,168 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
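+#
+# FORMAT SET-OPERATION RESULTS: EACH *_formatter() BUILDS A FUNCTION THAT
+# SHAPES ONE ES HIT, AND format_list/format_table/format_cube APPLY IT TO
+# ALL HITS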
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LeavesOp +from jx_base.language import is_op +from jx_python.containers.cube import Cube +from mo_collections.matrix import Matrix +from mo_dots import Data, is_data, is_list, unwrap, unwraplist, wrap, listwrap +from mo_files import mimetype +from mo_future import transpose +from mo_logs import Log +from mo_math import MAX +from mo_times.timer import Timer + + +def doc_formatter(select, query=None): + # RETURN A FUNCTION THAT RETURNS A FORMATTED ROW + + if is_list(query.select): + def format_object(doc): + r = Data() + for s in select: + v = unwraplist(s.pull(doc)) + if v is not None: + try: + r[s.put.name][s.put.child] = v + except Exception as e: + Log.error("what's happening here?", cause=e) + return r if r else None + return format_object + + if is_op(query.select.value, LeavesOp): + def format_deep(doc): + r = Data() + for s in select: + r[s.put.name][s.put.child] = unwraplist(s.pull(doc)) + return r if r else None + return format_deep + else: + def format_value(doc): + r = None + for s in select: + v = unwraplist(s.pull(doc)) + if v is None: + continue + if s.put.child == ".": + r = v + else: + if r is None: + r = Data() + r[s.put.child] = v + + return r + return format_value + + +def format_list(documents, select, query=None): + f = doc_formatter(select, query) + data = [f(row) for row in documents] + + return Data(meta={"format": "list"}, data=data) + + +def row_formatter(select): + # RETURN A FUNCTION THAT RETURNS A FORMATTED ROW + + select = listwrap(select) + num_columns = MAX(select.put.index) + 1 + + def format_row(doc): + row = [None] * num_columns + for s in select: + value = unwraplist(s.pull(doc)) + + if value == None: + continue + + index, child = s.put.index, s.put.child + if child == ".": + row[index] = value + else: + if row[index] is None: + row[index] = Data() + row[index][child] = value + return row + + return format_row + + +def format_table(T, select, query=None): + form = row_formatter(select) + + data = [form(row) for row in T] + header = format_table_header(select, query) + + return Data(meta={"format": "table"}, header=header, data=data) + + +def format_table_header(select, query): + num_columns = MAX(select.put.index) + 1 + header = [None] * num_columns + + if is_data(query.select) and not is_op(query.select.value, LeavesOp): + for s in select: + header[s.put.index] = s.name + else: + for s in select: + if header[s.put.index]: + continue + header[s.put.index] = s.name + + return header + + +def format_cube(T, select, query=None): + with Timer("format table"): + table = format_table(T, select, query) + + if len(table.data) == 0: + return Cube( + scrub_select(select), + edges=[ + { + "name": "rownum", + "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}, + } + ], + data={h: Matrix(list=[]) for i, h in enumerate(table.header)}, + ) + + cols = transpose(*unwrap(table.data)) + return Cube( + scrub_select(select), + edges=[ + { + "name": "rownum", + "domain": { + "type": "rownum", + "min": 0, + "max": len(table.data), + "interval": 1, + }, + } + ], + data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}, + ) + + +def scrub_select(select): + return wrap( + [{k: v for k, v in s.items() if k not in ["pull", "put"]} for s in select] + ) + +set_formatters = { + # RESPONSE FORMATTER, SETUP_ROW_FORMATTER, DATATYPE + None: (format_cube, None, mimetype.JSON), + "cube": (format_cube, None, 
mimetype.JSON), + "table": (format_table, row_formatter, mimetype.JSON), + "list": (format_list, doc_formatter, mimetype.JSON), +} diff --git a/vendor/jx_elasticsearch/es52/set_op.py b/vendor/jx_elasticsearch/es52/set_op.py new file mode 100644 index 0000000..ece786a --- /dev/null +++ b/vendor/jx_elasticsearch/es52/set_op.py @@ -0,0 +1,458 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.domains import ALGEBRAIC +from jx_base.expressions import LeavesOp, Variable, IDENTITY +from jx_base.language import is_op +from jx_base.query import DEFAULT_LIMIT +from jx_elasticsearch.es52.expressions import ( + AndOp, + ES52, + split_expression_by_path, + MATCH_ALL, + es_and, + es_or, +) +from jx_elasticsearch.es52.painless import Painless +from jx_elasticsearch.es52.set_format import set_formatters +from jx_elasticsearch.es52.util import jx_sort_to_es_sort +from jx_python.expressions import jx_expression_to_function +from mo_dots import ( + Data, + FlatList, + coalesce, + concat_field, + join_field, + listwrap, + literal_field, + relative_field, + set_default, + split_field, + unwrap, + unwraplist, + wrap, +) +from mo_future import first, text +from mo_json import NESTED, STRUCT +from mo_json.typed_encoder import decode_property, unnest_path, untype_path, untyped +from mo_logs import Log +from mo_math import AND +from mo_times.timer import Timer + +DEBUG = False + + +def is_setop(es, query): + select = listwrap(query.select) + + if not query.edges: + isDeep = ( + len(split_field(query.frum.name)) > 1 + ) # LOOKING INTO NESTED WILL REQUIRE A SCRIPT + simpleAgg = AND( + [s.aggregate in ("count", "none") for s in select] + ) # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT + + # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE + if simpleAgg or isDeep: + return True + else: + isSmooth = AND( + (e.domain.type in ALGEBRAIC and e.domain.interval == "none") + for e in query.edges + ) + if isSmooth: + return True + + return False + + +def get_selects(query): + schema = query.frum.schema + split_select = {".": ESSelect(".")} + + def get_select(path): + es_select = split_select.get(path) + if not es_select: + es_select = split_select[path] = ESSelect(path) + return es_select + + selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)]) + new_select = FlatList() + put_index = 0 + for select in selects: + # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS + if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable): + term = select.value.term + leaves = schema.leaves(term.var) + for c in leaves: + full_name = concat_field( + select.name, relative_field(untype_path(c.name), term.var) + ) + if c.jx_type == NESTED: + get_select(".").set_op = True + new_select.append( + { + "name": full_name, + "value": Variable(c.es_column), + "put": { + "name": literal_field(full_name), + "index": put_index, + "child": ".", + }, + "pull": get_pull_source(c.es_column), + } + ) + put_index += 1 + else: + get_select(c.nested_path[0]).fields.append(c.es_column) + new_select.append( + { + "name": full_name, + "value": Variable(c.es_column), + "put": { + "name": literal_field(full_name), + "index": put_index, + "child": ".", + }, + } + ) + put_index += 1 + elif 
is_op(select.value, Variable): + s_column = select.value.var + + if s_column == ".": + # PULL ALL SOURCE + get_select(".").set_op = True + new_select.append( + { + "name": select.name, + "value": select.value, + "put": {"name": select.name, "index": put_index, "child": "."}, + "pull": get_pull_source("."), + } + ) + continue + + leaves = schema.leaves(s_column) # LEAVES OF OBJECT + # nested_selects = {} + if leaves: + if any(c.jx_type == NESTED for c in leaves): + # PULL WHOLE NESTED ARRAYS + get_select(".").set_op = True + for c in leaves: + if ( + len(c.nested_path) == 1 + ): # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES + pre_child = join_field( + decode_property(n) for n in split_field(c.name) + ) + new_select.append( + { + "name": select.name, + "value": Variable(c.es_column), + "put": { + "name": select.name, + "index": put_index, + "child": untype_path( + relative_field(pre_child, s_column) + ), + }, + "pull": get_pull_source(c.es_column), + } + ) + else: + # PULL ONLY WHAT'S NEEDED + for c in leaves: + c_nested_path = c.nested_path[0] + if c_nested_path == ".": + if c.es_column == "_id": + new_select.append( + { + "name": select.name, + "value": Variable(c.es_column), + "put": { + "name": select.name, + "index": put_index, + "child": ".", + }, + "pull": lambda row: row._id, + } + ) + elif c.jx_type == NESTED: + get_select(".").set_op = True + pre_child = join_field( + decode_property(n) for n in split_field(c.name) + ) + new_select.append( + { + "name": select.name, + "value": Variable(c.es_column), + "put": { + "name": select.name, + "index": put_index, + "child": untype_path( + relative_field(pre_child, s_column) + ), + }, + "pull": get_pull_source(c.es_column), + } + ) + else: + get_select(c_nested_path).fields.append(c.es_column) + pre_child = join_field( + decode_property(n) for n in split_field(c.name) + ) + new_select.append( + { + "name": select.name, + "value": Variable(c.es_column), + "put": { + "name": select.name, + "index": put_index, + "child": untype_path( + relative_field(pre_child, s_column) + ), + }, + } + ) + else: + es_select = get_select(c_nested_path) + es_select.fields.append(c.es_column) + + child = relative_field( + untype_path( + relative_field(c.name, schema.query_path[0]) + ), + s_column, + ) + pull = accumulate_nested_doc( + c_nested_path, + Variable( + relative_field(s_column, unnest_path(c_nested_path)) + ), + ) + new_select.append( + { + "name": select.name, + "value": select.value, + "put": { + "name": select.name, + "index": put_index, + "child": child, + }, + "pull": pull, + } + ) + else: + new_select.append( + { + "name": select.name, + "value": Variable("$dummy"), + "put": {"name": select.name, "index": put_index, "child": "."}, + } + ) + put_index += 1 + else: + split_scripts = split_expression_by_path( + select.value, schema, lang=Painless + ) + for p, script in split_scripts.items(): + es_select = get_select(p) + es_select.scripts[select.name] = { + "script": text( + Painless[first(script)].partial_eval().to_es_script(schema) + ) + } + new_select.append( + { + "name": select.name, + "pull": jx_expression_to_function( + "fields." 
+ literal_field(select.name)
+                    ),
+                    "put": {"name": select.name, "index": put_index, "child": "."},
+                }
+            )
+            put_index += 1
+    for n in new_select:
+        if n.pull:
+            continue
+        elif is_op(n.value, Variable):
+            if get_select(".").set_op:
+                n.pull = get_pull_source(n.value.var)
+            elif n.value == "_id":
+                n.pull = jx_expression_to_function("_id")
+            else:
+                n.pull = jx_expression_to_function(
+                    concat_field("fields", literal_field(n.value.var))
+                )
+        else:
+            Log.error("Do not know what to do")
+    return new_select, split_select
+
+
+def es_setop(es, query):
+    schema = query.frum.schema
+    query_path = schema.query_path[0]
+
+    new_select, split_select = get_selects(query)
+
+    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
+    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
+    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
+    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
+
+    with Timer("call to ES", silent=DEBUG) as call_timer:
+        result = es.search(es_query)
+
+    # Log.note("{{result}}", result=result)
+
+    T = result.hits.hits
+
+    try:
+        formatter, _, mime_type = set_formatters[query.format]
+
+        with Timer("formatter", silent=True):
+            output = formatter(T, new_select, query)
+        output.meta.timing.es = call_timer.duration
+        output.meta.content_type = mime_type
+        output.meta.es_query = es_query
+        return output
+    except Exception as e:
+        Log.error("problem formatting", cause=e)
+
+
+def accumulate_nested_doc(nested_path, expr=IDENTITY):
+    """
+    :param nested_path: THE PATH USED TO EXTRACT THE NESTED RECORDS
+    :param expr: FUNCTION USED ON THE NESTED OBJECT TO GET SPECIFIC VALUE
+    :return: THE DE_TYPED NESTED OBJECT ARRAY
+    """
+    name = literal_field(nested_path)
+
+    def output(doc):
+        acc = []
+        for h in doc.inner_hits[name].hits.hits:
+            i = h._nested.offset
+            obj = Data()
+            for f, v in h.fields.items():
+                local_path = untype_path(relative_field(f, nested_path))
+                obj[local_path] = unwraplist(v)
+            # EXTEND THE LIST TO THE LENGTH WE REQUIRE
+            for _ in range(len(acc), i + 1):
+                acc.append(None)
+            acc[i] = expr(obj)
+        return acc
+
+    return output
+
+
+def get_pull(column):
+    if column.nested_path[0] == ".":
+        return concat_field("fields", literal_field(column.es_column))
+    else:
+        rel_name = relative_field(column.es_column, column.nested_path[0])
+        return concat_field("_inner", rel_name)
+
+
+def get_pull_function(column):
+    func = jx_expression_to_function(get_pull(column))
+    if column.jx_type in STRUCT:
+        return lambda doc: untyped(func(doc))
+    else:
+        return func
+
+
+def get_pull_source(es_column):
+    def output(row):
+        return untyped(row._source[es_column])
+
+    return output
+
+
+def get_pull_stats():
+    return jx_expression_to_function(
+        {
+            "select": [
+                {"name": "count", "value": "count"},
+                {"name": "sum", "value": "sum"},
+                {"name": "min", "value": "min"},
+                {"name": "max", "value": "max"},
+                {"name": "avg", "value": "avg"},
+                {"name": "sos", "value": "sum_of_squares"},
+                {"name": "std", "value": "std_deviation"},
+                {"name": "var", "value": "variance"},
+            ]
+        }
+    )
+
+
+class ESSelect(object):
+    """
+    ACCUMULATE THE FIELDS WE ARE INTERESTED IN
+    """
+
+    def __init__(self, path):
+        self.path = path
+        self.set_op = False
+        self.fields = []
+        self.scripts = {}
+
+    def to_es(self):
+        return {
+            "_source": self.set_op,
+            "stored_fields": self.fields if not self.set_op else None,
+            "script_fields": self.scripts if self.scripts else None,
+        }
+
+
+def es_query_proto(path, selects, wheres, schema):
+    """
+    RETURN AN ES QUERY, NESTED AS DEEP AS path REQUIRES 
+    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
+    :param selects: MAP FROM path TO ESSelect INSTANCE
+    :param wheres: MAP FROM path TO LIST OF WHERE CONDITIONS
+    :return: es_query
+    """
+    output = None
+    last_where = MATCH_ALL
+    for p in reversed(sorted(set(wheres.keys()) | set(selects.keys()))):
+        where = wheres.get(p)
+        select = selects.get(p)
+
+        if where:
+            where = AndOp(where).partial_eval().to_esfilter(schema)
+            if output:
+                where = es_or([es_and([output, where]), where])
+        else:
+            if output:
+                if last_where is MATCH_ALL:
+                    where = es_or([output, MATCH_ALL])
+                else:
+                    where = output
+            else:
+                where = MATCH_ALL
+
+        if p == ".":
+            output = set_default(
+                {"from": 0, "size": 0, "sort": [], "query": where}, select.to_es()
+            )
+        else:
+            output = {
+                "nested": {
+                    "path": p,
+                    "inner_hits": set_default({"size": 100000}, select.to_es())
+                    if select
+                    else None,
+                    "query": where,
+                }
+            }
+
+        last_where = where
+    return output
diff --git a/vendor/jx_elasticsearch/es52/setop.py b/vendor/jx_elasticsearch/es52/setop.py
deleted file mode 100644
index 413ea65..0000000
--- a/vendor/jx_elasticsearch/es52/setop.py
+++ /dev/null
@@ -1,390 +0,0 @@
-# encoding: utf-8
-#
-#
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http:# mozilla.org/MPL/2.0/.
-#
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
-#
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
-
-from collections import Mapping
-
-from jx_base.domains import ALGEBRAIC
-from jx_base.expressions import IDENTITY
-from jx_base.query import DEFAULT_LIMIT
-from jx_elasticsearch import post as es_post
-from jx_elasticsearch.es52.expressions import Variable, LeavesOp
-from jx_elasticsearch.es52.util import jx_sort_to_es_sort, es_query_template, es_and, es_or, es_script
-from jx_python.containers.cube import Cube
-from jx_python.expressions import jx_expression_to_function
-from mo_collections.matrix import Matrix
-from mo_dots import coalesce, split_field, set_default, Data, unwraplist, literal_field, unwrap, wrap, concat_field, relative_field, join_field, listwrap
-from mo_dots.lists import FlatList
-from mo_future import transpose
-from mo_json.typed_encoder import NESTED
-from mo_json.typed_encoder import untype_path, unnest_path, untyped
-from mo_logs import Log
-from mo_math import AND, MAX
-from mo_times.timer import Timer
-
-format_dispatch = {}
-
-
-def is_setop(es, query):
-    select = listwrap(query.select)
-
-    if not query.edges:
-        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
-        simpleAgg = AND([s.aggregate in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
-
-        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
-        if simpleAgg or isDeep:
-            return True
-    else:
-        isSmooth = AND((e.domain.type in ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
-        if isSmooth:
-            return True
-
-    return False
-
-
-def es_setop(es, query):
-    schema = query.frum.schema
-
-    es_query, filters = es_query_template(schema.query_path[0])
-    nested_filter = None
-    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
-    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
-    es_query.stored_fields = FlatList()
-
-    selects = wrap([s.copy() for s in listwrap(query.select)])
-    new_select = FlatList()
-    schema = query.frum.schema
-    # columns = schema.columns
-    # 
-        nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")
-
-    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
-
-    put_index = 0
-    for select in selects:
-        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
-        if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
-            term = select.value.term
-            leaves = schema.leaves(term.var)
-            for c in leaves:
-                full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
-                if c.jx_type == NESTED:
-                    es_query.stored_fields = ["_source"]
-                    new_select.append({
-                        "name": full_name,
-                        "value": Variable(c.es_column),
-                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
-                        "pull": get_pull_source(c.es_column)
-                    })
-                    put_index += 1
-                elif c.nested_path[0] != ".":
-                    pass  # THE NESTED PARENT WILL CAPTURE THIS
-                else:
-                    es_query.stored_fields += [c.es_column]
-                    new_select.append({
-                        "name": full_name,
-                        "value": Variable(c.es_column),
-                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
-                    })
-                    put_index += 1
-        elif isinstance(select.value, Variable):
-            s_column = select.value.var
-            # LEAVES OF OBJECT
-            leaves = schema.leaves(s_column)
-            nested_selects = {}
-            if leaves:
-                if s_column == '.':
-                    # PULL ALL SOURCE
-                    es_query.stored_fields = ["_source"]
-                    new_select.append({
-                        "name": select.name,
-                        "value": select.value,
-                        "put": {"name": select.name, "index": put_index, "child": "."},
-                        "pull": get_pull_source(".")
-                    })
-                elif any(c.jx_type == NESTED for c in leaves):
-                    # PULL WHOLE NESTED ARRAYS
-                    es_query.stored_fields = ["_source"]
-                    for c in leaves:
-                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRT LEVEL PROPERTIES
-                            jx_name = untype_path(c.names["."])
-                            new_select.append({
-                                "name": select.name,
-                                "value": Variable(c.es_column),
-                                "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
-                                "pull": get_pull_source(c.es_column)
-                            })
-                else:
-                    # PULL ONLY WHAT'S NEEDED
-                    for c in leaves:
-                        if len(c.nested_path) == 1:
-                            jx_name = untype_path(c.names["."])
-                            if c.jx_type == NESTED:
-                                es_query.stored_fields = ["_source"]
-                                new_select.append({
-                                    "name": select.name,
-                                    "value": Variable(c.es_column),
-                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
-                                    "pull": get_pull_source(c.es_column)
-                                })
-                            else:
-                                es_query.stored_fields += [c.es_column]
-                                new_select.append({
-                                    "name": select.name,
-                                    "value": Variable(c.es_column),
-                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
-                                })
-                        else:
-                            if not nested_filter:
-                                where = filters[0].copy()
-                                nested_filter = [where]
-                                for k in filters[0].keys():
-                                    filters[0][k] = None
-                                set_default(
-                                    filters[0],
-                                    es_and([where, es_or(nested_filter)])
-                                )
-
-                            nested_path = c.nested_path[0]
-                            if nested_path not in nested_selects:
-                                where = nested_selects[nested_path] = Data()
-                                nested_filter += [where]
-                                where.nested.path = nested_path
-                                where.nested.query.match_all = {}
-                                where.nested.inner_hits._source = False
-                                where.nested.inner_hits.stored_fields += [c.es_column]
-
-                                child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
-                                pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
-                                new_select.append({
-                                    "name": select.name,
-                                    "value": select.value,
-                                    "put": {
-                                        "name": select.name,
-                                        "index": put_index,
-                                        "child": child
-                                    },
-                                    "pull": pull
-                                })
-                            else:
-                                nested_selects[nested_path].nested.inner_hits.stored_fields += [c.es_column]
-            else:
-                new_select.append({
-                    "name": select.name,
-                    "value": Variable("$dummy"),
-                    "put": {"name": select.name, "index": put_index, "child": "."}
-                })
-            put_index += 1
-        else:
-            painless = select.value.partial_eval().to_es_script(schema)
-            es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
-            new_select.append({
-                "name": select.name,
-                "pull": jx_expression_to_function("fields." + literal_field(select.name)),
-                "put": {"name": select.name, "index": put_index, "child": "."}
-            })
-            put_index += 1
-
-    for n in new_select:
-        if n.pull:
-            continue
-        elif isinstance(n.value, Variable):
-            if es_query.stored_fields[0] == "_source":
-                es_query.stored_fields = ["_source"]
-                n.pull = get_pull_source(n.value.var)
-            elif n.value == "_id":
-                n.pull = jx_expression_to_function("_id")
-            else:
-                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
-        else:
-            Log.error("Do not know what to do")
-
-    with Timer("call to ES") as call_timer:
-        data = es_post(es, es_query, query.limit)
-
-    T = data.hits.hits
-
-    try:
-        formatter, groupby_formatter, mime_type = format_dispatch[query.format]
-
-        with Timer("formatter"):
-            output = formatter(T, new_select, query)
-        output.meta.timing.es = call_timer.duration
-        output.meta.content_type = mime_type
-        output.meta.es_query = es_query
-        return output
-    except Exception as e:
-        Log.error("problem formatting", e)
-
-
-def accumulate_nested_doc(nested_path, expr=IDENTITY):
-    """
-    :param nested_path: THE PATH USED TO EXTRACT THE NESTED RECORDS
-    :param expr: FUNCTION USED ON THE NESTED OBJECT TO GET SPECIFIC VALUE
-    :return: THE DE_TYPED NESTED OBJECT ARRAY
-    """
-    name = literal_field(nested_path)
-
-    def output(doc):
-        acc = []
-        for h in doc.inner_hits[name].hits.hits:
-            i = h._nested.offset
-            obj = Data()
-            for f, v in h.fields.items():
-                local_path = untype_path(relative_field(f, nested_path))
-                obj[local_path] = unwraplist(v)
-            # EXTEND THE LIST TO THE LENGTH WE REQUIRE
-            for _ in range(len(acc), i + 1):
-                acc.append(None)
-            acc[i] = expr(obj)
-        return acc
-
-    return output
-
-
-def format_list(T, select, query=None):
-    data = []
-    if isinstance(query.select, list):
-        for row in T:
-            r = Data()
-            for s in select:
-                v = s.pull(row)
-                r[s.put.name][s.put.child] = unwraplist(v)
-            data.append(r if r else None)
-    elif isinstance(query.select.value, LeavesOp):
-        for row in T:
-            r = Data()
-            for s in select:
-                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
-            data.append(r if r else None)
-    else:
-        for row in T:
-            r = None
-            for s in select:
-                v = unwraplist(s.pull(row))
-                if v is None:
-                    continue
-                if s.put.child == ".":
-                    r = v
-                else:
-                    if r is None:
-                        r = Data()
-                    r[s.put.child] = v
-
-            data.append(r)
-
-    return Data(
-        meta={"format": "list"},
-        data=data
-    )
-
-
-def format_table(T, select, query=None):
-    data = []
-    num_columns = (MAX(select.put.index) + 1)
-    for row in T:
-        r = [None] * num_columns
-        for s in select:
-            value = unwraplist(s.pull(row))
-
-            if value == None:
-                continue
-
-            index, child = s.put.index, s.put.child
-            if child == ".":
-                r[index] = value
-            else:
-                if r[index] is None:
-                    r[index] = Data()
-                r[index][child] = value
-
-        data.append(r)
-
-    header = [None] * num_columns
-
-    if isinstance(query.select, Mapping) and not isinstance(query.select.value, LeavesOp):
-        for s in select:
-            header[s.put.index] = s.name
-    else:
-        for s in select:
-            if header[s.put.index]:
-                continue
-            if s.name == ".":
-                header[s.put.index] = "."
-            else:
-                header[s.put.index] = s.name
-
-    return Data(
-        meta={"format": "table"},
-        header=header,
-        data=data
-    )
-
-
-def format_cube(T, select, query=None):
-    with Timer("format table"):
-        table = format_table(T, select, query)
-
-    if len(table.data) == 0:
-        return Cube(
-            select,
-            edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}}],
-            data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
-        )
-
-    cols = transpose(*unwrap(table.data))
-    return Cube(
-        select,
-        edges=[{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}}],
-        data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}
-    )
-
-
-set_default(format_dispatch, {
-    None: (format_cube, None, "application/json"),
-    "cube": (format_cube, None, "application/json"),
-    "table": (format_table, None, "application/json"),
-    "list": (format_list, None, "application/json")
-})
-
-
-def get_pull(column):
-    if column.nested_path[0] == ".":
-        return concat_field("fields", literal_field(column.es_column))
-    else:
-        depth = len(split_field(column.nested_path[0]))
-        rel_name = split_field(column.es_column)[depth:]
-        return join_field(["_inner"] + rel_name)
-
-
-def get_pull_function(column):
-    return jx_expression_to_function(get_pull(column))
-
-
-def get_pull_source(es_column):
-    def output(row):
-        return untyped(row._source[es_column])
-    return output
-
-
-def get_pull_stats(stats_name, median_name):
-    return jx_expression_to_function({"select": [
-        {"name": "count", "value": stats_name + ".count"},
-        {"name": "sum", "value": stats_name + ".sum"},
-        {"name": "min", "value": stats_name + ".min"},
-        {"name": "max", "value": stats_name + ".max"},
-        {"name": "avg", "value": stats_name + ".avg"},
-        {"name": "sos", "value": stats_name + ".sum_of_squares"},
-        {"name": "std", "value": stats_name + ".std_deviation"},
-        {"name": "var", "value": stats_name + ".variance"},
-        {"name": "median", "value": median_name + ".values.50\\.0"}
-    ]})
-
diff --git a/vendor/jx_elasticsearch/es52/stats.py b/vendor/jx_elasticsearch/es52/stats.py
new file mode 100644
index 0000000..2ab371f
--- /dev/null
+++ b/vendor/jx_elasticsearch/es52/stats.py
@@ -0,0 +1,75 @@
+# encoding: utf-8
+#
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http:# mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_base.expressions import Expression
+from jx_elasticsearch.meta import Table
+from mo_dots import listwrap, set_default
+from mo_future import is_text
+from mo_logs import Log
+from mo_times import Date
+
+DEBUG = True
+
+COMMON = {}
+
+
+class QueryStats(object):
+    def __new__(cls, cluster):
+        existing = COMMON.get(id(cluster))
+        if not existing:
+            existing = COMMON[id(cluster)] = object.__new__(cls)
+        return existing
+
+    def __init__(self, cluster):
+        if hasattr(self, "index"):
+            return
+
+        self.index = cluster.get_or_create_index(
+            index="meta.stats", typed=False, schema=SCHEMA
+        )
+        self.todo = self.index.threaded_queue(max_size=100, period=60)
+
+    def record(self, query):
+        try:
+            vars_record = get_stats(query)
+        except Exception as e:
+            Log.warning("problem processing query stats", cause=e)
+            return  # NOTHING TO RECORD
+        self.todo.extend({"value": v} for v in vars_record)
+
+
+def get_stats(query):
+    frum = query.frum
+    if isinstance(frum, Table):
+        vars_record = {"table": frum.name}
+    elif is_text(frum):
+        vars_record = {"table": frum}
+    else:
+        vars_record = get_stats(frum)
+    now = Date.now()
+    vars_record['timestamp'] = now
+
+    output = []
+    for clause in ["select", "edges", "groupby", "window", "sort"]:
+        vars_record["mode"] = clause
+        for expr in listwrap(getattr(query, clause)):
+            if isinstance(expr.value, Expression):
+                for v in expr.value.vars():
+                    output.append(set_default({"column": v.var}, vars_record))
+    for v in query.where.vars():
+        output.append(set_default({"column": v.var, "mode": "where"}, vars_record))
+    return output
+
+
+SCHEMA = {
+    "settings": {"index.number_of_shards": 1, "index.number_of_replicas": 2},
+    "mappings": {"stats": {"properties": {}}},
+}
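Note on `stats.py` above: `QueryStats` keeps exactly one instance per ES cluster by overriding `__new__` to key on `id(cluster)`; `__init__` still runs on every construction, so it bails out once the instance already carries an `index` attribute. A minimal sketch of the same pattern, with illustrative names (`Registry`, `_instances`) that are not part of this patch:

    _instances = {}

    class Registry(object):
        def __new__(cls, cluster):
            # REUSE THE INSTANCE ALREADY BUILT FOR THIS CLUSTER, IF ANY
            existing = _instances.get(id(cluster))
            if not existing:
                existing = _instances[id(cluster)] = object.__new__(cls)
            return existing

        def __init__(self, cluster):
            if hasattr(self, "cluster"):
                return  # __init__ RUNS ON EVERY CALL; ONLY CONFIGURE ONCE
            self.cluster = cluster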
diff --git a/vendor/jx_elasticsearch/es52/util.py b/vendor/jx_elasticsearch/es52/util.py
index e20f5a4..8f2e0cc 100644
--- a/vendor/jx_elasticsearch/es52/util.py
+++ b/vendor/jx_elasticsearch/es52/util.py
@@ -5,27 +5,30 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http:# mozilla.org/MPL/2.0/.
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
-from jx_elasticsearch.es52.expressions import Variable
-from mo_dots import wrap
-from mo_future import text_type
-from mo_json.typed_encoder import STRING, BOOLEAN, NUMBER, OBJECT
+import mo_math
+from jx_base.expressions import Variable
+from jx_base.language import is_op
+from jx_base.query import DEFAULT_LIMIT, MAX_LIMIT
+from jx_elasticsearch.es52.expressions.and_op import es_and
+from mo_dots import wrap, Null, coalesce
+from mo_future import is_text, first
+from mo_json import BOOLEAN, IS_NULL, NUMBER, OBJECT, STRING, NUMBER_TYPES
 from mo_logs import Log
+from pyLibrary.convert import value2boolean
 
 
 def es_query_template(path):
     """
     RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
     :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
-    :return:
+    :return: (es_query, es_filters) TUPLE
     """
-    if not isinstance(path, text_type):
+    if not is_text(path):
         Log.error("expecting path to be a string")
 
     if path != ".":
@@ -62,7 +65,7 @@ def jx_sort_to_es_sort(sort, schema):
 
     output = []
     for s in sort:
-        if isinstance(s.value, Variable):
+        if is_op(s.value, Variable):
             cols = schema.leaves(s.value.var)
             if s.sort == -1:
                 types = OBJECT, STRING, NUMBER, BOOLEAN
@@ -71,11 +74,21 @@ def jx_sort_to_es_sort(sort, schema):
 
             for type in types:
                 for c in cols:
-                    if c.jx_type == type:
-                        if s.sort == -1:
-                            output.append({c.es_column: "desc"})
+                    if c.jx_type == type or (c.jx_type in NUMBER_TYPES and type in NUMBER_TYPES):
+                        np = first(c.nested_path)
+                        if np == '.':
+                            if s.sort == -1:
+                                output.append({c.es_column: "desc"})
+                            else:
+                                output.append(c.es_column)
                         else:
-                            output.append(c.es_column)
+                            output.append({c.es_column: {
+                                "order": {1: "asc", -1: "desc"}[s.sort],
+                                "nested": {
+                                    "path": np,
+                                    "filter": {"match_all": {}}
+                                },
+                            }})
         else:
             from mo_logs import Log
 
@@ -91,8 +104,11 @@
     "sum": "sum",
     "add": "sum",
     "count": "value_count",
+    "count_values": "count_values",
     "maximum": "max",
     "minimum": "min",
+    "and": "min",
+    "or": "max",
     "max": "max",
     "min": "min",
     "mean": "avg",
@@ -115,21 +131,23 @@
 
 NON_STATISTICAL_AGGS = {"none", "one"}
 
 
-def es_and(terms):
-    return wrap({"bool": {"filter": terms}})
+pull_functions = {
+    IS_NULL: lambda x: None,
+    STRING: lambda x: x,
+    NUMBER: lambda x: float(x) if x != None else None,
+    BOOLEAN: value2boolean,
+}
 
 
-def es_or(terms):
-    return wrap({"bool": {"should": terms}})
+def temper_limit(proposed_limit, query):
+    """
+    SUITABLE DEFAULTS AND LIMITS
+    """
+    from jx_elasticsearch.es52.agg_bulk import is_bulk_agg
+    from jx_elasticsearch.es52.set_bulk import is_bulk_set
 
+    if is_bulk_agg(Null, query) or is_bulk_set(Null, query):
+        return coalesce(proposed_limit, query.limit)
+    else:
+        return mo_math.min(coalesce(proposed_limit, query.limit, DEFAULT_LIMIT), MAX_LIMIT)
 
-def es_not(term):
-    return wrap({"bool": {"must_not": term}})
-
-
-def es_script(term):
-    return wrap({"script": {"lang": "painless", "inline": term}})
-
-
-def es_missing(term):
-    return {"bool": {"must_not": {"exists": {"field": term}}}}
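Note on the `jx_sort_to_es_sort` change above: a column that lives under a nested path can no longer be sorted by field name alone; Elasticsearch needs a `nested` clause naming the path. Restating the shape of the clause the new code emits (the field and path names here are illustrative, not from this patch):

    # s.sort == -1 ON A COLUMN WHOSE DEEPEST nested_path IS "result.subtests"
    sort_clause = {
        "result.subtests.ok.~b~": {
            "order": "desc",                  # {1: "asc", -1: "desc"}[s.sort]
            "nested": {
                "path": "result.subtests",    # np = first(c.nested_path)
                "filter": {"match_all": {}},  # CONSIDER ALL NESTED DOCS
            },
        }
    }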
diff --git a/vendor/jx_elasticsearch/meta.py b/vendor/jx_elasticsearch/meta.py
index 231c5d7..052bd76 100644
--- a/vendor/jx_elasticsearch/meta.py
+++ b/vendor/jx_elasticsearch/meta.py
@@ -5,45 +5,78 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http:# mozilla.org/MPL/2.0/.
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
 import itertools
-from itertools import product
+from datetime import date, datetime
+from decimal import Decimal
 
 import jx_base
-from jx_base import TableDesc
+from jx_base import TableDesc, Column
+from jx_base.meta_columns import (
+    META_COLUMNS_DESC,
+    META_COLUMNS_NAME,
+    META_TABLES_DESC,
+    META_TABLES_NAME,
+)
 from jx_base.namespace import Namespace
 from jx_base.query import QueryOp
+from jx_elasticsearch import elasticsearch
+from jx_elasticsearch.elasticsearch import (
+    _get_best_type_from_mapping,
+    es_type_to_json_type,
+)
+from jx_elasticsearch.meta_columns import ColumnList
 from jx_python import jx
 from jx_python.containers.list_usingPythonList import ListContainer
-from jx_python.meta import ColumnList, Column
-from mo_collections.relation import Relation_usingList
-from mo_dots import Data, relative_field, SELF_PATH, ROOT_PATH, coalesce, set_default, Null, split_field, join_field, wrap, concat_field, startswith_field, literal_field
-from mo_json.typed_encoder import EXISTS_TYPE, untype_path, unnest_path, OBJECT, EXISTS, STRUCT, BOOLEAN
+from mo_dots import (
+    Data,
+    FlatList,
+    NullType,
+    ROOT_PATH,
+    coalesce,
+    concat_field,
+    is_list,
+    literal_field,
+    relative_field,
+    set_default,
+    split_field,
+    startswith_field,
+    tail_field,
+    wrap,
+    listwrap, unwrap)
+from mo_dots.lists import last
+from mo_future import first, long, none_type, text
+from mo_json import BOOLEAN, EXISTS, OBJECT, STRUCT
+from mo_json.typed_encoder import (
+    BOOLEAN_TYPE,
+    EXISTS_TYPE,
+    NUMBER_TYPE,
+    STRING_TYPE,
+    unnest_path,
+    untype_path,
+    NESTED_TYPE, get_nested_path)
 from mo_kwargs import override
 from mo_logs import Log
 from mo_logs.exceptions import Except
 from mo_logs.strings import quote
-from mo_math import MAX
-from mo_threads import Queue, THREAD_STOP, Thread, Till
-from mo_times import HOUR, MINUTE, Timer, Date
-from pyLibrary.env import elasticsearch
-from pyLibrary.env.elasticsearch import es_type_to_json_type, _get_best_type_from_mapping
+from mo_threads import Queue, THREAD_STOP, Thread, Till, MAIN_THREAD
+from mo_times import Date, HOUR, MINUTE, Timer, WEEK
 
-MAX_COLUMN_METADATA_AGE = 12 * HOUR
-ENABLE_META_SCAN = True
 DEBUG = False
-TOO_OLD = 2*HOUR
+ENABLE_META_SCAN = True
+TOO_OLD = 24 * HOUR
 OLD_METADATA = MINUTE
+MAX_COLUMN_METADATA_AGE = 12 * HOUR
 
 TEST_TABLE_PREFIX = "testing"  # USED TO TURN OFF COMPLAINING ABOUT TEST INDEXES
 
 known_clusters = {}  # MAP FROM id(Cluster) TO ElasticsearchMetadata INSTANCE
 
+KNOWN_MULTITYPES = ["build.type", "run.type", "build.platform", "file.path"]
+
 
 class ElasticsearchMetadata(Namespace):
     """
@@ -52,7 +85,7 @@ class ElasticsearchMetadata(Namespace):
 
     @override
     def __new__(cls, kwargs, *args, **_kwargs):
-        es_cluster = elasticsearch.Cluster(kwargs)
+        es_cluster = elasticsearch.Cluster(kwargs)  # NOTICE cls IS PASSED IN
         output = known_clusters.get(id(es_cluster))
         if output is None:
             output = object.__new__(cls)
@@ -60,110 +93,196 @@ class ElasticsearchMetadata(Namespace):
         return output
 
     @override
-    def __init__(self, host, index, sql_file='metadata.sqlite', alias=None, name=None, port=9200, kwargs=None):
+    def __init__(self, host, index, alias=None, name=None, port=9200, kwargs=None):
         if hasattr(self, "settings"):
             return
 
-        self.too_old = TOO_OLD
         self.settings = kwargs
-        self.default_name = coalesce(name, alias, index)
+        self.too_old = TOO_OLD
         self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)
-        self.index_does_not_exist = set()
         self.todo = Queue("refresh metadata", max=100000, unique=True)
 
-        self.index_to_alias = Relation_usingList()
-
-        self.es_metadata = Null
-        self.metadata_last_updated = Date.now() - OLD_METADATA
-
-        self.meta = Data()
-        self.meta.columns = ColumnList()
-
-        self.alias_to_query_paths = {
-            "meta.columns": [['.']],
-            "meta.tables": [['.']]
-        }
-        self.alias_last_updated = {
-            "meta.columns": Date.now(),
-            "meta.tables": Date.now()
-        }
-        table_columns = metadata_tables()
+        self.meta.columns = ColumnList(self.es_cluster)
+        self.meta.columns.extend(META_TABLES_DESC.columns)
         self.meta.tables = ListContainer(
-            "meta.tables",
-            [
-                # TableDesc("meta.columns", None, ".", Date.now()),
-                # TableDesc("meta.tables", None, ".", Date.now())
-            ],
-            jx_base.Schema(".", table_columns)
+            META_TABLES_NAME, [], jx_base.Schema(".", META_TABLES_DESC.columns)
         )
-        self.meta.columns.extend(table_columns)
+        self.meta.table.extend([META_COLUMNS_DESC, META_TABLES_DESC])
+        self.alias_to_query_paths = {}
+        for i, settings in self.es_cluster.get_metadata().indices.items():
+            if len(settings.aliases) == 0:
+                alias = i
+            elif len(settings.aliases) == 1:
+                alias = first(settings.aliases)
+            else:
+                Log.error("expecting only one alias per index")
+
+            desc = TableDesc(
+                name=alias,
+                url=None,
+                query_path=ROOT_PATH,
+                last_updated=self.es_cluster.metatdata_last_updated,
+                columns=[],
+            )
+            self.meta.tables.add(desc)
+            self.alias_to_query_paths[alias] = [desc.query_path]
+            self.alias_to_query_paths[self._find_alias(alias)] = [desc.query_path]
+
+        # WE MUST PAUSE?
+        # TODO: fix monitor so it does not bring down ES
         if ENABLE_META_SCAN:
-            self.worker = Thread.run("refresh metadata", self.monitor)
+            self.worker = Thread.run(
+                "refresh metadata",
+                self.monitor,
+                parent_thread=MAIN_THREAD
+            )
         else:
-            self.worker = Thread.run("refresh metadata", self.not_monitor)
+            self.worker = Thread.run(
+                "not refresh metadata for " + host,
+                self.not_monitor,
+                parent_thread=MAIN_THREAD,
+            )
         return
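Note on the constructor above: it assumes each index carries at most one alias, and an index with no alias stands for itself. A hypothetical helper restating that rule (illustrative only, not part of this patch):

    def alias_of(index_name, aliases):
        # AN INDEX WITH NO ALIAS ACTS AS ITS OWN ALIAS
        if not aliases:
            return index_name
        if len(aliases) == 1:
            return next(iter(aliases))
        raise ValueError("expecting only one alias per index")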
 
     @property
     def namespace(self):
         return self.meta.columns.namespace
 
-    @property
-    def url(self):
-        return self.es_cluster.url / self.default_name.replace(".", "/")
-
-    def _reload_columns(self, table_desc):
+    def _reload_columns(self, table_desc, after):
         """
+        ENSURE ALL INDICES FOR A GIVEN ALIAS HAVE THE SAME COLUMNS
+        :param alias: A REAL ALIAS (OR NAME OF INDEX THAT HAS NO ALIAS)
+        :param after: ENSURE DATA IS YOUNGER THAN after
         :return:
         """
         # FIND ALL INDEXES OF ALIAS
-        es_last_updated = self.es_cluster.metatdata_last_updated
-
         alias = table_desc.name
+        metadata = self.es_cluster.get_metadata(after=after)
         canonical_index = self.es_cluster.get_best_matching_index(alias).index
-        update_required = not (table_desc.timestamp < es_last_updated)
-        metadata = self.es_cluster.get_metadata(force=update_required)
 
-        indexes = self.index_to_alias.get_domain(alias)
         props = [
+            # NOTICE THIS TRIPLE (index, type, properties)
             (self.es_cluster.get_index(index=i, type=t, debug=DEBUG), t, m.properties)
             for i, d in metadata.indices.items()
-            if i in indexes
+            if alias in d.aliases
             for t, m in [_get_best_type_from_mapping(d.mappings)]
         ]
 
         # CONFIRM ALL COLUMNS ARE SAME, FIX IF NOT
         dirty = False
-        all_comparisions = list(jx.pairwise(props)) + list(jx.pairwise(jx.reverse(props)))
+        all_comparisions = list(jx.pairwise(props)) + list(
+            jx.pairwise(jx.reverse(props))
+        )
+        # NOTICE THE SAME (index, type, properties) TRIPLE FROM ABOVE
         for (i1, t1, p1), (i2, t2, p2) in all_comparisions:
             diff = elasticsearch.diff_schema(p2, p1)
-            if not self.settings.read_only:
-                for d in diff:
-                    dirty = True
-                    i1.add_property(*d)
-        meta = self.es_cluster.get_metadata(force=dirty).indices[canonical_index]
+            for name, es_details in diff:
+                if es_details.type in {"object", "nested"}:
+                    # QUERYING OBJECTS RETURNS NOTHING
+                    continue
+                col = first(self.meta.columns.find(alias, name))
+                if col and col.last_updated > after and col.cardinality == 0:
+                    continue
+                if col and col.jx_type in STRUCT:
+                    continue
+                for i, t, _ in props:
+                    if i is not i1:  # WE KNOW IT IS NOT IN i1 BECAUSE diff SAYS SO
+                        try:
+                            # TODO: THIS TAKES A LONG TIME, CACHE IN THE COLUMN METADATA?
+                            # MAY NOT WORK - COLUMN METADATA IS FOR ALIASES, NOT INDEXES
+                            result = i.search(
+                                {"query": {"exists": {"field": name}}, "size": 0}
+                            )
+                            if result.hits.total > 0:
+                                dirty = True
+                                i1.add_property(name, es_details)
+                                break
+                        except Exception as e:
+                            Log.warning(
+                                "problem adding field {{field}}",
+                                field=name,
+                                cause=e,
+                            )
+                else:
+                    # ALL OTHER INDEXES HAVE ZERO RECORDS FOR THIS COLUMN
+                    zero_col = Column(
+                        name=name,
+                        es_column=name,
+                        es_index=alias,
+                        es_type=es_details.type,
+                        jx_type=es_type_to_json_type[coalesce(es_details.type, "object")],
+                        nested_path=get_nested_path(name),
+                        count=0,
+                        cardinality=0,  # MARKED AS DELETED
+                        multi=1001 if es_details.type == 'nested' else 0,
+                        partitions=None,
+                        last_updated=Date.now()
+                    )
+                    if len(zero_col.nested_path) > 1:
+                        pass
+                    self.meta.columns.add(zero_col)
+        if dirty:
+            metadata = self.es_cluster.get_metadata(after=Date.now())
 
-        data_type, mapping = _get_best_type_from_mapping(meta.mappings)
+        now = self.es_cluster.metatdata_last_updated
+        meta = metadata.indices[literal_field(canonical_index)]
+        es_details, mapping = _get_best_type_from_mapping(meta.mappings)
         mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
-        self._parse_properties(alias, mapping, meta)
-        table_desc.timestamp = es_last_updated
+        columns = self._parse_properties(alias, mapping)
+        table_desc.last_updated = now
 
-    def _parse_properties(self, alias, mapping, meta):
-        abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
-        if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
-            Log.warning(
-                "Some columns are not stored {{names}}",
+        column_names = {c.es_column for c in columns}
+        # DELETE SOME COLUMNS
+        current_columns = self.meta.columns.find(alias)
+        for c in current_columns:
+            if c.es_column not in column_names:
+                self.meta.columns.remove(c, now)
+
+        # ASK FOR COLUMNS TO BE RE-SCANNED
+        rescan = [
+            (c, after)
+            for c in columns
+            if c.es_index != META_COLUMNS_NAME
+            and (c.cardinality == None or not (c.last_updated > after))
+        ]
+        # PUSH THESE COLUMNS SO THEY ARE SCANNED FIRST
+        # WE ARE ASSUMING THIS TABLE IS HIGHER PRIORITY THAN SOME
+        # BACKLOG CURRENTLY IN THE todo QUEUE
+        self.todo.push_all(rescan)
+        DEBUG and Log.note("asked for {{num}} columns to be rescanned", num=len(rescan))
+        return columns
+
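Note on `_reload_columns` above: before a missing property is copied onto a lagging index, every sibling index is probed with a count-only `exists` query, so empty columns are never propagated. The request is plain Elasticsearch search DSL; a sketch against a hypothetical client `es` (the field name is illustrative):

    result = es.search({
        "query": {"exists": {"field": "build.type"}},  # ANY DOC WITH THIS FIELD?
        "size": 0,                                     # COUNT ONLY, NO HITS
    })
    if result.hits.total > 0:
        pass  # FIELD EXISTS SOMEWHERE; ADD THE PROPERTY TO THE LAGGING INDEX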
+    def _parse_properties(self, alias, mapping):
+        """
+        PARSE THE mapping, UPDATE self.meta.columns, AND RETURN CANONICAL COLUMNS
+        :param alias:
+        :param mapping:
+        :return:
+        """
+
+        abs_columns = elasticsearch.parse_properties(
+            alias, ".", ROOT_PATH, mapping.properties
+        )
+        if DEBUG and any(c.cardinality == 0 and c.name != "_id" for c in abs_columns):
+            Log.note(
+                "Some columns are always missing in {{url}} {{index|quote}} table:\n{{names}}",
+                url=self.es_cluster.url,
+                index=alias,
                 names=[
-                    ".".join((c.es_index, c.names['.']))
+                    ".".join((c.es_index, c.name))
                     for c in abs_columns
                     if c.cardinality == 0
-                ]
+                ],
             )
 
-        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
+        with Timer(
+            "upserting {{num}} columns", {"num": len(abs_columns)}, verbose=DEBUG
+        ):
             # LIST OF EVERY NESTED PATH
             query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
             for a, b in itertools.product(query_paths, query_paths):
@@ -178,88 +297,158 @@ class ElasticsearchMetadata(Namespace):
                         b.insert(i, aa)
                         break
             for q in query_paths:
-                q.append(SELF_PATH)
+                q.append(".")
             query_paths.append(ROOT_PATH)
-            self.alias_to_query_paths[alias] = query_paths
-            for i in self.index_to_alias.get_domain(alias):
-                self.alias_to_query_paths[i] = query_paths
 
-            # ADD RELATIVE NAMES
+            # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
+            self.alias_to_query_paths[alias] = query_paths
+            self.alias_to_query_paths[self._find_alias(alias)] = query_paths
+
+            # ENSURE COLUMN HAS CORRECT jx_type
+            # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
+            output = []
+            best = {}
             for abs_column in abs_columns:
-                abs_column.last_updated = None
                 abs_column.jx_type = jx_type(abs_column)
-                for query_path in query_paths:
-                    abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
-                self.todo.add(self.meta.columns.add(abs_column))
-        pass
+                if abs_column.jx_type not in STRUCT:
+                    clean_name = unnest_path(abs_column.name)
+                    other = best.get(clean_name)
+                    if other:
+                        if len(other.nested_path) < len(abs_column.nested_path):
+                            output.remove(other)
+                            self.meta.columns.update(
+                                {
+                                    "clear": ".",
+                                    "where": {
+                                        "eq": {
+                                            "es_column": other.es_column,
+                                            "es_index": other.es_index,
+                                        }
+                                    },
+                                }
+                            )
+                        else:
+                            continue
+                    best[clean_name] = abs_column
+                output.append(abs_column)
+
+            # REGISTER ALL COLUMNS
+            canonicals = []
+            for abs_column in output:
+                canonical = self.meta.columns.add(abs_column)
+                canonicals.append(canonical)
+
+            return canonicals
 
     def query(self, _query):
-        return self.meta.columns.query(QueryOp(set_default(
-            {
-                "from": self.meta.columns,
-                "sort": ["table", "name"]
-            },
-            _query.__data__()
-        )))
+        return self.meta.columns.query(
+            QueryOp(
+                set_default(
+                    {"from": self.meta.columns, "sort": ["table", "name"]},
+                    _query.__data__(),
+                )
+            )
+        )
 
     def _find_alias(self, name):
-        if self.metadata_last_updated < self.es_cluster.metatdata_last_updated:
-            for a in self.es_cluster.get_aliases():
-                self.index_to_alias[a.index] = coalesce(a.alias, a.index)
-                self.alias_last_updated.setdefault(a.alias, Date.MIN)
-        if name in self.alias_last_updated:
-            return name
-        else:
-            return self.index_to_alias[name]
+        indices = self.es_cluster.get_metadata().indices
+        settings = indices[name]
+        if settings:
+            aliases = settings.aliases
+            if not aliases:
+                return name
+            else:
+                return aliases[0]
 
-    def get_columns(self, table_name, column_name=None, force=False):
+        for settings in indices.values():
+            if name in settings.aliases:
+                return name
+
+    def get_columns(self, table_name, column_name=None, after=None, timeout=None):
         """
         RETURN METADATA COLUMNS
+
+        :param table_name: TABLE WE WANT COLUMNS FOR
+        :param column_name: OPTIONAL NAME, IF INTERESTED IN ONLY ONE COLUMN
+        :param after: FORCE LOAD, WAITING FOR last_updated TO BE AFTER THIS TIME
+        :param timeout: Signal; True when should give up
+        :return:
         """
-        table_path = split_field(table_name)
-        root_table_name = table_path[0]
+        DEBUG and after and Log.note(
+            "getting columns for {{table}} after {{time}}", table=table_name, time=after
+        )
+        if table_name == META_TABLES_NAME:
+            return self.meta.tables.schema.columns
+        elif table_name == META_COLUMNS_NAME:
+            root_table_name = table_name
+        else:
+            root_table_name, _ = tail_field(table_name)
 
         alias = self._find_alias(root_table_name)
         if not alias:
-            self.es_cluster.get_metadata(force=True)
+            self.es_cluster.get_metadata(after=after)
             alias = self._find_alias(root_table_name)
             if not alias:
                 Log.error("{{table|quote}} does not exist", table=table_name)
 
         try:
-            last_update = MAX([
-                self.es_cluster.index_last_updated[i]
-                for i in self.index_to_alias.get_domain(alias)
-            ])
-
-            table = self.get_table(alias)[0]
+            table = self.get_table(alias)
 
             # LAST TIME WE GOT INFO FOR THIS TABLE
-            if not table:
+            if table == None:
                 table = TableDesc(
                     name=alias,
                     url=None,
-                    query_path=['.'],
-                    timestamp=Date.MIN
+                    query_path=["."],
+                    last_updated=Date.MIN,
+                    columns=[],
                 )
                 with self.meta.tables.locker:
                     self.meta.tables.add(table)
-                self._reload_columns(table)
-            elif force or table.timestamp < last_update:
-                self._reload_columns(table)
+                columns = self._reload_columns(table, after=after)
+            elif after and table.last_updated < after:
+                columns = self._reload_columns(table, after=after)
+            elif table.last_updated < self.es_cluster.metatdata_last_updated:
+                # TODO: THIS IS TOO EXTREME; WE SHOULD WAIT FOR SOME SENSE OF "OLDNESS"
+                columns = self._reload_columns(
+                    table, after=self.es_cluster.metatdata_last_updated
+                )
+            else:
+                columns = self.meta.columns.find(alias, column_name)
 
-            columns = self.meta.columns.find(alias, column_name)
-            columns = jx.sort(columns, "names.\\.")
-            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
-            while len(self.todo) and not all(columns.get("last_updated")):
+            columns = jx.sort(columns, "name")
+
+            if after is None:
+                return columns  # DO NOT WAIT FOR COMPLETE COLUMNS
+
+            # WAIT FOR THE COLUMNS TO UPDATE
+            while True:
+                pending = [c for c in columns if after >= c.last_updated]
+                if not pending:
+                    break
+                if timeout:
+                    Log.error("trying to get columns timed out")
                 if DEBUG:
-                    if len(columns) > 10:
-                        Log.note("waiting for {{num}} columns to update", num=len([c for c in columns if not c.last_updated]))
+                    if len(pending) > 10:
+                        Log.note(
+                            "waiting for {{num}} columns to update by {{timestamp}}",
+                            num=len(pending),
+                            timestamp=after,
+                        )
                     else:
-                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in columns if not c.last_updated])
+                        Log.note(
+                            "waiting for columns to update by {{timestamp}}; {{columns|json}}",
+                            timestamp=after,
+                            columns=[
                                concat_field(c.es_index, c.es_column)
                                + " id="
                                + text(id(c))
                                for c in pending
                            ],
                        )
                 Till(seconds=1).wait()
             return columns
         except Exception as e:
-            Log.error("Not expected", cause=e)
+            Log.error("Failure to get columns for {{table}}", table=table_name, cause=e)
 
         return []
 
@@ -267,134 +456,262 @@ class ElasticsearchMetadata(Namespace):
         """
         QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
         """
+        now = Date.now()
         if column.es_index in self.index_does_not_exist:
             return
 
         if column.jx_type in STRUCT:
             Log.error("not supported")
         try:
-            if column.es_index == "meta.columns":
-                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.columns, column.es_column) if g[column.es_column] != None])
-                self.meta.columns.update({
-                    "set": {
-                        "partitions": partitions,
-                        "count": len(self.meta.columns),
-                        "cardinality": len(partitions),
-                        "multi": 1,
-                        "last_updated": Date.now()
-                    },
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
+            if column.es_index == META_TABLES_NAME:
+                partitions = jx.sort(
+                    [
+                        g[column.es_column]
+                        for g, _ in jx.groupby(self.meta.tables, column.es_column)
+                        if g[column.es_column] != None
+                    ]
+                )
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "partitions": partitions,
+                            "count": len(self.meta.tables),
+                            "cardinality": len(partitions),
+                            "multi": 1,
+                            "last_updated": now,
+                        },
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
                 return
-            if column.es_index == "meta.tables":
-                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.tables, column.es_column) if g[column.es_column] != None])
-                self.meta.columns.update({
-                    "set": {
-                        "partitions": partitions,
-                        "count": len(self.meta.tables),
-                        "cardinality": len(partitions),
-                        "multi": 1,
-                        "last_updated": Date.now()
-                    },
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
+            if column.es_index == META_COLUMNS_NAME:
+                DEBUG and Log.note(
+                    "{{column.es_column}} is metadata, not scanned", column=column
+                )
                 return
 
             es_index = column.es_index.split(".")[0]
 
-            is_text = [cc for cc in self.meta.columns if cc.es_column == column.es_column and cc.es_type == "text"]
+            is_text = [
+                cc
+                for cc in self.meta.columns
+                if cc.es_column == column.es_column and cc.es_type == "text"
+            ]
            if is_text:
                # text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
-                result = self.es_cluster.post("/" + es_index + "/_search", data={
-                    "aggs": {
-                        "count": {"filter": {"match_all": {}}}
-                    },
-                    "size": 0
-                })
+                result = self.es_cluster.post(
+                    "/" + es_index + "/_search",
+                    data={"aggs": {"count": {"filter": {"match_all": {}}}}, "size": 0},
+                )
                count = result.hits.total
                cardinality = max(1001, count)
                multi = 1001
            elif column.es_column == "_id":
-                result = self.es_cluster.post("/" + es_index + "/_search", data={
-                    "query": {"match_all": {}},
-                    "size": 0
-                })
+                result = self.es_cluster.post(
+                    "/" + es_index + "/_search",
+                    data={"query": {"match_all": {}}, "size": 0},
+                )
                count = cardinality = result.hits.total
                multi = 1
            elif column.es_type == BOOLEAN:
-                result = self.es_cluster.post("/" + es_index + "/_search", data={
-                    "aggs": {
-                        "count": _counting_query(column)
-                    },
-                    "size": 0
-                })
+                result = self.es_cluster.post(
+                    "/" + es_index + "/_search",
+                    data={"aggs": {"count": _counting_query(column)}, "size": 0},
+                )
                count = result.hits.total
                cardinality = 2
-                multi = 1
+
+                DEBUG and Log.note(
+                    "{{table}}.{{field}} has {{num}} parts",
+                    table=column.es_index,
+                    field=column.es_column,
+                    num=cardinality,
+                )
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "count": count,
+                            "cardinality": cardinality,
+                            "partitions": [False, True],
+                            "multi": 1,
+                            "last_updated": now,
+                        },
+                        "clear": ["partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
                return
+            elif "_covered." in column.es_column or "_uncovered." in column.es_column:
+                # DO NOT EVEN LOOK AT THESE COLUMNS
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "count": 1000*1000,
+                            "cardinality": 10000,
+                            "multi": 10000,
+                            "last_updated": now,
+                        },
+                        "clear": ["partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
+                return
            else:
-                result = self.es_cluster.post("/" + es_index + "/_search", data={
+                es_query = {
                    "aggs": {
                        "count": _counting_query(column),
-                        "multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
+                        "_filter": {
+                            "aggs": {
+                                "multi": {
+                                    "max": {
+                                        "script": "doc["
+                                        + quote(column.es_column)
+                                        + "].values.size()"
+                                    }
+                                }
+                            },
+                            "filter": {
+                                "bool": {
+                                    "should": [
+                                        {
+                                            "range": {
+                                                "etl.timestamp.~n~": {
+                                                    "gte": (Date.today() - WEEK)
+                                                }
+                                            }
+                                        },
+                                        {
+                                            "bool": {
+                                                "must_not": {
+                                                    "exists": {
+                                                        "field": "etl.timestamp.~n~"
+                                                    }
+                                                }
+                                            }
+                                        },
+                                    ]
+                                }
+                            },
+                        },
                    },
-                    "size": 0
-                })
+                    "size": 0,
+                }
+
+                result = self.es_cluster.post(
+                    "/" + es_index + "/_search", data=es_query
+                )
                agg_results = result.aggregations
                count = result.hits.total
-                cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
-                multi = int(coalesce(agg_results.multi.value, 1))
+                cardinality = coalesce(
+                    agg_results.count.value,
+                    agg_results.count._nested.value,
+                    agg_results.count.doc_count,
+                )
+                multi = int(coalesce(agg_results._filter.multi.value, 1))
                if cardinality == None:
                    Log.error("logic error")
 
            query = Data(size=0)
 
            if column.es_column == "_id":
-                self.meta.columns.update({
-                    "set": {
-                        "count": cardinality,
-                        "cardinality": cardinality,
-                        "multi": 1,
-                        "last_updated": Date.now()
-                    },
-                    "clear": ["partitions"],
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "count": cardinality,
+                            "cardinality": cardinality,
+                            "multi": 1,
+                            "last_updated": now,
+                        },
+                        "clear": ["partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
                return
-            elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
-                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
-                self.meta.columns.update({
-                    "set": {
-                        "count": count,
-                        "cardinality": cardinality,
-                        "multi": multi,
-                        "last_updated": Date.now()
-                    },
-                    "clear": ["partitions"],
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
+            elif (
+                cardinality > 1000
+                or (count >= 30 and cardinality == count)
+                or (count >= 1000 and cardinality / count > 0.99)
+            ):
+                DEBUG and Log.note(
+                    "{{table}}.{{field}} has {{num}} parts",
+                    table=column.es_index,
+                    field=column.es_column,
+                    num=cardinality,
+                )
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "count": count,
+                            "cardinality": cardinality,
+                            "multi": multi,
+                            "last_updated": now,
+                        },
+                        "clear": ["partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
                return
            elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
-                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
-                self.meta.columns.update({
-                    "set": {
-                        "count": count,
-                        "cardinality": cardinality,
-                        "multi": multi,
-                        "last_updated": Date.now()
-                    },
-                    "clear": ["partitions"],
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
+                DEBUG and Log.note(
+                    "{{table}}.{{field}} has {{num}} parts",
+                    table=column.es_index,
+                    field=column.es_column,
+                    num=cardinality,
+                )
+                self.meta.columns.update(
+                    {
+                        "set": {
+                            "count": count,
+                            "cardinality": cardinality,
+                            "multi": multi,
+                            "last_updated": now,
+                        },
+                        "clear": ["partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
                return
            elif len(column.nested_path) != 1:
                query.aggs["_"] = {
                    "nested": {"path": column.nested_path[0]},
-                    "aggs": {"_nested": {"terms": {"field": column.es_column}}}
+                    "aggs": {"_nested": {"terms": {"field": column.es_column}}},
                }
-            elif cardinality == 0:
+            elif cardinality == 0:  # WHEN DOES THIS HAPPEN?
                query.aggs["_"] = {"terms": {"field": column.es_column}}
            else:
-                query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}
+                query.aggs["_"] = {
+                    "terms": {"field": column.es_column, "size": cardinality}
+                }
 
            result = self.es_cluster.post("/" + es_index + "/_search", data=query)
 
@@ -404,144 +721,293 @@ class ElasticsearchMetadata(Namespace):
            else:
                parts = jx.sort(aggs.buckets.key)
 
-            self.meta.columns.update({
-                "set": {
-                    "count": count,
-                    "cardinality": cardinality,
-                    "multi": multi,
-                    "partitions": parts,
-                    "last_updated": Date.now()
-                },
-                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-            })
+            DEBUG and Log.note(
+                "update metadata for {{column.es_index}}.{{column.es_column}} (id={{id}}) card={{card}} at {{time}}",
+                id=id(column),
+                column=column,
+                card=cardinality,
+                time=now,
+            )
+            self.meta.columns.update(
+                {
+                    "set": {
+                        "count": count,
+                        "cardinality": cardinality,
+                        "multi": multi,
+                        "partitions": parts,
+                        "last_updated": now,
+                    },
+                    "where": {
+                        "eq": {
+                            "es_index": column.es_index,
+                            "es_column": column.es_column,
+                        }
+                    },
+                }
+            )
+            META_COLUMNS_DESC.last_updated = now
        except Exception as e:
            # CAN NOT IMPORT: THE TEST MODULES SETS UP LOGGING
            # from tests.test_jx import TEST_TABLE
            e = Except.wrap(e)
            TEST_TABLE = "testdata"
-            is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
+            is_missing_index = any(
+                w in e for w in ["IndexMissingException", "index_not_found_exception"]
+            )
            is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
-            if is_missing_index and is_test_table:
+            if is_missing_index:
                # WE EXPECT TEST TABLES TO DISAPPEAR
-                self.meta.columns.update({
-                    "clear": ".",
-                    "where": {"eq": {"es_index": column.es_index}}
-                })
+                if not is_test_table:
+                    Log.warning("Missing index {{col.es_index}}", col=column)
+                self.meta.columns.update(
+                    {"clear": ".", "where": {"eq": {"es_index": column.es_index}}}
+                )
                self.index_does_not_exist.add(column.es_index)
+            elif "No field found for" in e:
+                self.meta.columns.update(
+                    {
+                        "clear": ".",
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
+                Log.warning(
+                    "Could not get column {{col.es_index}}.{{col.es_column}} info",
+                    col=column,
+                    cause=e,
+                )
            else:
-                self.meta.columns.update({
-                    "set": {
-                        "last_updated": Date.now()
-                    },
-                    "clear": [
-                        "count",
-                        "cardinality",
-                        "multi",
-                        "partitions",
-                    ],
-                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                })
-                Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
+                self.meta.columns.update(
+                    {
+                        "set": {"last_updated": now},
+                        "clear": ["count", "cardinality", "multi", "partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
+                Log.warning(
+                    "Could not get {{col.es_index}}.{{col.es_column}} info",
+                    col=column,
+                    cause=e,
+                )
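Note on `_update_cardinality` above: partitions are only materialized when a column looks like a low-cardinality dimension; the guards can be restated as a single predicate (a sketch under that reading, not part of this patch):

    def worth_storing_partitions(count, cardinality):
        if cardinality > 1000:
            return False  # TOO MANY DISTINCT VALUES
        if count >= 30 and cardinality == count:
            return False  # EVERY VALUE UNIQUE; LOOKS LIKE AN ID
        if count >= 1000 and cardinality / count > 0.99:
            return False  # NEARLY UNIQUE
        return True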
 
     def monitor(self, please_stop):
-        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
+        please_stop.then(lambda: self.todo.add(THREAD_STOP))
         while not please_stop:
             try:
                 if not self.todo:
+                    # LOOK FOR OLD COLUMNS WE CAN RE-SCAN
+                    now = Date.now()
+                    last_good_update = now - MAX_COLUMN_METADATA_AGE
                     old_columns = [
                         c
                         for c in self.meta.columns
-                        if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.jx_type not in STRUCT
+                        if (c.last_updated < last_good_update)
+                        and c.jx_type not in STRUCT
+                        and c.es_index != META_COLUMNS_NAME
                     ]
-                    if old_columns:
-                        DEBUG and Log.note(
-                            "Old columns {{names|json}} last updated {{dates|json}}",
-                            names=wrap(old_columns).es_column,
-                            dates=[Date(t).format() for t in wrap(old_columns).last_updated]
+
+                    if DEBUG:
+                        if old_columns:
+                            Log.note(
+                                "Old columns {{names|json}} last updated {{dates|json}}",
+                                names=wrap(old_columns).es_column,
+                                dates=[
+                                    Date(t).format() for t in wrap(old_columns).last_updated
+                                ],
+                            )
+                        else:
+                            Log.note("no more metadata to update")
+
+                    for g, index_columns in jx.groupby(old_columns, "es_index"):
+                        # TRIGGER COLUMN UNIFICATION BEFORE WE DO ANALYSIS
+                        try:
+                            self.get_columns(g.es_index)
+                        except Exception as e:
+                            if "{{table|quote}} does not exist" in e:
+                                self.meta.columns.update(
+                                    {
+                                        "clear": ".",
+                                        "where": {"eq": {"es_index": g.es_index}},
+                                    }
+                                )
+                                continue
+                            Log.warning("problem getting column info on {{table}}", table=g.es_index, cause=e)
+
+                        self.todo.extend(
+                            (c, max(last_good_update, c.last_updated))
+                            for c in index_columns
                         )
-                        self.todo.extend(old_columns)
-                        # TEST CONSISTENCY
-                        for c, d in product(list(self.todo.queue), list(self.todo.queue)):
-                            if c.es_column == d.es_column and c.es_index == d.es_index and c != d:
-                                Log.error("")
-                    else:
-                        DEBUG and Log.note("no more metatdata to update")
+                    META_COLUMNS_DESC.last_updated = now
 
-                column = self.todo.pop(Till(seconds=(10*MINUTE).seconds))
-                if column:
-                    if column is THREAD_STOP:
+                work_item = self.todo.pop(Till(seconds=(10 * MINUTE).seconds))
+                if work_item:
+                    if work_item is THREAD_STOP:
                         continue
+                    column, after = work_item
 
-                    with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
-                        if column.es_index in self.index_does_not_exist:
-                            self.meta.columns.update({
-                                "clear": ".",
-                                "where": {"eq": {"es_index": column.es_index}}
-                            })
+                    now = Date.now()
+                    with Timer(
+                        "review {{table}}.{{column}}",
+                        param={"table": column.es_index, "column": column.es_column},
+                        verbose=DEBUG,
+                    ):
+                        all_tables = [n for p in self.es_cluster.get_aliases(after=after) for n in (p.index, p.alias)]
+                        if column.es_index not in all_tables:
+                            DEBUG and Log.note(
+                                "{{column.es_column}} of {{column.es_index}} does not exist",
+                                column=column,
+                            )
+                            self.meta.columns.update(
+                                {
+                                    "clear": ".",
+                                    "where": {"eq": {"es_index": column.es_index}},
+                                }
+                            )
                             continue
-                        if column.jx_type in STRUCT or column.es_column.endswith("." + EXISTS_TYPE):
-                            column.last_updated = Date.now()
+                        if (
+                            column.jx_type in STRUCT
+                            or split_field(column.es_column)[-1] == EXISTS_TYPE
+                        ):
+                            if (column.es_type == "nested" or last(split_field(column.es_column)) == NESTED_TYPE) and (column.multi == None or column.multi < 2):
+                                column.multi = 1001
+                                Log.warning("fixing multi on nested problem")
+                            # DEBUG and Log.note("{{column.es_column}} is a struct, not scanned", column=column)
+                            column.last_updated = now
                             continue
-                        elif column.last_updated >= Date.now()-TOO_OLD:
+                        elif column.cardinality is None:
+                            pass  # NO CARDINALITY MEANS WE MUST UPDATE IT
+                        elif after and column.last_updated < after:
+                            pass  # COLUMN IS TOO OLD
+                        elif column.last_updated < now - TOO_OLD:
+                            pass  # COLUMN IS WAY TOO OLD
+                        else:
+                            # DO NOT UPDATE FRESH COLUMN METADATA
+                            DEBUG and Log.note(
+                                "{{column.es_column}} is still fresh ({{ago}} ago)",
+                                column=column,
+                                ago=(now - Date(column.last_updated)),
+                            )
                             continue
+
                         try:
                             self._update_cardinality(column)
-                            (DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
+                            (
+                                DEBUG
+                                and not column.es_index.startswith(TEST_TABLE_PREFIX)
+                            ) and Log.note("updated {{column.name}}", column=column)
                         except Exception as e:
                             if '"status":404' in e:
-                                self.meta.columns.update({
-                                    "clear": ".",
-                                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
-                                })
+                                self.meta.columns.update(
+                                    {
+                                        "clear": ".",
+                                        "where": {
+                                            "eq": {
+                                                "es_index": column.es_index,
+                                                "es_column": column.es_column,
+                                            }
+                                        },
+                                    }
+                                )
                             else:
-                                Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
+                                Log.warning(
+                                    "problem getting cardinality for {{column.name}}",
+                                    column=column,
+                                    cause=e,
+                                )
+                    META_COLUMNS_DESC.last_updated = now
             except Exception as e:
                 Log.warning("problem in cardinality monitor", cause=e)
 
     def not_monitor(self, please_stop):
         Log.alert("metadata scan has been disabled")
-        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
+        please_stop.then(lambda: self.todo.add(THREAD_STOP))
         while not please_stop:
-            c = self.todo.pop()
-            if c == THREAD_STOP:
+            pair = self.todo.pop()
+            if pair is THREAD_STOP:
                 break
+            column, after = pair
 
-            if c.last_updated >= Date.now()-TOO_OLD:
-                continue
+            with Timer(
+                "Update {{col.es_index}}.{{col.es_column}}",
+                param={"col": column},
+                verbose=DEBUG,
+                too_long=0.05,
+            ):
+                if (
+                    column.jx_type in STRUCT
+                    or split_field(column.es_column)[-1] == EXISTS_TYPE
+                ):
+                    # DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
+                    continue
+                elif after and column.last_updated > after:
+                    continue  # COLUMN IS STILL YOUNG
+                elif (
+                    column.last_updated > Date.now() - TOO_OLD
+                    and column.cardinality > 0
+                ):
+                    # DO NOT UPDATE FRESH COLUMN METADATA
+                    DEBUG and Log.note(
+                        "{{column.es_column}} is still fresh ({{ago}} ago)",
+                        column=column,
+                        ago=(Date.now() - Date(column.last_updated)).seconds,
+                    )
+                    continue
 
-            with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
-                self.meta.columns.update({
-                    "set": {
-                        "last_updated": Date.now()
-                    },
-                    "clear": [
-                        "count",
-                        "cardinality",
-                        "multi",
-                        "partitions",
-                    ],
-                    "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
-                })
+                if untype_path(column.name) in KNOWN_MULTITYPES:
+                    try:
+                        self._update_cardinality(column)
+                    except Exception as e:
+                        Log.warning(
+                            "problem getting cardinality for {{column.name}}",
+                            column=column,
+                            cause=e,
+                        )
+                    continue
+
+                self.meta.columns.update(
+                    {
+                        "set": {"last_updated": Date.now()},
+                        "clear": ["count", "cardinality", "multi", "partitions"],
+                        "where": {
+                            "eq": {
+                                "es_index": column.es_index,
+                                "es_column": column.es_column,
+                            }
+                        },
+                    }
+                )
 
     def get_table(self, name):
-        if name == "meta.columns":
-            return self.meta.columns
-
-        # return self.meta.columns
+        if name == META_COLUMNS_NAME:
+            pass
         with self.meta.tables.locker:
-            return wrap([t for t in self.meta.tables.data if t.name == name])
+            return first(t for t in self.meta.tables.data if t.name == name)
 
     def get_snowflake(self, fact_table_name):
         return Snowflake(fact_table_name, self)
 
     def get_schema(self, name):
-        if name == "meta.columns":
+        if name == META_COLUMNS_NAME:
             return self.meta.columns.schema
-        query_path = split_field(name)
-        root, rest = query_path[0], join_field(query_path[1:])
+        if name == META_TABLES_NAME:
+            return self.meta.tables.schema
+        root, rest = tail_field(name)
         return self.get_snowflake(root).get_schema(rest)
 
 
+EXPECTING_SNOWFLAKE = "Expecting snowflake {{name|quote}} to exist"
+
+
 class Snowflake(object):
     """
     REPRESENT ONE ALIAS, AND ITS NESTED ARRAYS
@@ -550,6 +1016,8 @@ class Snowflake(object):
     def __init__(self, name, namespace):
         self.name = name
         self.namespace = namespace
+        if name not in self.namespace.alias_to_query_paths:
+            Log.error(EXPECTING_SNOWFLAKE, name=name)
 
     def get_schema(self, query_path):
         return Schema(query_path, self)
@@ -564,6 +1032,17 @@ class Snowflake(object):
                 return output
         Log.error("Can not find index {{index|quote}}", index=self.name)
 
+    @property
+    def sorted_query_paths(self):
+        """
+        RETURN A LIST OF ALL SCHEMA'S IN DEPTH-FIRST TOPOLOGICAL ORDER
+        """
+        return list(
+            reversed(
+                sorted(p[0] for p in self.namespace.alias_to_query_paths.get(self.name))
+            )
+        )
+
     @property
     def columns(self):
         """
@@ -578,15 +1057,52 @@ class Schema(jx_base.Schema):
     """
 
     def __init__(self, query_path, snowflake):
-        if not isinstance(snowflake.query_paths[0], list):
-            Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
+        if not is_list(snowflake.query_paths[0]):
+            Log.error(
+                "Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)"
+            )
+        self.snowflake = snowflake
         try:
-            self.query_path = [
-                p
-                for p in snowflake.query_paths
-                if untype_path(p[0]) == query_path
-            ][0]
-            self.snowflake = snowflake
+            path = [p for p in snowflake.query_paths if untype_path(p[0]) == query_path]
+            if path:
+                # WE DO NOT NEED TO LOOK INTO MULTI-VALUED FIELDS AS A TABLE
+                self.multi = None
+                self.query_path = path[0]
+            else:
+                # LOOK INTO A SPECIFIC MULTI VALUED COLUMN
+                try:
+                    self.multi = first([
+                        c
+                        for c in self.snowflake.columns
+                        if (
+                            untype_path(c.name) == query_path
+                            and (
+                                c.multi > 1
+                                or last(split_field(c.es_column)) == NESTED_TYPE  # THIS IS TO COMPENSATE FOR BAD c.multi
+                            )
+                        )
+                    ])
+                    if not self.multi:
+                        Log.error("expecting a nested column")
+                    self.query_path = [self.multi.name] + unwrap(listwrap(self.multi.nested_path))
+                except Exception as e:
+                    # PROBLEM WITH METADATA UPDATE
+                    self.multi = None
+                    self.query_path = (query_path, ".")
+
+                    Log.warning(
+                        "Problem getting query path {{path|quote}} in snowflake {{sf|quote}}",
+                        path=query_path,
+                        sf=snowflake.name,
+                        cause=e,
+                    )
+
+            if (
+                not is_list(self.query_path)
+                or self.query_path[len(self.query_path) - 1] != "."
+            ):
+                Log.error("error")
+
         except Exception as e:
             Log.error("logic error", cause=e)
 
@@ -595,43 +1111,111 @@ class Schema(jx_base.Schema):
         :param column_name:
         :return: ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
         """
-        column_name = unnest_path(column_name)
+        clean_name = unnest_path(column_name)
+
+        if clean_name != column_name:
+            clean_name = column_name
+            cleaner = lambda x: x
+        else:
+            cleaner = unnest_path
+
         columns = self.columns
-        deep_path = self.query_path[0]
-        for path in self.query_path:
+        # TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERSPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
+        # TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
+        for path in reversed(self.query_path) if clean_name == "." else self.query_path:
             output = [
                 c
                 for c in columns
                 if (
-                    (c.names['.'] != "_id" or column_name == "_id") and
-                    c.jx_type not in OBJECTS and
-                    startswith_field(unnest_path(c.names[path]), column_name)
+                    (c.name != "_id" or clean_name == "_id")
+                    and (
+                        (
+                            c.jx_type == EXISTS
+                            and column_name.endswith("." + EXISTS_TYPE)
+                        )
+                        or c.jx_type not in OBJECTS
+                        or (clean_name == "." and c.cardinality == 0)
+                    )
+                    and startswith_field(
+                        cleaner(relative_field(c.name, path)), clean_name
+                    )
                 )
             ]
+            if output:
+                return set(output)
+        return set()
+
+    def new_leaves(self, column_name):
+        """
+        :param column_name:
+        :return: ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
+        """
+        column_name = unnest_path(column_name)
+        columns = self.columns
+        all_paths = self.snowflake.sorted_query_paths
+
+        output = {}
+        for c in columns:
+            if c.name == "_id" and column_name != "_id":
+                continue
+            if c.jx_type in OBJECTS:
+                continue
+            if c.cardinality == 0:
+                continue
+            for path in all_paths:
+                if not startswith_field(
+                    unnest_path(relative_field(c.name, path)), column_name
+                ):
+                    continue
+                existing = output.get(path)
+                if not existing:
+                    output[path] = [c]
+                    continue
+                if len(path) > len(c.nested_path[0]):
+                    continue
+                if any(
+                    "." + t + "." in c.es_column
+                    for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)
+                ):
+                    # ELASTICSEARCH field TYPES ARE NOT ALLOWED
+                    continue
+                # ONLY THE DEEPEST COLUMN WILL BE CHOSEN
+                output[path].append(c)
+        return set(output.values())
+
+    def both_leaves(self, column_name):
+        old = self.old_leaves(column_name)
+        new = self.new_leaves(column_name)
+
+        if old != new:
+            Log.error(
+                "not the same: {{old}}, {{new}}",
+                old=[c.name for c in old],
+                new=[c.name for c in new],
+            )
+
+        return new
+
+    def values(self, column_name, exclude_type=STRUCT):
+        """
+        RETURN ALL COLUMNS THAT column_name REFERS TO
+        """
+        column_name = unnest_path(column_name)
+        columns = self.columns
+        output = []
+        for path in self.query_path:
+            full_path = untype_path(concat_field(path, column_name))
+            for c in columns:
+                if c.jx_type in exclude_type:
+                    continue
+                # if c.cardinality == 0:
+                #     continue
+                if untype_path(c.name) == full_path:
+                    output.append(c)
             if output:
                 return output
         return []
 
-    def values(self, column_name):
-        """
-        RETURN ALL COLUMNS THAT column_name REFERES TO
-        """
-        column_name = unnest_path(column_name)
-        columns = self.columns
-        deep_path = self.query_path[0]
-        for path in self.query_path:
-            output = [
-                c
-                for c in columns
-                if (
-                    c.jx_type not in STRUCT and
-                    untype_path(c.names[path]) == column_name
-                )
-            ]
-            if output:
-                return output
-        return output
-
     def __getitem__(self, column_name):
         return self.values(column_name)
 
@@ -641,7 +1225,7 @@ class Schema(jx_base.Schema):
 
     @property
     def columns(self):
-        return self.snowflake.namespace.get_columns(literal_field(self.snowflake.name))
+        return self.snowflake.columns
 
     def map_to_es(self):
         """
@@ -653,20 +1237,19 @@ class Schema(jx_base.Schema):
             output,
             {
                 k: c.es_column
-                for c in self.snowflake.columns
+                for c in self.columns
                 if c.jx_type not in STRUCT
-                for rel_name in [c.names[path]]
+                for rel_name in [relative_field(c.name, path)]
                 for k in [rel_name, untype_path(rel_name), unnest_path(rel_name)]
-            }
+            },
         )
         return output
 
 
 class Table(jx_base.Table):
-
     def __init__(self, full_name, container):
         jx_base.Table.__init__(self, full_name)
-        self.container=container
+        self.container = container
         self.schema = container.namespace.get_schema(full_name)
 
 
@@ -675,50 +1258,20 @@ def _counting_query(c):
         return {"filter": {"match_all": {}}}
     elif len(c.nested_path) != 1:
         return {
-            "nested": {
-                "path": c.nested_path[0]  # FIRST ONE IS LONGEST
-            },
+            "nested": {"path": c.nested_path[0]},  # FIRST ONE IS LONGEST
             "aggs": {
-                "_nested": {"cardinality": {
-                    "field": c.es_column,
-                    "precision_threshold": 10 if c.es_type in elasticsearch.ES_NUMERIC_TYPES else 100
-                }}
-            }
+                "_nested": {
+                    "cardinality": {
+                        "field": c.es_column,
+                        "precision_threshold": 10
+                        if c.es_type in elasticsearch.ES_NUMERIC_TYPES
+                        else 100,
+                    }
+                }
+            },
         }
     else:
-        return {"cardinality": {
-            "field": c.es_column
-        }}
-
-
-def metadata_tables():
-    return wrap(
-        [
-            Column(
-                names={".": c},
-                es_index="meta.tables",
-                es_column=c,
-                es_type="string",
-                nested_path=ROOT_PATH
-            )
-            for c in [
-                "name",
-                "url",
-                "query_path"
-            ]
-        ]+[
-            Column(
-                names={".": c},
-                es_index="meta.tables",
-                es_column=c,
-                es_type="integer",
-                nested_path=ROOT_PATH
-            )
-            for c in [
-                "timestamp"
-            ]
-        ]
-    )
+        return {"cardinality": {"field": c.es_column}}
 
 
 def jx_type(column):
@@ -730,4 +1283,148 @@ def jx_type(column):
     return es_type_to_json_type[column.es_type]
 
 
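Note on the tables that follow: `python_type_to_es_type` maps Python values to ES types, and `_merge_es_type` is a merge lattice in which `None` marks an incompatible pair. A hedged sketch of how such a lattice would be consulted (the helper name is illustrative; this patch only defines the table):

    def merge_es_type(a, b):
        merged = _merge_es_type[a][b]
        if merged is None:
            raise ValueError("can not merge %s with %s" % (a, b))
        return merged

    # merge_es_type("integer", "float") == "float"
    # merge_es_type("long", "float") == "double"  # PRECISION FORCES WIDENING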
+python_type_to_es_type = {
+    none_type: "undefined",
+    NullType: "undefined",
+    bool: "boolean",
+    str: "string",
+    text: "string",
+    int: "integer",
+    long: "integer",
+    float: "double",
+    Data: "object",
+    dict: "object",
+    set: "nested",
+    list: "nested",
+    FlatList: "nested",
+    Date: "double",
+    Decimal: "double",
+    datetime: "double",
+    date: "double",
+}
+
+_merge_es_type = {
+    "undefined": {
+        "undefined": "undefined",
+        "boolean": "boolean",
+        "integer": "integer",
+        "long": "long",
+        "float": "float",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": "object",
+        "nested": "nested",
+    },
+    "boolean": {
+        "undefined": "boolean",
+        "boolean": "boolean",
+        "integer": "integer",
+        "long": "long",
+        "float": "float",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "integer": {
+        "undefined": "integer",
+        "boolean": "integer",
+        "integer": "integer",
+        "long": "long",
+        "float": "float",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "long": {
+        "undefined": "long",
+        "boolean": "long",
+        "integer": "long",
+        "long": "long",
+        "float": "double",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "float": {
+        "undefined": "float",
+        "boolean": "float",
+        "integer": "float",
+        "long": "double",
+        "float": "float",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "double": {
+        "undefined": "double",
+        "boolean": "double",
+        "integer": "double",
+        "long": "double",
+        "float": "double",
+        "double": "double",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "number": {
+        "undefined": "number",
+        "boolean": "number",
+        "integer": "number",
+        "long": "number",
+        "float": "number",
+        "double": "number",
+        "number": "number",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "string": {
+        "undefined": "string",
+        "boolean": "string",
+        "integer": "string",
+        "long": "string",
+        "float": "string",
+        "double": "string",
+        "number": "string",
+        "string": "string",
+        "object": None,
+        "nested": None,
+    },
+    "object": {
+        "undefined": "object",
+        "boolean": None,
+        "integer": None,
+        "long": None,
+        "float": None,
+        "double": None,
+        "number": None,
+        "string": None,
+        "object": "object",
+        "nested": "nested",
+    },
+    "nested": {
+        "undefined": "nested",
+        "boolean": None,
+        "integer": None,
+        "long": None,
+        "float": None,
+        "double": None,
+        "number": None,
+        "string": None,
+        "object": "nested",
+        "nested": "nested",
+    },
+}
+
+
 OBJECTS = (OBJECT, EXISTS)
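Note on `meta_columns.py` (next): column metadata is itself stored in an ES index, keyed so that writes behave as versioned upserts. Restating the `ID` configuration the module declares:

    ID = {
        "field": ["es_index", "es_column"],  # COMPOUND PRIMARY KEY
        "version": "last_updated",           # NEWEST last_updated WINS
    }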
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +import jx_base +from jx_base import Column, Table +from jx_base.meta_columns import META_COLUMNS_NAME, META_COLUMNS_TYPE_NAME, SIMPLE_METADATA_COLUMNS, META_COLUMNS_DESC +from jx_base.schema import Schema +from jx_python import jx +from mo_dots import Data, Null, is_data, is_list, unwraplist, wrap, listwrap, split_field +from mo_dots.lists import last +from mo_json import STRUCT, NESTED, OBJECT +from mo_json.typed_encoder import unnest_path, untype_path, untyped, NESTED_TYPE, get_nested_path +from mo_logs import Log +from mo_math import MAX +from mo_threads import Lock, MAIN_THREAD, Queue, Thread, Till +from mo_times import YEAR +from mo_times.dates import Date + +DEBUG = False +singlton = None +REPLICAS = 5 +COLUMN_LOAD_PERIOD = 10 +COLUMN_EXTRACT_PERIOD = 2 * 60 +ID = {"field": ["es_index", "es_column"], "version": "last_updated"} + + +class ColumnList(Table, jx_base.Container): + """ + CENTRAL CONTAINER FOR ALL COLUMNS + SYNCHRONIZED WITH ELASTICSEARCH + OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED + """ + + def __init__(self, es_cluster): + Table.__init__(self, META_COLUMNS_NAME) + self.data = {} # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS) + self.locker = Lock() + self._schema = None + self.dirty = False + self.es_cluster = es_cluster + self.es_index = None + self.last_load = Null + self.for_es_update = Queue( + "update columns to es" + ) # HOLD (action, column) PAIR, WHERE action in ['insert', 'update'] + self._db_load() + Thread.run( + "update " + META_COLUMNS_NAME, self._update_from_es, parent_thread=MAIN_THREAD + ).release() + + def _query(self, query): + result = Data() + curr = self.es_cluster.execute(query) + result.meta.format = "table" + result.header = [d[0] for d in curr.description] if curr.description else None + result.data = curr.fetchall() + return result + + def _db_create(self): + schema = { + "settings": {"index.number_of_shards": 1, "index.number_of_replicas": REPLICAS}, + "mappings": {META_COLUMNS_TYPE_NAME: {}}, + } + + self.es_index = self.es_cluster.create_index( + id=ID, index=META_COLUMNS_NAME, schema=schema + ) + self.es_index.add_alias(META_COLUMNS_NAME) + + for c in META_COLUMNS_DESC.columns: + self._add(c) + self.es_index.add({"value": c.__dict__()}) + + def _db_load(self): + self.last_load = Date.now() + + try: + self.es_index = self.es_cluster.get_index( + id=ID, index=META_COLUMNS_NAME, type=META_COLUMNS_TYPE_NAME, read_only=False + ) + + result = self.es_index.search( + { + "query": { + "bool": { + "should": [ + { + "bool": { + "must_not": { + "exists": {"field": "cardinality.~n~"} + } + } + }, + { # ASSUME UNUSED COLUMNS DO NOT EXIST + "range": {"cardinality.~n~": {"gt": 0}} + }, + ] + } + }, + "sort": ["es_index.~s~", "name.~s~", "es_column.~s~"], + "size": 10000, + } + ) + + Log.note("{{num}} columns loaded", num=result.hits.total) + with self.locker: + for r in result.hits.hits._source: + col = doc_to_column(r) + if col: + self._add(col) + + except Exception as e: + metadata = self.es_cluster.get_metadata(after=Date.now()) + if any(index.startswith(META_COLUMNS_NAME) for index in metadata.indices.keys()): + Log.error("metadata already exists!", cause=e) + + Log.warning("no {{index}} exists, making one", index=META_COLUMNS_NAME, cause=e) + self._db_create() + + def _update_from_es(self, please_stop): + try: + last_extract = Date.now() + while not please_stop: + now = Date.now() + try: + if (now - 
last_extract).seconds > COLUMN_EXTRACT_PERIOD: + result = self.es_index.search( + { + "query": { + "range": { + "last_updated.~n~": {"gte": self.last_load} + } + }, + "sort": ["es_index.~s~", "name.~s~", "es_column.~s~"], + "from": 0, + "size": 10000, + } + ) + last_extract = now + + with self.locker: + for r in result.hits.hits._source: + c = doc_to_column(r) + if c: + self._add(c) + self.last_load = MAX((self.last_load, c.last_updated)) + + while not please_stop: + updates = self.for_es_update.pop_all() + if not updates: + break + + DEBUG and updates and Log.note( + "{{num}} columns to push to db", num=len(updates) + ) + self.es_index.extend([ + {"value": column.__dict__()} for column in updates + ]) + except Exception as e: + Log.warning("problem updating database", cause=e) + + (Till(seconds=COLUMN_LOAD_PERIOD) | please_stop).wait() + finally: + Log.note("done") + + def find(self, es_index, abs_column_name=None): + with self.locker: + if es_index.startswith("meta."): + self._update_meta() + + if not abs_column_name: + return [c for cs in self.data.get(es_index, {}).values() for c in cs] + else: + return self.data.get(es_index, {}).get(abs_column_name, []) + + def extend(self, columns): + self.dirty = True + with self.locker: + for column in columns: + self._add(column) + + def add(self, column): + self.dirty = True + with self.locker: + canonical = self._add(column) + if canonical == None: + return column # ALREADY ADDED + self.for_es_update.add(canonical) + return canonical + + def remove(self, column, after): + if column.last_updated>after: + return + with self.locker: + canonical = self._add(column) + if canonical: + Log.error("Expecting canonical column to be removed") + mark_as_deleted(column) + DEBUG and Log.note("delete {{col|quote}}, at {{timestamp}}", col=column.es_column, timestamp=column.last_updated) + self.for_es_update.add(column) + + def remove_table(self, table_name): + del self.data[table_name] + + def _add(self, column): + """ + :param column: ANY COLUMN OBJECT + :return: None IF column IS canonical ALREADY (NET-ZERO EFFECT) + """ + columns_for_table = self.data.setdefault(column.es_index, {}) + existing_columns = columns_for_table.setdefault(column.name, []) + + for canonical in existing_columns: + if canonical is column: + return None + if canonical.es_type == column.es_type: + if column.last_updated > canonical.last_updated: + for key in Column.__slots__: + old_value = canonical[key] + new_value = column[key] + if new_value == old_value: + pass # NO NEED TO UPDATE WHEN NO CHANGE MADE (COMMON CASE) + else: + canonical[key] = new_value + return canonical + existing_columns.append(column) + return column + + def _update_meta(self): + if not self.dirty: + return + + now = Date.now() + for mc in META_COLUMNS_DESC.columns: + count = 0 + values = set() + objects = 0 + multi = 1 + for column in self._all_columns(): + value = column[mc.name] + if value == None: + pass + else: + count += 1 + if is_list(value): + multi = max(multi, len(value)) + try: + values |= set(value) + except Exception: + objects += len(value) + elif is_data(value): + objects += 1 + else: + values.add(value) + mc.count = count + mc.cardinality = len(values) + objects + mc.partitions = jx.sort(values) + mc.multi = multi + mc.last_updated = now + + META_COLUMNS_DESC.last_updated = now + self.dirty = False + + def _all_columns(self): + return [ + column + for t, cs in self.data.items() + for _, css in cs.items() + for column in css + ] + + def __iter__(self): + with self.locker: + self._update_meta() + 
return iter(self._all_columns())
+
+    def __len__(self):
+        return self.data[META_COLUMNS_NAME]["es_index"].count
+
+    def update(self, command):
+        self.dirty = True
+        try:
+            command = wrap(command)
+            DEBUG and Log.note(
+                "Update {{timestamp}}: {{command|json}}",
+                command=command,
+                timestamp=Date(command["set"].last_updated),
+            )
+            eq = command.where.eq
+            if eq.es_index:
+                if len(eq) == 1:
+                    if unwraplist(command.clear) == ".":
+                        d = self.data
+                        i = eq.es_index
+                        with self.locker:
+                            cols = d[i]
+                            del d[i]
+
+                        for cs in cols.values():
+                            for c in cs:
+                                mark_as_deleted(c)
+                                self.for_es_update.add(c)
+                        return
+
+                    # FASTEST
+                    all_columns = self.data.get(eq.es_index, {}).values()
+                    with self.locker:
+                        columns = [c for cs in all_columns for c in cs]
+                elif eq.es_column and len(eq) == 2:
+                    # FASTER
+                    all_columns = self.data.get(eq.es_index, {}).values()
+                    with self.locker:
+                        columns = [
+                            c
+                            for cs in all_columns
+                            for c in cs
+                            if c.es_column == eq.es_column
+                        ]
+
+                else:
+                    # SLOWER
+                    all_columns = self.data.get(eq.es_index, {}).values()
+                    with self.locker:
+                        columns = [
+                            c
+                            for cs in all_columns
+                            for c in cs
+                            if all(
+                                c[k] == v for k, v in eq.items()
+                            )  # THIS LINE IS VERY SLOW
+                        ]
+            else:
+                columns = list(self)
+                columns = jx.filter(columns, command.where)
+
+            with self.locker:
+                for col in columns:
+                    DEBUG and Log.note(
+                        "update column {{table}}.{{column}}",
+                        table=col.es_index,
+                        column=col.es_column,
+                    )
+                    for k in command["clear"]:
+                        if k == ".":
+                            mark_as_deleted(col)
+                            self.for_es_update.add(col)
+                            lst = self.data[col.es_index]
+                            cols = lst[col.name]
+                            cols.remove(col)
+                            if len(cols) == 0:
+                                del lst[col.name]
+                                if len(lst) == 0:
+                                    del self.data[col.es_index]
+                            break
+                        else:
+                            col[k] = None
+                    else:
+                        # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
+                        for k, v in command.set.items():
+                            col[k] = v
+                            self.for_es_update.add(col)
+
+        except Exception as e:
+            Log.error("should not happen", cause=e)
+
+    def query(self, query):
+        # NOT EXPECTED TO BE RUN
+        Log.error("not expected")
+        with self.locker:
+            self._update_meta()
+            if not self._schema:
+                self._schema = Schema(
+                    ".", [c for cs in self.data[META_COLUMNS_NAME].values() for c in cs]
+                )
+            snapshot = self._all_columns()
+
+        from jx_python.containers.list_usingPythonList import ListContainer
+
+        query.frum = ListContainer(META_COLUMNS_NAME, snapshot, self._schema)
+        return jx.run(query)
+
+    def groupby(self, keys):
+        with self.locker:
+            self._update_meta()
+            return jx.groupby(self.__iter__(), keys)
+
+    def window(self, window):
+        raise NotImplementedError()
+
+    @property
+    def schema(self):
+        if not self._schema:
+            with self.locker:
+                self._update_meta()
+                self._schema = Schema(
+                    ".", [c for cs in self.data[META_COLUMNS_NAME].values() for c in cs]
+                )
+        return self._schema
+
+    @property
+    def namespace(self):
+        return self
+
+    def get_table(self, table_name):
+        if table_name != META_COLUMNS_NAME:
+            Log.error("this container has only the " + META_COLUMNS_NAME)
+        return self
+
+    def get_columns(self, table_name):
+        if table_name != META_COLUMNS_NAME:
+            Log.error("this container has only the " + META_COLUMNS_NAME)
+        return self._all_columns()
+
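For reference, a sketch of the command shapes `update()` accepts, inferred from the branches above; `columns` is an assumed existing `ColumnList` instance and the index/column names are invented:

```python
from mo_times.dates import Date

# "." in clear removes the column record entirely; FASTEST path is a single
# es_index equality in the where clause
columns.update({
    "clear": ".",
    "where": {"eq": {"es_index": "testdata"}},
})

# set/clear individual properties of one column (FASTER path: es_index + es_column)
columns.update({
    "set": {"cardinality": 10, "last_updated": Date.now()},
    "clear": ["partitions"],
    "where": {"eq": {"es_index": "testdata", "es_column": "status.~s~"}},
})
```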
+    def denormalized(self):
+        """
+        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
+        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
+        """
+        with self.locker:
+            self._update_meta()
+            output = [
+                {
+                    "table": c.es_index,
+                    "name": untype_path(c.name),
+                    "cardinality": c.cardinality,
+                    "es_column": c.es_column,
+                    "es_index": c.es_index,
+                    "last_updated": c.last_updated,
+                    "count": c.count,
+                    "nested_path": [unnest_path(n) for n in c.nested_path],
+                    "es_type": c.es_type,
+                    "type": c.jx_type,
+                }
+                for tname, css in self.data.items()
+                for cname, cs in css.items()
+                for c in cs
+                if c.jx_type not in STRUCT  # and c.es_column != "_id"
+            ]
+
+        from jx_python.containers.list_usingPythonList import ListContainer
+
+        return ListContainer(
+            self.name,
+            data=output,
+            schema=jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
+        )
+
+
+def doc_to_column(doc):
+    try:
+        doc = wrap(untyped(doc))
+        if not doc.last_updated:
+            doc.last_updated = Date.now() - YEAR
+
+        if doc.es_type == None:
+            if doc.jx_type == OBJECT:
+                doc.es_type = "object"
+            else:
+                Log.warning("{{doc}} has no es_type", doc=doc)
+        doc.multi = 1001 if doc.es_type == "nested" else doc.multi
+
+        doc.nested_path = tuple(listwrap(doc.nested_path))
+        if last(split_field(doc.es_column)) == NESTED_TYPE and doc.es_type != "nested":
+            doc.es_type = "nested"
+            doc.jx_type = NESTED
+            doc.multi = 1001
+            doc.last_updated = Date.now()
+
+        expected_nested_path = get_nested_path(doc.es_column)
+        if len(doc.nested_path) > 1 and doc.nested_path[-2] == '.':
+            doc.nested_path = doc.nested_path[:-1]
+        if untype_path(doc.es_column) == doc.es_column:
+            if doc.nested_path != (".",):
+                if doc.es_index in {"repo"}:
+                    pass
+                else:
+                    Log.note("not expected")
+                    doc.nested_path = expected_nested_path
+        else:
+            if doc.nested_path != expected_nested_path:
+                doc.nested_path = expected_nested_path
+        return Column(**doc)
+    except Exception:
+        doc.nested_path = ["."]
+        mark_as_deleted(Column(**doc))
+        return None
+
+
+def mark_as_deleted(col):
+    col.count = 0
+    col.cardinality = 0
+    col.multi = 1001 if col.es_type == "nested" else 0
+    col.partitions = None
+    col.last_updated = Date.now()
diff --git a/vendor/pyLibrary/env/rollover_index.py b/vendor/jx_elasticsearch/rollover_index.py
similarity index 80%
rename from vendor/pyLibrary/env/rollover_index.py
rename to vendor/jx_elasticsearch/rollover_index.py
index 8d191d5..66cba28 100644
--- a/vendor/pyLibrary/env/rollover_index.py
+++ b/vendor/jx_elasticsearch/rollover_index.py
@@ -4,30 +4,32 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # from __future__ import unicode_literals -from activedata_etl import etl2path -from activedata_etl import key2etl +import re + +from jx_elasticsearch import elasticsearch from jx_python import jx -from jx_python.containers.list_usingPythonList import ListContainer -from mo_dots import coalesce, wrap, Null -from mo_json import json2value, value2json, CAN_NOT_DECODE_JSON +from mo_dots import Null, coalesce, wrap +from mo_dots.lists import last +from mo_future import items, sort_using_key +from mo_json import CAN_NOT_DECODE_JSON, json2value, value2json from mo_kwargs import override from mo_logs import Log -from mo_logs.exceptions import suppress_exception, Except +from mo_logs.exceptions import Except from mo_math.randoms import Random -from mo_threads import Lock +from mo_threads import Lock, Thread from mo_times.dates import Date, unicode2Date, unix2Date from mo_times.durations import Duration from mo_times.timer import Timer -from pyLibrary.aws.s3 import strip_extension, KEY_IS_WRONG_FORMAT -from pyLibrary.env import elasticsearch +from pyLibrary.aws.s3 import KEY_IS_WRONG_FORMAT, strip_extension MAX_RECORD_LENGTH = 400000 DATA_TOO_OLD = "data is too old to be indexed" -DEBUG=False +DEBUG = False + class RolloverIndex(object): """ @@ -40,21 +42,25 @@ class RolloverIndex(object): rollover_field, # the FIELD with a timestamp to use for determining which index to push to rollover_interval, # duration between roll-over to new index rollover_max, # remove old indexes, do not add old records + schema, # es schema queue_size=10000, # number of documents to queue in memory batch_size=5000, # number of documents to push at once typed=None, # indicate if we are expected typed json kwargs=None # plus additional ES settings ): if kwargs.tjson != None: - Log.error + Log.error("not expected") if typed == None: Log.error("not expected") + schema.settings.index.max_result_window = 100000 # REQUIRED FOR ACTIVEDATA NESTED QUERIES + schema.settings.index.max_inner_result_window = 100000 # REQUIRED FOR ACTIVEDATA NESTED QUERIES + self.settings = kwargs self.locker = Lock("lock for rollover_index") self.rollover_field = jx.get(rollover_field) - self.rollover_interval = self.settings.rollover_interval = Duration(kwargs.rollover_interval) - self.rollover_max = self.settings.rollover_max = Duration(kwargs.rollover_max) + self.rollover_interval = self.settings.rollover_interval = Duration(rollover_interval) + self.rollover_max = self.settings.rollover_max = Duration(rollover_max) self.known_queues = {} # MAP DATE TO INDEX self.cluster = elasticsearch.Cluster(self.settings) @@ -76,19 +82,23 @@ class RolloverIndex(object): with self.locker: queue = self.known_queues.get(rounded_timestamp.unix) if queue == None: - candidates = jx.run({ - "from": ListContainer('.', self.cluster.get_aliases()), - "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}}, - "sort": "index" - }) + candidates = wrap(sort_using_key( + filter( + lambda r: re.match( + re.escape(self.settings.index) + r"\d\d\d\d\d\d\d\d_\d\d\d\d\d\d$", + r['index'] + ), + self.cluster.get_aliases() + ), + key=lambda r: r['index'] + )) best = None for c in candidates: - c = wrap(c) c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT) if timestamp > c.date: best = c if not best or rounded_timestamp > best.date: - if rounded_timestamp < wrap(candidates[-1]).date: + if rounded_timestamp < wrap(last(candidates)).date: 
es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings) else: try: @@ -102,8 +112,13 @@ class RolloverIndex(object): else: es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings) - with suppress_exception: - es.set_refresh_interval(seconds=60 * 5, timeout=5) + def refresh(please_stop): + try: + es.set_refresh_interval(seconds=coalesce(Duration(self.settings.refresh_interval).seconds, 60 * 10), timeout=5) + except Exception: + Log.note("Could not set refresh interval for {{index}}", index=es.settings.index) + + Thread.run("refresh", refresh).release() self._delete_old_indexes(candidates) threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True) @@ -129,6 +144,7 @@ class RolloverIndex(object): # ADD keys() SO ETL LOOP CAN FIND WHAT'S GETTING REPLACED def keys(self, prefix=None): + from activedata_etl import etl2path, key2etl path = jx.reverse(etl2path(key2etl(prefix))) if self.cluster.version.startswith(("5.", "6.")): @@ -152,6 +168,9 @@ class RolloverIndex(object): return set() def extend(self, documents, queue=None): + if len(documents) == 0: + return + i = 0 if queue == None: for i, doc in enumerate(documents): @@ -189,7 +208,7 @@ class RolloverIndex(object): queue = None pending = [] # FOR WHEN WE DO NOT HAVE QUEUE YET for key in keys: - timer = Timer("Process {{key}}", param={"key": key}, silent=not DEBUG) + timer = Timer("Process {{key}}", param={"key": key}, verbose=DEBUG) try: with timer: for rownum, line in enumerate(source.read_lines(strip_extension(key))): @@ -199,14 +218,18 @@ class RolloverIndex(object): if rownum > 0 and rownum % 1000 == 0: Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name) - row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size) - if row == None: + insert_me, please_stop = fix(key, rownum, line, source, sample_only_filter, sample_size) + if insert_me == None: continue + value = insert_me['value'] + + if '_id' not in value: + Log.warning("expecting an _id in all S3 records. 
If missing, there can be duplicates") if queue == None: - queue = self._get_queue(row) + queue = self._get_queue(insert_me) if queue == None: - pending.append(row) + pending.append(insert_me) if len(pending) > 1000: if done_copy: done_copy() @@ -219,7 +242,7 @@ class RolloverIndex(object): pending = [] num_keys += 1 - queue.add(row) + queue.add(insert_me) if please_stop: break @@ -242,14 +265,14 @@ class RolloverIndex(object): else: queue.add(done_copy) - if pending: + if [p for p in pending if wrap(p).value.task.state not in ('failed', 'exception')]: Log.error("Did not find an index for {{alias}} to place the data for key={{key}}", key=tuple(keys)[0], alias=self.settings.index) Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys) return num_keys -def fix(rownum, line, source, sample_only_filter, sample_size): +def fix(source_key, rownum, line, source, sample_only_filter, sample_size): """ :param rownum: :param line: @@ -262,7 +285,7 @@ def fix(rownum, line, source, sample_only_filter, sample_size): if rownum == 0: if len(line) > MAX_RECORD_LENGTH: - _shorten(value, source) + _shorten(source_key, value, source) value = _fix(value) if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter): # INDEX etl.id==0, BUT NO MORE @@ -271,7 +294,7 @@ def fix(rownum, line, source, sample_only_filter, sample_size): row = {"value": value} return row, True elif len(line) > MAX_RECORD_LENGTH: - _shorten(value, source) + _shorten(source_key, value, source) value = _fix(value) elif '"resource_usage":' in line: value = _fix(value) @@ -280,7 +303,7 @@ def fix(rownum, line, source, sample_only_filter, sample_size): return row, False -def _shorten(value, source): +def _shorten(source_key, value, source): if source.name.startswith("active-data-test-result"): value.result.subtests = [s for s in value.result.subtests if s.ok is False] value.result.missing_subtests = True @@ -295,7 +318,7 @@ def _shorten(value, source): else: pass # NOT A PROBLEM else: - Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length) + Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=source_key, name=source.name, length=shorter_length) def _fix(value): diff --git a/vendor/pyLibrary/env/typed_inserter.py b/vendor/jx_elasticsearch/typed_inserter.py similarity index 55% rename from vendor/pyLibrary/env/typed_inserter.py rename to vendor/jx_elasticsearch/typed_inserter.py index 9c14f5c..cfea155 100644 --- a/vendor/pyLibrary/env/typed_inserter.py +++ b/vendor/jx_elasticsearch/typed_inserter.py @@ -5,49 +5,47 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
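Stepping back to the `RolloverIndex` changes above: each record is routed by rounding its `rollover_field` timestamp to the rollover interval, and candidate indexes carry that date as a `"%Y%m%d_%H%M%S"` suffix (`INDEX_DATE_FORMAT`), which `_get_queue` parses back out of the last 15 characters of the name. An illustrative sketch, with invented values and under the assumption that the rounding is a plain `Date.floor`:

```python
from mo_times.dates import Date, unicode2Date
from mo_times.durations import Duration

timestamp = Date(1535932800)                 # value found at rollover_field
rounded = timestamp.floor(Duration("week"))  # one queue (and one index) per interval
index_name = "testdata" + rounded.format("%Y%m%d_%H%M%S")
# the date suffix round-trips, which is how the best candidate index is chosen
assert unicode2Date(index_name[-15:], format="%Y%m%d_%H%M%S").unix == rounded.unix
```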
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping - -from jx_python.expressions import jx_expression_to_function -from mo_dots import Data, unwrap -from pyLibrary.env.elasticsearch import parse_properties, random_id - -from mo_json import json2value +from jx_elasticsearch.elasticsearch import parse_properties, random_id +from jx_python import jx +from mo_dots import Data, ROOT_PATH, is_data, unwrap +from mo_future import text +from mo_json import NESTED, OBJECT, json2value, value2json from mo_json.encoder import UnicodeBuilder -from mo_json.typed_encoder import typed_encode, OBJECT, NESTED +from mo_json.typed_encoder import typed_encode class TypedInserter(object): - def __init__(self, es=None, id_expression="_id"): + def __init__(self, es=None, id_info=None): self.es = es - self.id_column = id_expression - self.get_id = jx_expression_to_function(id_expression) - self.remove_id = True if id_expression == "_id" else False + self.id_info = id_info + self.get_id = jx.get(id_info.field) + self.get_version = jx.get(id_info.version) if es: _schema = Data() - for c in parse_properties(es.settings.alias, ".", es.get_properties()): - if c.es_type not in (OBJECT, NESTED): - _schema[c.names["."]] = c + for c in parse_properties(es.settings.alias, ".", ROOT_PATH, es.get_properties()): + if c.es_type in (OBJECT, NESTED): + _schema[c.name] = {} + else: + _schema[c.name] = c self.schema = unwrap(_schema) else: self.schema = {} - def typed_encode(self, r): + def typed_encode(self, record): """ :param record: expecting id and value properties :return: dict with id and json properties """ try: - value = r['value'] - if "json" in r: - value = json2value(r["json"]) - elif isinstance(value, Mapping) or value != None: + value = record.get('value') + if "json" in record: + value = json2value(record["json"]) + elif is_data(value) or value != None: pass else: from mo_logs import Log @@ -56,26 +54,29 @@ class TypedInserter(object): _buffer = UnicodeBuilder(1024) net_new_properties = [] path = [] - if isinstance(value, Mapping): + if is_data(value): given_id = self.get_id(value) - if self.remove_id: - value['_id'] = None + if given_id != None and not isinstance(given_id, text): + given_id = value2json(given_id) + value['_id'] = None + version = self.get_version(value) else: given_id = None + version = None if given_id: - record_id = r.get('id') + record_id = record.get('id') if record_id and record_id != given_id: from mo_logs import Log raise Log.error( "expecting {{property}} of record ({{record_id|quote}}) to match one given ({{given|quote}})", - property=self.id_column, + property=self.id_info, record_id=record_id, given=given_id ) else: - record_id = r.get('id') + record_id = record.get('id') if record_id: given_id = record_id else: @@ -84,11 +85,7 @@ class TypedInserter(object): typed_encode(value, self.schema, path, net_new_properties, _buffer) json = _buffer.build() - for props in net_new_properties: - path, type = props[:-1], props[-1][1:] - # self.es.add_column(join_field(path), type) - - return {"id": given_id, "json": json} + return given_id, version, json except Exception as e: # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS from mo_logs import Log diff --git a/vendor/jx_python/containers/cube.py 
b/vendor/jx_python/containers/cube.py index b433190..d8fb634 100644 --- a/vendor/jx_python/containers/cube.py +++ b/vendor/jx_python/containers/cube.py @@ -5,23 +5,20 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping - -import mo_dots as dot -from mo_dots import Null, Data, FlatList, wrap, wrap_leaves, listwrap -from mo_logs import Log -from mo_math import MAX, OR -from mo_collections.matrix import Matrix from jx_base.container import Container +from jx_base.query import _normalize_edge from jx_python.cubes.aggs import cube_aggs from jx_python.lists.aggs import is_aggs -from jx_base.query import _normalize_edge +from mo_collections.matrix import Matrix +from mo_dots import Data, FlatList, Null, is_data, is_list, listwrap, wrap, wrap_leaves +import mo_dots as dot +from mo_future import is_text, transpose +from mo_logs import Log +from mo_math import MAX, OR class Cube(Container): @@ -36,7 +33,7 @@ class Cube(Container): ALLOWED, USING THE select AND edges TO DESCRIBE THE data """ - self.is_value = False if isinstance(select, list) else True + self.is_value = False if is_list(select) else True self.select = select self.meta = Data(format="cube") # PUT EXTRA MARKUP HERE self.is_none = False @@ -45,37 +42,37 @@ class Cube(Container): is_none = True # ENSURE frum IS PROPER FORM - if isinstance(select, list): + if is_list(select): if edges and OR(not isinstance(v, Matrix) for v in data.values()): Log.error("Expecting data to be a dict with Matrix values") if not edges: if not data: - if isinstance(select, list): + if is_list(select): Log.error("not expecting a list of records") data = {select.name: Matrix.ZERO} self.edges = FlatList.EMPTY - elif isinstance(data, Mapping): + elif is_data(data): # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA length = MAX([len(v) for v in data.values()]) if length >= 1: self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}]) else: self.edges = FlatList.EMPTY - elif isinstance(data, list): - if isinstance(select, list): + elif is_list(data): + if is_list(select): Log.error("not expecting a list of records") data = {select.name: Matrix.wrap(data)} self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(data), "interval": 1}}]) elif isinstance(data, Matrix): - if isinstance(select, list): + if is_list(select): Log.error("not expecting a list of records") data = {select.name: data} else: - if isinstance(select, list): + if is_list(select): Log.error("not expecting a list of records") data = {select.name: Matrix(value=data)} @@ -148,7 +145,7 @@ class Cube(Container): return Null if self.edges: Log.error("can not get value of with dimension") - if isinstance(self.select, list): + if is_list(self.select): Log.error("can not get value of multi-valued cubes") return self.data[self.select.name].cube @@ -205,7 +202,7 @@ class Cube(Container): # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]} # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING - if isinstance(item, Mapping): + if is_data(item): coordinates = [None] * len(self.edges) # MAP DICT TO 
NUMERIC INDICES @@ -232,7 +229,7 @@ class Cube(Container): data={k: Matrix(values=c.__getitem__(coordinates)) for k, c in self.data.items()} ) return output - elif isinstance(item, text_type): + elif is_text(item): # RETURN A VALUE CUBE if self.is_value: if item != self.select.name: @@ -320,7 +317,11 @@ class Cube(Container): getKey = [e.domain.getKey for e in self.edges] lookup = [[getKey[i](p) for p in e.domain.partitions+([None] if e.allowNulls else [])] for i, e in enumerate(self.edges)] - if isinstance(self.select, list): + def coord2term(coord): + output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)}) + return output + + if is_list(self.select): selects = listwrap(self.select) index, v = transpose(*self.data[selects[0].name].groupby(selector)) @@ -375,7 +376,7 @@ class Cube(Container): output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)}) return output - if isinstance(self.select, list): + if is_list(self.select): selects = listwrap(self.select) index, v = transpose(*self.data[selects[0].name].groupby(selector)) diff --git a/vendor/jx_python/containers/doc_store.py b/vendor/jx_python/containers/doc_store.py deleted file mode 100644 index d582b1d..0000000 --- a/vendor/jx_python/containers/doc_store.py +++ /dev/null @@ -1,252 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from copy import copy -from datetime import datetime - -from mo_future import text_type -from mo_dots import wrap, Data, FlatList, literal_field -from mo_json.typed_encoder import TYPE_PREFIX -from mo_logs import Log -from pyLibrary import convert -from jx_python import jx -from jx_python.containers import Container -from jx_python.expressions import Variable, Literal -from jx_base.query import QueryOp - -INDEX = "__index__" -PARENT = "__parent__" - -class DocStore(Container): - """ - SIMPLE COLUMNAR DATASTORE, EVERYTHING IS INDEXED, WITH QUERY INTERFACE - HOPE IS IT WILL USE NUMPY - """ - def __init__(self, uid="_id"): - self._uid = uid # COLUMN NAME HOLDING UID - self._source = [] # ORDERED LIST OF ALL wrapped DOCUMENTS - self._index = {} # MAP FROM PROPERTY_NAME -> (VALUE -> set(OF _source INDEXES) - self._unique_index = {} # MAP FROM _UID TO _source INDEX - - def add(self, doc): - doc = wrap(copy(doc)) - _id = doc[self._uid] - if _id == None: - _source_index = _id = doc[self._uid] = len(self._source) - else: - _source_index = self._unique_index[_id] - existing = self._source[_source_index] - self._unindex_values(existing, _source_index) - self._source.append(doc) - self._index_values(doc, _source_index) - - def update(self, clear, set, where): - doc_list = self._filter(where) - self._update(clear, set, doc_list) - - def upsert(self, clear, set, where): - doc_list = self._filter(where) - if not doc_list: - self.add(set) - else: - self._update(clear, set, doc_list) - - def _update(self, clear, set, doc_list): - for _source_index in doc_list: - existing = self._source[_source_index] - self._unindex_values(existing, _source_index) - for c in clear: - existing[c] = None - for k, v in set.items(): - existing[k] = v - self._index_values(existing, _source_index) - - def _index_values(self, doc, start_index, parent_index=-1, 
prefix=""): - curr_index = doc[INDEX] = start_index - doc[PARENT] = parent_index - _index = self._index - - for k, v in doc.items(): - k = literal_field(k) - _type = _type_map[v.__class__] - if _type == "object": - self._index_values(v, start_index, prefix=k + ".") - v = "." - elif _type == "nested": - for vv in v: - curr_index = self._index_values(vv, curr_index + 1, start_index, prefix=k + ".") - _type = "object" - v = "." - - typed_key = k + "." + TYPE_PREFIX + _type - i = _index.get(typed_key) - if i is None: - i = _index[typed_key] = {} - j = i.get(v) - if j is None: - j = i[v] = set() - j |= {start_index} - return curr_index - - def _unindex_values(self, existing, _source_index): - self._unique_index[existing[self._uid]] = None - for k, v in existing.leaves(): - self._index[k][v] -= {_source_index} - - def query(self, query): - query = QueryOp.wrap(query) - short_list = self._filter(query.where) - if query.sort: - short_list = self._sort(query.sort) - - if isinstance(query.select, list): - accessors = map(jx.get, query.select.value) - - if query.window: - for w in query.window: - window_list = self._filter(w.where) - - def _edges(self, short_list, edges): - edge_values = self._index_columns(edges) - - def _index_columns(self, columns): - # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS - indexed_values = [None]*len(columns) - for i, s in enumerate(columns): - index = self._index.get(s.value, None) - if index is not None: - indexed_values[i]=index - continue - - function_name = value2json(s.value.__data__(), sort_keys=True) - index = self._index.get(function_name, None) - indexed_values[i]=index - if index is not None: - continue - - indexed_values[i] = index = self._index[function_name] = {} - accessor = jx.get(s.value) - for k, ii in self._unique_index.items(): - v = accessor(self._source[ii]) - j = index.get(v) - if j is None: - j = index[v] = set() - j |= {ii} - return indexed_values - - def _sort(self, short_list, sorts): - """ - TAKE SHORTLIST, RETURN IT SORTED - :param short_list: - :param sorts: LIST OF SORTS TO PERFORM - :return: - """ - - sort_values = self._index_columns(sorts) - - # RECURSIVE SORTING - output = [] - def _sort_more(short_list, i, sorts): - if len(sorts) == 0: - output.extend(short_list) - - sort = sorts[0] - - index = self._index[sort_values[i]] - if sort.sort == 1: - sorted_keys = sorted(index.keys()) - elif sort.sort == -1: - sorted_keys = reversed(sorted(index.keys())) - else: - sorted_keys = list(index.keys()) - - for k in sorted_keys: - self._sort(index[k] & short_list, i + 1, sorts[1:]) - - _sort_more(short_list, 0, sorts) - return output - - def filter(self, where): - return self.where(where) - - def where(self, where): - return self.query({"from": self, "where": where}) - - def sort(self, sort): - return self.query({"from": self, "sort": sort}) - - def select(self, select): - return self.query({"from": self, "select": select}) - - def window(self, window): - return self.query({"from": self, "window": window}) - - def having(self, having): - _ = having - raise NotImplementedError() - - def format(self, format): - if format == "list": - return { - "meta": {"format": "list"}, - "data": [self._source[i] for i in self._unique_index.values()] - } - elif format == "table": - columns = list(self._index.keys()) - data = [[self._source[i].get(c, None) for c in columns] for i in self._unique_index.values()] - return { - "meta": {"format": "table"}, - "header": columns, - "data": data - } - elif format == "cube": - Log.error("not supported") - - def 
get_leaves(self, table_name): - return {"name":c for c in self._index.keys()} - - def _filter(self, where): - return filters[where.name](self, where) - - def _eq(self, op): - if isinstance(op.lhs, Variable) and isinstance(op.rhs, Literal): - return copy(self._index[op.lhs][op.rhs]) - - def _and(self, op): - if not op.terms: - return self._unique_index.values() - - agg = filters[op.name](self, op.terms[0]) - for t in op.terms[1:]: - agg &= filters[op.name](self, t) - return agg - - def _true(self, op): - return self._unique_index.values() - - -filters={ - "eq": DocStore._eq, - "and": DocStore._and, - "true": DocStore._true -} - -_type_map = { - text_type: "text", - int: "long", - float: "real", - datetime: "real", - list: "nested", - FlatList: "nested", - dict: "object", - Data: "object" -} diff --git a/vendor/jx_python/containers/list_usingPythonList.py b/vendor/jx_python/containers/list_usingPythonList.py index ca3b007..f1ae483 100644 --- a/vendor/jx_python/containers/list_usingPythonList.py +++ b/vendor/jx_python/containers/list_usingPythonList.py @@ -5,30 +5,27 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import itertools -from collections import Mapping +from copy import copy import jx_base from jx_base import Container -from jx_base.expressions import jx_expression, Expression, Variable, TRUE -from jx_python.expression_compiler import compile_expression +from jx_base.expressions import TRUE, Variable +from jx_base.language import is_expression, is_op +from jx_base.meta_columns import get_schema_from_list +from jx_base.schema import Schema +from jx_python.convert import list2cube, list2table from jx_python.expressions import jx_expression_to_function from jx_python.lists.aggs import is_aggs, list_aggs -from jx_python.meta import get_schema_from_list from mo_collections import UniqueIndex -from mo_dots import Data, wrap, listwrap, unwraplist, unwrap, Null -from mo_future import sort_using_key +from mo_dots import Data, Null, is_data, is_list, listwrap, unwrap, unwraplist, wrap, coalesce +from mo_future import first, sort_using_key from mo_logs import Log from mo_threads import Lock -from pyLibrary import convert - -_get = object.__getattribute__ class ListContainer(Container, jx_base.Namespace, jx_base.Table): @@ -43,7 +40,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): self._schema = get_schema_from_list(name, data) else: self._schema = schema - self.name = name + self.name = coalesce(name, ".") self.data = data self.locker = Lock() # JUST IN CASE YOU WANT TO DO MORE THAN ONE THING @@ -73,7 +70,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): output = self if is_aggs(q): output = list_aggs(output.data, q) - else: # SETOP + else: try: if q.filter != None or q.esfilter != None: Log.error("use 'where' clause") @@ -88,7 +85,8 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): if q.select: output = output.select(q.select) - #TODO: ADD EXTRA COLUMN DESCRIPTIONS TO RESULTING SCHEMA + + # TODO: ADD EXTRA COLUMN DESCRIPTIONS TO RESULTING SCHEMA for param in q.window: output.window(param) @@ -96,14 +94,14 @@ class ListContainer(Container, jx_base.Namespace, 
jx_base.Table): if q.format == "list": return Data(data=output.data, meta={"format": "list"}) elif q.format == "table": - head = [c.names['.'] for c in output.schema.columns] + head = [c.name for c in output.schema.columns] data = [ - [r if h == '.' else r[h] for h in head] + [r if h == "." else r[h] for h in head] for r in output.data ] return Data(header=head, data=data, meta={"format": "table"}) elif q.format == "cube": - head = [c.names['.'] for c in output.schema.columns] + head = [c.name for c in output.schema.columns] rows = [ [r[h] for h in head] for r in output.data @@ -144,10 +142,10 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): return self.where(where) def where(self, where): - if isinstance(where, Mapping): - temp = compile_expression(jx_expression(where).to_python()) - elif isinstance(where, Expression): - temp = compile_expression(where.to_python()) + if is_data(where): + temp = jx_expression_to_function(where) + elif is_expression(where): + temp = jx_expression_to_function(where) else: temp = where @@ -161,7 +159,7 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): :param select: the variable to extract from list :return: a simple list of the extraction """ - if isinstance(select, list): + if is_list(select): return [(d[s] for s in select) for d in self.data] else: return [d[select] for d in self.data] @@ -169,20 +167,20 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): def select(self, select): selects = listwrap(select) - if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".": + if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".": new_schema = self.schema if selects[0].name == ".": return self else: new_schema = None - if isinstance(select, list): + if is_list(select): if all( - isinstance(s.value, Variable) and s.name == s.value.var + is_op(s.value, Variable) and s.name == s.value.var for s in select ): names = set(s.value.var for s in select) - new_schema = Schema(".", [c for c in self.schema.columns if c.names['.'] in names]) + new_schema = Schema(".", [c for c in self.schema.columns if c.name in names]) push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects] def selector(d): @@ -191,10 +189,14 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): output[n] = unwraplist(p(wrap(d))) return unwrap(output) - new_data = map(selector, self.data) + new_data = list(map(selector, self.data)) else: select_value = jx_expression_to_function(select.value) - new_data = map(select_value, self.data) + new_data = list(map(select_value, self.data)) + if is_op(select.value, Variable): + column = copy(first(c for c in self.schema.columns if c.name == select.value.var)) + column.name = '.' 
+ new_schema = Schema("from " + self.name, [column]) return ListContainer("from "+self.name, data=new_data, schema=new_schema) @@ -203,15 +205,11 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): jx.window(self.data, window) return self - def having(self, having): - _ = having - raise NotImplementedError() - def format(self, format): if format == "table": - frum = convert.list2table(self.data, self._schema.lookup.keys()) + frum = list2table(self.data, self._schema.lookup.keys()) elif format == "cube": - frum = convert.list2cube(self.data, self.schema.lookup.keys()) + frum = list2cube(self.data, self.schema.lookup.keys()) else: frum = self.__data__() @@ -242,10 +240,16 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): self.data.extend(documents) def __data__(self): - return wrap({ - "meta": {"format": "list"}, - "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data] - }) + if first(self.schema.columns).name=='.': + return wrap({ + "meta": {"format": "list"}, + "data": self.data + }) + else: + return wrap({ + "meta": {"format": "list"}, + "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data] + }) def get_columns(self, table_name=None): return self.schema.values() @@ -264,8 +268,6 @@ class ListContainer(Container, jx_base.Namespace, jx_base.Table): def __len__(self): return len(self.data) - # class Namespace(jx_base.Namespace): - def get_snowflake(self, name): if self.name != name: Log.error("This container only has table by name of {{name}}", name=name) @@ -291,15 +293,10 @@ def _exec(code): Log.error("Could not execute {{code|quote}}", code=code, cause=e) - - -from jx_base.schema import Schema from jx_python import jx - DUAL = ListContainer( name="dual", data=[{}], - schema=Schema(table_name="dual", columns=UniqueIndex(keys=("names.\\.",))) + schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name",))) ) - diff --git a/vendor/jx_python/convert.py b/vendor/jx_python/convert.py new file mode 100644 index 0000000..973d153 --- /dev/null +++ b/vendor/jx_python/convert.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
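Before moving on, a small usage sketch for the `ListContainer` changes above (data invented; assumes the vendored modules import cleanly and that `where` accepts a plain jx expression dict, as the new `jx_expression_to_function` branch suggests):

```python
from jx_python.containers.list_usingPythonList import ListContainer

people = ListContainer("people", data=[
    {"name": "ann", "age": 30},
    {"name": "bob", "age": 25},
])
# dict expressions are compiled via jx_expression_to_function
young = people.where({"lt": {"age": 28}})
assert [d["name"] for d in young.data] == ["bob"]
```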
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from mo_dots import wrap, unwraplist +from mo_future import text +from mo_json import value2json +from mo_logs.strings import expand_template + + +def list2cube(rows, column_names=None): + if column_names: + keys = column_names + else: + columns = set() + for r in rows: + columns |= set(r.keys()) + keys = list(columns) + + data = {k: [] for k in keys} + output = wrap({ + "meta": {"format": "cube"}, + "edges": [ + { + "name": "rownum", + "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1} + } + ], + "data": data + }) + + for r in rows: + for k in keys: + data[k].append(unwraplist(r[k])) + + return output + + +def list2table(rows, column_names=None): + if column_names: + keys = list(set(column_names)) + else: + columns = set() + for r in rows: + columns |= set(r.keys()) + keys = list(columns) + + output = [[unwraplist(r.get(k)) for k in keys] for r in rows] + + return wrap({ + "meta": {"format": "table"}, + "header": keys, + "data": output + }) + + +def table2csv(table_data): + """ + :param table_data: expecting a list of tuples + :return: text in nice formatted csv + """ + text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data] + + col_widths = [max(len(t) for t in cols) for cols in zip(*text_data)] + template = ", ".join( + "{{" + text(i) + "|left_align(" + text(w) + ")}}" + for i, w in enumerate(col_widths) + ) + output = "\n".join(expand_template(template, d) for d in text_data) + return output diff --git a/vendor/jx_python/cubes/__init__.py b/vendor/jx_python/cubes/__init__.py index 9358683..e69de29 100644 --- a/vendor/jx_python/cubes/__init__.py +++ b/vendor/jx_python/cubes/__init__.py @@ -1 +0,0 @@ -__author__ = 'kyle' diff --git a/vendor/jx_python/cubes/aggs.py b/vendor/jx_python/cubes/aggs.py index 084e7c0..b5bb86a 100644 --- a/vendor/jx_python/cubes/aggs.py +++ b/vendor/jx_python/cubes/aggs.py @@ -5,21 +5,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import itertools +from jx_base.domains import DefaultDomain, SimpleSetDomain from jx_python import windows -from mo_dots import listwrap -from mo_logs import Log - -from jx_base.domains import SimpleSetDomain, DefaultDomain from jx_python.expressions import jx_expression_to_function from mo_collections.matrix import Matrix +from mo_dots import listwrap +from mo_logs import Log def cube_aggs(frum, query): diff --git a/vendor/jx_python/expression_compiler.py b/vendor/jx_python/expression_compiler.py index 580151a..4d70ffe 100644 --- a/vendor/jx_python/expression_compiler.py +++ b/vendor/jx_python/expression_compiler.py @@ -5,58 +5,57 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
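Roughly what the new `jx_python/convert.py` helpers emit, shown on a tiny invented dataset:

```python
from mo_dots import wrap
from jx_python.convert import list2cube, list2table

rows = wrap([{"a": 1, "b": "x"}, {"a": 2, "b": "y"}])

t = list2table(rows, column_names=["a", "b"])
# {"meta": {"format": "table"}, "header": [...], "data": [[1, "x"], [2, "y"]]}
# note: when column_names is given, the header order comes from list(set(...)),
# so it is not guaranteed to match the order passed in

c = list2cube(rows, column_names=["a", "b"])
# {"meta": {"format": "cube"}, "edges": [<rownum edge>],
#  "data": {"a": [1, 2], "b": ["x", "y"]}}
```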
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division import re -from pyLibrary import convert -from mo_logs import Log -from mo_dots import coalesce, Data, listwrap, wrap_leaves +from mo_future import first +from mo_dots import Data, coalesce, is_data, listwrap, wrap_leaves +from mo_logs import Log, strings from mo_times.dates import Date -true = True -false = False -null = None -EMPTY_DICT = {} +GLOBALS = { + "true": True, + "false": False, + "null": None, + "EMPTY_DICT": {}, + "coalesce": coalesce, + "listwrap": listwrap, + "Date": Date, + "Log": Log, + "Data": Data, + "re": re, + "wrap_leaves": wrap_leaves, + "is_data": is_data, + "first": first +} -def compile_expression(source): +def compile_expression(source, function_name="output"): """ THIS FUNCTION IS ON ITS OWN FOR MINIMAL GLOBAL NAMESPACE :param source: PYTHON SOURCE CODE + :param function_name: OPTIONAL NAME TO GIVE TO OUTPUT FUNCTION :return: PYTHON FUNCTION """ - # FORCE MODULES TO BE IN NAMESPACE - _ = coalesce - _ = listwrap - _ = Date - _ = convert - _ = Log - _ = Data - _ = EMPTY_DICT - _ = re - _ = wrap_leaves - fake_locals = {} try: exec( -""" -def output(row, rownum=None, rows=None): - _source = """ + convert.value2quote(source) + """ - try: - return """ + source + """ - except Exception as e: - Log.error("Problem with dynamic function {{func|quote}}", func=_source, cause=e) -""", - globals(), - fake_locals + ( + "def " + function_name + "(row, rownum=None, rows=None):\n" + + " _source = " + strings.quote(source) + "\n" + + " try:\n" + + " return " + source + "\n" + + " except Exception as e:\n" + + " Log.error(u'Problem with dynamic function {{func|quote}}', func=_source, cause=e)\n" + ), + GLOBALS, + fake_locals, ) except Exception as e: - Log.error("Bad source: {{source}}", source=source, cause=e) - return fake_locals['output'] + Log.error(u"Bad source: {{source}}", source=source, cause=e) + return fake_locals["output"] diff --git a/vendor/jx_python/expressions.py b/vendor/jx_python/expressions.py deleted file mode 100644 index ecd489b..0000000 --- a/vendor/jx_python/expressions.py +++ /dev/null @@ -1,369 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping - -from mo_future import text_type -from mo_dots import split_field -from mo_dots import unwrap -from mo_json import json2value -from mo_logs import Log -from mo_logs.strings import quote -from pyLibrary import convert - -from jx_base.expressions import Variable, DateOp, TupleOp, LeavesOp, BinaryOp, OrOp, ScriptOp, \ - InequalityOp, extend, RowsOp, OffsetOp, GetOp, Literal, NullOp, TrueOp, FalseOp, DivOp, FloorOp, \ - EqOp, NeOp, NotOp, LengthOp, NumberOp, StringOp, CountOp, MultiOp, RegExpOp, CoalesceOp, MissingOp, ExistsOp, \ - PrefixOp, NotLeftOp, RightOp, NotRightOp, FindOp, BetweenOp, RangeOp, CaseOp, AndOp, \ - ConcatOp, InOp, jx_expression, Expression, WhenOp, MaxOp, SplitOp, NULL, SelectOp, SuffixOp, LastOp, IntegerOp, BasicEqOp -from jx_python.expression_compiler import compile_expression -from mo_times.dates import Date - - -def jx_expression_to_function(expr): - """ - RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None): - """ - if isinstance(expr, Expression): - if isinstance(expr, ScriptOp) and not isinstance(expr.script, text_type): - return expr.script - else: - return compile_expression(expr.to_python()) - if expr != None and not isinstance(expr, (Mapping, list)) and hasattr(expr, "__call__"): - return expr - return compile_expression(jx_expression(expr).to_python()) - - -@extend(Variable) -def to_python(self, not_null=False, boolean=False, many=False): - path = split_field(self.var) - agg = "row" - if not path: - return agg - elif path[0] in ["row", "rownum"]: - # MAGIC VARIABLES - agg = path[0] - path = path[1:] - if len(path) == 0: - return agg - elif path[0] == "rows": - if len(path) == 1: - return "rows" - elif path[1] in ["first", "last"]: - agg = "rows." + path[1] + "()" - path = path[2:] - else: - Log.error("do not know what {{var}} of `rows` is", var=path[1]) - - for p in path[:-1]: - if not_null: - agg = agg + ".get(" + convert.value2quote(p) + ")" - else: - agg = agg + ".get(" + convert.value2quote(p) + ", EMPTY_DICT)" - output = agg + ".get(" + convert.value2quote(path[-1]) + ")" - if many: - output = "listwrap(" + output + ")" - return output - - -@extend(OffsetOp) -def to_python(self, not_null=False, boolean=False, many=False): - return "row[" + text_type(self.var) + "] if 0<=" + text_type(self.var) + " ", None), + "gte": (" >= ", None), + "lte": (" <= ", None), + "lt": (" < ", None), +} diff --git a/vendor/jx_python/expressions/add_op.py b/vendor/jx_python/expressions/add_op.py new file mode 100644 index 0000000..02f4b46 --- /dev/null +++ b/vendor/jx_python/expressions/add_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import AddOp as AddOp_ +from jx_python.expressions._utils import multiop_to_python + + +class AddOp(AddOp_): + to_python = multiop_to_python diff --git a/vendor/jx_python/expressions/and_op.py b/vendor/jx_python/expressions/and_op.py new file mode 100644 index 0000000..2dd2815 --- /dev/null +++ b/vendor/jx_python/expressions/and_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import AndOp as AndOp_ +from jx_python.expressions._utils import Python +from jx_python.expressions.boolean_op import BooleanOp + + +class AndOp(AndOp_): + def to_python(self, not_null=False, boolean=False, many=False): + if not self.terms: + return "True" + else: + return " and ".join( + "(" + BooleanOp(Python[t]).to_python() + ")" for t in self.terms + ) diff --git a/vendor/jx_python/expressions/basic_eq_op.py b/vendor/jx_python/expressions/basic_eq_op.py new file mode 100644 index 0000000..126f200 --- /dev/null +++ b/vendor/jx_python/expressions/basic_eq_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicEqOp as BasicEqOp_ +from jx_python.expressions._utils import Python + + +class BasicEqOp(BasicEqOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "(" + + Python[self.rhs].to_python() + + ") == (" + + Python[self.lhs].to_python() + + ")" + ) diff --git a/vendor/jx_python/expressions/basic_index_of_op.py b/vendor/jx_python/expressions/basic_index_of_op.py new file mode 100644 index 0000000..55d4632 --- /dev/null +++ b/vendor/jx_python/expressions/basic_index_of_op.py @@ -0,0 +1,27 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BasicIndexOfOp as BasicIndexOfOp_ +from jx_python.expressions._utils import with_var, Python + + +class BasicIndexOfOp(BasicIndexOfOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return with_var( + "f", + "(" + + Python[self.value].to_python() + + ").find" + + "(" + + Python[self.find].to_python() + + ")", + "None if f==-1 else f", + ) diff --git a/vendor/jx_python/expressions/between_op.py b/vendor/jx_python/expressions/between_op.py new file mode 100644 index 0000000..542d1ad --- /dev/null +++ b/vendor/jx_python/expressions/between_op.py @@ -0,0 +1,22 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BetweenOp as BetweenOp_ +from jx_python.expressions._utils import Python + + +class BetweenOp(BetweenOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + Python[self.value].to_python() + + " in " + + Python[self.superset].to_python(many=True) + ) diff --git a/vendor/jx_python/expressions/boolean_op.py b/vendor/jx_python/expressions/boolean_op.py new file mode 100644 index 0000000..db48a3f --- /dev/null +++ b/vendor/jx_python/expressions/boolean_op.py @@ -0,0 +1,22 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import BooleanOp as BooleanOp_ +from jx_python.expressions._utils import with_var, Python + + +class BooleanOp(BooleanOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return with_var( + "f", + Python[self.term].to_python(), + "bool(f)", + ) diff --git a/vendor/jx_python/expressions/case_op.py b/vendor/jx_python/expressions/case_op.py new file mode 100644 index 0000000..c0bfb85 --- /dev/null +++ b/vendor/jx_python/expressions/case_op.py @@ -0,0 +1,29 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CaseOp as CaseOp_ +from jx_python.expressions._utils import Python + + +class CaseOp(CaseOp_): + def to_python(self, not_null=False, boolean=False, many=False): + acc = Python[self.whens[-1]].to_python() + for w in reversed(self.whens[0:-1]): + acc = ( + "(" + + Python[w.then].to_python() + + ") if (" + + Python[w.when].to_python(boolean=True) + + ") else (" + + acc + + ")" + ) + return acc diff --git a/vendor/jx_python/expressions/coalesce_op.py b/vendor/jx_python/expressions/coalesce_op.py new file mode 100644 index 0000000..673f12f --- /dev/null +++ b/vendor/jx_python/expressions/coalesce_op.py @@ -0,0 +1,20 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
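The `CaseOp` translation above is a right-fold: it starts from the final when (the default) and wraps each earlier when/then pair around it as a Python conditional expression. An illustrative restatement in plain Python (not the patch's code; it folds source strings the same way `to_python` does):

```python
def render_case(whens, default):
    acc = default  # python source for the final else
    for when, then in reversed(whens):
        acc = "(%s) if (%s) else (%s)" % (then, when, acc)
    return acc

assert render_case([("w1", "t1"), ("w2", "t2")], "d") == \
    "(t1) if (w1) else ((t2) if (w2) else (d))"
```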
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CoalesceOp as CoalesceOp_ +from jx_python.expressions._utils import Python + + +class CoalesceOp(CoalesceOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "coalesce(" + (", ".join(Python[t].to_python() for t in self.terms)) + ")" + ) diff --git a/vendor/jx_python/expressions/concat_op.py b/vendor/jx_python/expressions/concat_op.py new file mode 100644 index 0000000..b4149aa --- /dev/null +++ b/vendor/jx_python/expressions/concat_op.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ConcatOp as ConcatOp_ +from jx_python.expressions._utils import Python + + +class ConcatOp(ConcatOp_): + def to_python(self, not_null=False, boolean=False, many=False): + v = Python[self.value].to_python() + l = Python[self.length].to_python() + return ( + "None if " + + v + + " == None or " + + l + + " == None else " + + v + + "[0:max(0, " + + l + + ")]" + ) diff --git a/vendor/jx_python/expressions/count_op.py b/vendor/jx_python/expressions/count_op.py new file mode 100644 index 0000000..c68f3cf --- /dev/null +++ b/vendor/jx_python/expressions/count_op.py @@ -0,0 +1,22 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import CountOp as CountOp_ +from jx_python.expressions._utils import Python + + +class CountOp(CountOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "sum(((0 if v==None else 1) for v in " + + Python[self.terms].to_python(not_null=False, boolean=False, many=True) + + "), 0)" + ) diff --git a/vendor/jx_python/expressions/date_op.py b/vendor/jx_python/expressions/date_op.py new file mode 100644 index 0000000..f974640 --- /dev/null +++ b/vendor/jx_python/expressions/date_op.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import DateOp as DateOp_ +from mo_future import text +from mo_times.dates import Date + + +class DateOp(DateOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return text(Date(self.value).unix) diff --git a/vendor/jx_python/expressions/div_op.py b/vendor/jx_python/expressions/div_op.py new file mode 100644 index 0000000..345b88b --- /dev/null +++ b/vendor/jx_python/expressions/div_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import DivOp as DivOp_ +from jx_python.expressions._utils import _binaryop_to_python + + +class DivOp(DivOp_): + to_python = _binaryop_to_python diff --git a/vendor/jx_python/expressions/eq_op.py b/vendor/jx_python/expressions/eq_op.py new file mode 100644 index 0000000..ba1a860 --- /dev/null +++ b/vendor/jx_python/expressions/eq_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import EqOp as EqOp_ +from jx_python.expressions._utils import Python + + +class EqOp(EqOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "(" + + Python[self.rhs].to_python() + + ") in listwrap(" + + Python[self.lhs].to_python() + + ")" + ) diff --git a/vendor/jx_python/expressions/exists_op.py b/vendor/jx_python/expressions/exists_op.py new file mode 100644 index 0000000..ebd012d --- /dev/null +++ b/vendor/jx_python/expressions/exists_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ExistsOp as ExistsOp_ +from jx_python.expressions._utils import Python + + +class ExistsOp(ExistsOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return Python[self.field].to_python() + " != None" diff --git a/vendor/jx_python/expressions/exp_op.py b/vendor/jx_python/expressions/exp_op.py new file mode 100644 index 0000000..bd5abe0 --- /dev/null +++ b/vendor/jx_python/expressions/exp_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ExpOp as ExpOp_ +from jx_python.expressions._utils import _binaryop_to_python + + +class ExpOp(ExpOp_): + to_python = _binaryop_to_python diff --git a/vendor/jx_python/expressions/false_op.py b/vendor/jx_python/expressions/false_op.py new file mode 100644 index 0000000..46c789e --- /dev/null +++ b/vendor/jx_python/expressions/false_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
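# EqOp compiles to "(rhs) in listwrap(lhs)", so a scalar left side matches
# itself, a multi-valued left side matches if any element equals the right
# side, and a missing left side matches nothing. A behavioral sketch with a
# simplified listwrap (the real one comes from mo_dots):
def listwrap(value):
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]

assert 3 in listwrap(3)
assert 3 in listwrap([1, 3, 5])
assert 3 not in listwrap(None)  # a missing lhs is never equal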
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FalseOp as FalseOp_ + + +class FalseOp(FalseOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return "False" diff --git a/vendor/jx_python/expressions/find_op.py b/vendor/jx_python/expressions/find_op.py new file mode 100644 index 0000000..c5bd565 --- /dev/null +++ b/vendor/jx_python/expressions/find_op.py @@ -0,0 +1,74 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FindOp as FindOp_, simplified +from jx_python.expressions._utils import with_var, Python +from jx_python.expressions.and_op import AndOp +from jx_python.expressions.basic_eq_op import BasicEqOp +from jx_python.expressions.basic_index_of_op import BasicIndexOfOp +from jx_python.expressions.eq_op import EqOp +from jx_python.expressions.literal import Literal +from jx_python.expressions.or_op import OrOp +from jx_python.expressions.when_op import WhenOp + + +class FindOp(FindOp_): + @simplified + def partial_eval(self): + index = self.lang[ + BasicIndexOfOp([self.value, self.find, self.start]) + ].partial_eval() + + output = self.lang[ + WhenOp( + OrOp( + [ + self.value.missing(), + self.find.missing(), + BasicEqOp([index, Literal(-1)]), + ] + ), + **{"then": self.default, "else": index} + ) + ].partial_eval() + return output + + def missing(self): + output = AndOp( + [ + self.default.missing(), + OrOp( + [ + self.value.missing(), + self.find.missing(), + EqOp( + [ + BasicIndexOfOp([self.value, self.find, self.start]), + Literal(-1), + ] + ), + ] + ), + ] + ).partial_eval() + return output + + def to_python(self, not_null=False, boolean=False, many=False): + return with_var( + "f", + "(" + + Python[self.value].to_python() + + ").find" + + "(" + + Python[self.find].to_python() + + ")", + "None if f==-1 else f", + ) diff --git a/vendor/jx_python/expressions/first_op.py b/vendor/jx_python/expressions/first_op.py new file mode 100644 index 0000000..9a085de --- /dev/null +++ b/vendor/jx_python/expressions/first_op.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FirstOp as FirstOp_ +from jx_python.expressions._utils import Python + + +class FirstOp(FirstOp_): + def to_python(self, not_null=False, boolean=False, many=False): + value = Python[self.term].to_python() + return "listwrap(" + value + ")[0]" diff --git a/vendor/jx_python/expressions/floor_op.py b/vendor/jx_python/expressions/floor_op.py new file mode 100644 index 0000000..345cbf2 --- /dev/null +++ b/vendor/jx_python/expressions/floor_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
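# FindOp computes str.find once, then maps the -1 sentinel to None. Assuming
# with_var(var, expression, eval) expands to the usual one-element list
# comprehension for binding a temporary inside an expression (an assumption;
# _utils is not shown in this diff), the emitted source behaves like this:
def with_var(var, expression, eval_src):
    return "[(" + eval_src + ") for " + var + " in [" + expression + "]][0]"

src = with_var("f", "('hello').find('ll')", "None if f==-1 else f")
assert eval(src) == 2
src = with_var("f", "('hello').find('zz')", "None if f==-1 else f")
assert eval(src) is None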
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import FloorOp as FloorOp_ +from jx_python.expressions._utils import Python + + +class FloorOp(FloorOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "mo_math.floor(" + + Python[self.lhs].to_python() + + ", " + + Python[self.rhs].to_python() + + ")" + ) diff --git a/vendor/jx_python/expressions/get_op.py b/vendor/jx_python/expressions/get_op.py new file mode 100644 index 0000000..eb877e9 --- /dev/null +++ b/vendor/jx_python/expressions/get_op.py @@ -0,0 +1,21 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GetOp as GetOp_ +from jx_python.expressions._utils import Python + + +class GetOp(GetOp_): + def to_python(self, not_null=False, boolean=False, many=False): + output = ["(" + Python[self.var].to_python() + ")"] + for o in self.offsets: + output.append("[" + Python[o].to_python() + "]") + return "".join(output) diff --git a/vendor/jx_python/expressions/gt_op.py b/vendor/jx_python/expressions/gt_op.py new file mode 100644 index 0000000..59c1871 --- /dev/null +++ b/vendor/jx_python/expressions/gt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GtOp as GtOp_ +from jx_python.expressions._utils import _inequality_to_python + + +class GtOp(GtOp_): + to_python = _inequality_to_python diff --git a/vendor/jx_python/expressions/gte_op.py b/vendor/jx_python/expressions/gte_op.py new file mode 100644 index 0000000..44669a8 --- /dev/null +++ b/vendor/jx_python/expressions/gte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import GteOp as GteOp_ +from jx_python.expressions._utils import _inequality_to_python + + +class GteOp(GteOp_): + to_python = _inequality_to_python diff --git a/vendor/jx_python/expressions/in_op.py b/vendor/jx_python/expressions/in_op.py new file mode 100644 index 0000000..456873b --- /dev/null +++ b/vendor/jx_python/expressions/in_op.py @@ -0,0 +1,22 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
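# GetOp emits plain chained subscripts, "(base)[k0][k1]...", one per offset.
# A sketch of the assembly with illustrative pre-compiled fragments:
base_src, offset_srcs = "row", ["'a'", "0"]
src = "(" + base_src + ")" + "".join("[" + o + "]" for o in offset_srcs)
assert src == "(row)['a'][0]"
assert eval(src, {"row": {"a": [42]}}) == 42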
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import InOp as InOp_ +from jx_python.expressions._utils import Python + + +class InOp(InOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + Python[self.value].to_python() + + " in " + + Python[self.superset].to_python(many=True) + ) diff --git a/vendor/jx_python/expressions/integer_op.py b/vendor/jx_python/expressions/integer_op.py new file mode 100644 index 0000000..d7ad57b --- /dev/null +++ b/vendor/jx_python/expressions/integer_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import IntegerOp as IntegerOp_ +from jx_python.expressions._utils import Python + + +class IntegerOp(IntegerOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return "int(" + Python[self.term].to_python() + ")" diff --git a/vendor/jx_python/expressions/last_op.py b/vendor/jx_python/expressions/last_op.py new file mode 100644 index 0000000..637cebb --- /dev/null +++ b/vendor/jx_python/expressions/last_op.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LastOp as LastOp_ +from jx_python.expressions._utils import Python + + +class LastOp(LastOp_): + def to_python(self, not_null=False, boolean=False, many=False): + term = Python[self.term].to_python() + return "last(" + term + ")" diff --git a/vendor/jx_python/expressions/leaves_op.py b/vendor/jx_python/expressions/leaves_op.py new file mode 100644 index 0000000..c0c20f0 --- /dev/null +++ b/vendor/jx_python/expressions/leaves_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LeavesOp as LeavesOp_ +from jx_python.expressions._utils import Python + + +class LeavesOp(LeavesOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return "Data(" + Python[self.term].to_python() + ").leaves()" diff --git a/vendor/jx_python/expressions/length_op.py b/vendor/jx_python/expressions/length_op.py new file mode 100644 index 0000000..dee8ee8 --- /dev/null +++ b/vendor/jx_python/expressions/length_op.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
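# InOp leans on Python's own membership test; the superset is compiled with
# many=True so it evaluates to an iterable. Behavioral sketch:
src = "v in s"
assert eval(src, {"v": 2, "s": [1, 2, 3]}) is True
assert eval(src, {"v": 9, "s": [1, 2, 3]}) is False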
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LengthOp as LengthOp_ +from jx_python.expressions._utils import Python + + +class LengthOp(LengthOp_): + def to_python(self, not_null=False, boolean=False, many=False): + value = Python[self.term].to_python() + return "len(" + value + ") if (" + value + ") != None else None" diff --git a/vendor/jx_python/expressions/literal.py b/vendor/jx_python/expressions/literal.py new file mode 100644 index 0000000..79bcca8 --- /dev/null +++ b/vendor/jx_python/expressions/literal.py @@ -0,0 +1,20 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import Literal as Literal_ +from mo_dots import unwrap +from mo_future import text +from mo_json import json2value + + +class Literal(Literal_): + def to_python(self, not_null=False, boolean=False, many=False): + return text(repr(unwrap(json2value(self.json)))) diff --git a/vendor/jx_python/expressions/lt_op.py b/vendor/jx_python/expressions/lt_op.py new file mode 100644 index 0000000..0d20761 --- /dev/null +++ b/vendor/jx_python/expressions/lt_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LtOp as LtOp_ +from jx_python.expressions._utils import _inequality_to_python + + +class LtOp(LtOp_): + to_python = _inequality_to_python diff --git a/vendor/jx_python/expressions/lte_op.py b/vendor/jx_python/expressions/lte_op.py new file mode 100644 index 0000000..004bc42 --- /dev/null +++ b/vendor/jx_python/expressions/lte_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import LteOp as LteOp_ +from jx_python.expressions._utils import _inequality_to_python + + +class LteOp(LteOp_): + to_python = _inequality_to_python diff --git a/vendor/jx_python/expressions/max_op.py b/vendor/jx_python/expressions/max_op.py new file mode 100644 index 0000000..4121ba3 --- /dev/null +++ b/vendor/jx_python/expressions/max_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
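# Literal.to_python parses the stored JSON and takes repr() of the unwrapped
# value, so any JSON constant can be embedded directly in generated source.
# Sketch using the standard json module as a stand-in for mo_json.json2value:
import json

def literal_source(json_text):
    return repr(json.loads(json_text))

src = literal_source('["a", 1, null]')
assert eval(src) == ["a", 1, None]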
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MaxOp as MaxOp_ +from jx_python.expressions._utils import Python + + +class MaxOp(MaxOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return "max([" + (",".join(Python[t].to_python() for t in self.terms)) + "])" diff --git a/vendor/jx_python/expressions/missing_op.py b/vendor/jx_python/expressions/missing_op.py new file mode 100644 index 0000000..01a7929 --- /dev/null +++ b/vendor/jx_python/expressions/missing_op.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MissingOp as MissingOp_ +from jx_python.expressions._utils import Python + + +class MissingOp(MissingOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return Python[self.expr].to_python() + " == None" diff --git a/vendor/jx_python/expressions/mod_op.py b/vendor/jx_python/expressions/mod_op.py new file mode 100644 index 0000000..93232f7 --- /dev/null +++ b/vendor/jx_python/expressions/mod_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import ModOp as ModOp_ +from jx_python.expressions._utils import _binaryop_to_python + + +class ModOp(ModOp_): + to_python = _binaryop_to_python diff --git a/vendor/jx_python/expressions/mul_op.py b/vendor/jx_python/expressions/mul_op.py new file mode 100644 index 0000000..7fcd5a2 --- /dev/null +++ b/vendor/jx_python/expressions/mul_op.py @@ -0,0 +1,17 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import MulOp as MulOp_ +from jx_python.expressions._utils import multiop_to_python + + +class MulOp(MulOp_): + to_python = multiop_to_python diff --git a/vendor/jx_python/expressions/ne_op.py b/vendor/jx_python/expressions/ne_op.py new file mode 100644 index 0000000..7c612a1 --- /dev/null +++ b/vendor/jx_python/expressions/ne_op.py @@ -0,0 +1,21 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
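# MissingOp compiles to "expr == None". The generated source deliberately uses
# "==" rather than "is", so placeholder nulls that compare equal to None (as
# mo_dots' Null does) are also treated as missing. Plain-Python sketch:
assert eval("x == None", {"x": None}) is True
assert eval("x == None", {"x": 0}) is False  # 0 is a value, not missing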
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NeOp as NeOp_ +from jx_python.expressions._utils import Python, with_var + + +class NeOp(NeOp_): + def to_python(self, not_null=False, boolean=False, many=False): + lhs = Python[self.lhs].to_python() + rhs = Python[self.rhs].to_python() + + return with_var("r, l", "("+lhs+","+rhs+")", "l!=None and r!=None and l!=r") diff --git a/vendor/jx_python/expressions/not_left_op.py b/vendor/jx_python/expressions/not_left_op.py new file mode 100644 index 0000000..6d909e2 --- /dev/null +++ b/vendor/jx_python/expressions/not_left_op.py @@ -0,0 +1,30 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NotLeftOp as NotLeftOp_ +from jx_python.expressions._utils import Python + + +class NotLeftOp(NotLeftOp_): + def to_python(self, not_null=False, boolean=False, many=False): + v = Python[self.value].to_python() + l = Python[self.length].to_python() + return ( + "None if " + + v + + " == None or " + + l + + " == None else " + + v + + "[max(0, " + + l + + "):]" + ) diff --git a/vendor/jx_python/expressions/not_op.py b/vendor/jx_python/expressions/not_op.py new file mode 100644 index 0000000..4279b0a --- /dev/null +++ b/vendor/jx_python/expressions/not_op.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NotOp as NotOp_ +from jx_python.expressions._utils import Python +from jx_python.expressions.boolean_op import BooleanOp + + +class NotOp(NotOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return "not (" + BooleanOp(Python[self.term]).to_python(boolean=True) + ")" diff --git a/vendor/jx_python/expressions/not_right_op.py b/vendor/jx_python/expressions/not_right_op.py new file mode 100644 index 0000000..0ce9957 --- /dev/null +++ b/vendor/jx_python/expressions/not_right_op.py @@ -0,0 +1,32 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
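# NeOp evaluates each side once and requires both to exist and differ. Note
# the binding order: unpacking "r, l" against (lhs, rhs) assigns r the left
# value and l the right; since != is symmetric, the result is unaffected.
# Sketch assuming the same list-comprehension expansion of with_var as in the
# find_op sketch above:
src = "[(l!=None and r!=None and l!=r) for r, l in [(lhs, rhs)]][0]"
assert eval(src, {"lhs": 1, "rhs": 2}) is True
assert eval(src, {"lhs": 1, "rhs": 1}) is False
assert eval(src, {"lhs": 1, "rhs": None}) is False  # missing is never unequal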
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import NotRightOp as NotRightOp_ +from jx_python.expressions._utils import Python + + +class NotRightOp(NotRightOp_): + def to_python(self, not_null=False, boolean=False, many=False): + v = Python[self.value].to_python() + l = Python[self.length].to_python() + return ( + "None if " + + v + + " == None or " + + l + + " == None else " + + v + + "[0:max(0, len(" + + v + + ")-(" + + l + + "))]" + ) diff --git a/vendor/jx_python/expressions/number_op.py b/vendor/jx_python/expressions/number_op.py new file mode 100644 index 0000000..1271adc --- /dev/null +++ b/vendor/jx_python/expressions/number_op.py @@ -0,0 +1,43 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. +# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions.number_op import NumberOp as NumberOp_ +from jx_base.expressions.true_op import TRUE +from jx_python.expressions import _utils +from jx_python.expressions._utils import Python +from mo_json import NUMBER_TYPES + + +class NumberOp(NumberOp_): + def to_python(self, not_null=False, boolean=False, many=False): + term = Python[self.term] + if not_null: + if term.type in NUMBER_TYPES: + return term.to_python(not_null=True) + else: + return "float(" + Python[self.term].to_python(not_null=True) + ")" + else: + exists = self.term.exists() + value = Python[self.term].to_python(not_null=True) + + if exists is TRUE: + return "float(" + value + ")" + else: + return ( + "float(" + + value + + ") if (" + + Python[exists].to_python() + + ") else None" + ) + + +_utils.NumberOp = NumberOp diff --git a/vendor/jx_python/expressions/offset_op.py b/vendor/jx_python/expressions/offset_op.py new file mode 100644 index 0000000..d1ccf0b --- /dev/null +++ b/vendor/jx_python/expressions/offset_op.py @@ -0,0 +1,24 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http:# mozilla.org/MPL/2.0/. 
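# NotRightOp trims `length` characters from the right and, like the slices in
# concat_op.py and not_left_op.py above, wraps the result in a
# null-propagating guard: if either operand is None, the whole expression is
# None. Behavioral sketch:
def not_right(v, l):
    # mirrors: None if v == None or l == None else v[0:max(0, len(v)-(l))]
    return None if v is None or l is None else v[0:max(0, len(v) - l)]

assert not_right("hello", 2) == "hel"
assert not_right("hello", 99) == ""  # max(0, ...) clamps over-trimming
assert not_right(None, 2) is None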
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# +from __future__ import absolute_import, division, unicode_literals + +from jx_base.expressions import OffsetOp as OffsetOp_ +from mo_future import text + + +class OffsetOp(OffsetOp_): + def to_python(self, not_null=False, boolean=False, many=False): + return ( + "row[" + + text(self.var) + + "] if 0<=" + + text(self.var) + + " < len(row) else None" + ) diff --git a/vendor/jx_python/group_by.py b/vendor/jx_python/group_by.py --- a/vendor/jx_python/group_by.py +++ b/vendor/jx_python/group_by.py - break - i += 1 +def _groupby_value(data): + start = 0 + prev = data[0] + for i, d in enumerate(data): + curr = d + if curr != prev: + yield prev, data[start:i:] + start = i + prev = curr + yield prev, data[start::] -def groupby_Multiset(data, min_size, max_size): +def _groupby_keys(data, key_paths, accessors): + start = 0 + prev = accessors(data[0]) + for i, d in enumerate(data): + curr = accessors(d) + if curr != prev: + group = {} + for k, gg in zip(key_paths, prev): + group[k] = gg + yield Data(group), data[start:i:] + start = i + prev = curr + group = {} + for k, gg in zip(key_paths, prev): + group[k] = gg + yield Data(group), data[start::] + + +def groupby_multiset(data, min_size, max_size): # GROUP multiset BASED ON POPULATION OF EACH KEY, TRYING TO STAY IN min/max LIMITS if min_size == None: min_size = 0 @@ -142,15 +116,16 @@ def groupby_Multiset(data, min_size, max_size): yield (i, g) -def groupby_min_max_size(data, min_size=0, max_size=None, ): - if max_size == None: - max_size = sys.maxint +def chunk(data, size=0): + if not size: + return [data] - if isinstance(data, (bytearray, text_type, binary_type, list)): + if data.__class__ in list_types + (tuple, bytearray, text, binary_type): + # USE SLICING def _iter(): - num = int(math.ceil(len(data)/max_size)) + num = int(math.ceil(len(data) / size)) for i in range(num): - output = (i, data[i * max_size:i * max_size + max_size:]) + output = (i, data[i * size:i * size + size:]) yield output return _iter() @@ -158,26 +133,24 @@ def groupby_min_max_size(data, min_size=0, max_size=None, ): elif hasattr(data, "__iter__"): def _iter(): g = 0 - out = FlatList() + out = [] try: for i, d in enumerate(data): out.append(d) - if (i + 1) % max_size == 0: - yield g, out + if (i + 1) % size == 0: + yield g, FlatList(vals=out) g += 1 - out = FlatList() + out = [] if out: - yield g, out + yield g, FlatList(vals=out) except Exception as e: e = Except.wrap(e) if out: # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR yield g, out - Log.error("Problem inside jx.groupby", e) + Log.error("Problem inside jx.chunk", e) return _iter() - elif not isinstance(data, Multiset): - return groupby_size(data, max_size) else: - return groupby_Multiset(data, min_size, max_size) + Log.error("not supported") diff --git a/vendor/jx_python/jx.py b/vendor/jx_python/jx.py index bda1d40..a56dc67 100644 --- a/vendor/jx_python/jx.py +++ b/vendor/jx_python/jx.py @@ -5,41 +5,32 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/.
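# chunk() yields (index, sub-list) pairs of at most `size` items: direct
# slicing for sized sequences, an accumulating loop for other iterables. A
# pure-Python sketch of the slicing branch's contract:
import math

def chunk_sketch(data, size):
    if not size:
        return [(0, data)]
    num = int(math.ceil(len(data) / size))
    return [(i, data[i * size:i * size + size]) for i in range(num)]

assert chunk_sketch([1, 2, 3, 4, 5], 2) == [(0, [1, 2]), (1, [3, 4]), (2, [5])]
assert chunk_sketch("abcde", 3) == [(0, "abc"), (1, "de")]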
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals - -_range = range - -from mo_times import Date -from collections import Mapping from jx_base import query -from jx_python import expressions as _expressions -from jx_python import flat_list, group_by -from mo_dots import listwrap, wrap, unwrap, FlatList, NullType -from mo_dots import set_default, Null, Data, split_field, coalesce, join_field -from mo_future import text_type, boolean_type, none_type, long, generator_types, sort_using_cmp, PY2 -from mo_logs import Log -from mo_math import Math -from mo_math import UNION, MIN -from pyLibrary import convert - -import mo_dots from jx_base.container import Container -from jx_base.expressions import TRUE, FALSE, NullOp +from jx_base.expressions import FALSE, TRUE from jx_base.query import QueryOp, _normalize_selects +from jx_base.language import is_op, value_compare +from jx_python import expressions as _expressions, flat_list, group_by from jx_python.containers.cube import Cube +from jx_python.convert import list2table, list2cube from jx_python.cubes.aggs import cube_aggs from jx_python.expression_compiler import compile_expression -from jx_python.expressions import jx_expression_to_function +from jx_python.expressions import jx_expression_to_function as get from jx_python.flat_list import PartFlatList from mo_collections.index import Index from mo_collections.unique_index import UniqueIndex +import mo_dots +from mo_dots import Data, FlatList, Null, coalesce, is_container, is_data, is_list, is_many, join_field, listwrap, set_default, split_field, unwrap, wrap from mo_dots.objects import DataObject +from mo_future import is_text, sort_using_cmp +from mo_logs import Log +import mo_math +from mo_math import MIN, UNION # A COLLECTION OF DATABASE OPERATORS (RELATIONAL ALGEBRA OPERATORS) # JSON QUERY EXPRESSION DOCUMENTATION: https://github.com/klahnakoski/jx/tree/master/docs @@ -47,47 +38,44 @@ from mo_dots.objects import DataObject # TODO: USE http://docs.sqlalchemy.org/en/latest/core/tutorial.html AS DOCUMENTATION FRAMEWORK builtin_tuple = tuple +_range = range _Column = None _merge_type = None _ = _expressions -def get(expr): - """ - RETURN FUNCTION FOR EXPRESSION - """ - return jx_expression_to_function(expr) - - def run(query, container=Null): """ THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER, BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer """ if container == None: - container = wrap(query)['from'] + container = wrap(query)["from"] query_op = QueryOp.wrap(query, container=container, namespace=container.schema) else: query_op = QueryOp.wrap(query, container, container.namespace) if container == None: from jx_python.containers.list_usingPythonList import DUAL + return DUAL.query(query_op) elif isinstance(container, Container): return container.query(query_op) - elif isinstance(container, (list, set) + generator_types): + elif is_many(container): container = wrap(list(container)) elif isinstance(container, Cube): if is_aggs(query_op): return cube_aggs(container, query_op) - elif isinstance(container, QueryOp): + elif is_op(container, QueryOp): container = run(container) - elif isinstance(container, Mapping): + elif is_data(container): query = container - container = query['from'] + container = query["from"] container = 
run(QueryOp.wrap(query, container, container.namespace), container) else: - Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__) + Log.error( + "Do not know how to handle {{type}}", type=container.__class__.__name__ + ) if is_aggs(query_op): container = list_aggs(container, query_op) @@ -110,31 +98,34 @@ def run(query, container=Null): # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT if query_op.format == "cube": - container = convert.list2cube(container) + container = list2cube(container) elif query_op.format == "table": - container = convert.list2table(container) + container = list2table(container) container.meta.format = "table" else: - container = wrap({ - "meta": {"format": "list"}, - "data": container - }) + container = wrap({"meta": {"format": "list"}, "data": container}) return container groupby = group_by.groupby +chunk = group_by.chunk def index(data, keys=None): -# return dict that uses keys to index data + # return dict that uses keys to index data o = Index(keys) if isinstance(data, Cube): - if data.edges[0].name==keys[0]: - #QUICK PATH + if data.edges[0].name == keys[0]: + # QUICK PATH names = list(data.data.keys()) - for d in (set_default(mo_dots.zip(names, r), {keys[0]: p}) for r, p in zip(zip(*data.data.values()), data.edges[0].domain.partitions.value)): + for d in ( + set_default(mo_dots.zip(names, r), {keys[0]: p}) + for r, p in zip( + zip(*data.data.values()), data.edges[0].domain.partitions.value + ) + ): o.add(d) return o else: @@ -157,19 +148,20 @@ def unique_index(data, keys=None, fail_on_dup=True): o.add(d) except Exception as e: o.add(d) - Log.error("index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}", - index= keys, - key= select([d], keys)[0], - value1= o[d], - value2= d, - cause=e + Log.error( + "index {{index}} is not unique {{key}} maps to both {{value1}} and {{value2}}", + index=keys, + key=select([d], keys)[0], + value1=o[d], + value2=d, + cause=e, ) return o def map2set(data, relation): """ - EXPECTING A isinstance(relation, Mapping) THAT MAPS VALUES TO lists + EXPECTING A is_data(relation) THAT MAPS VALUES TO lists THE LISTS ARE EXPECTED TO POINT TO MEMBERS OF A SET A set() IS RETURNED """ @@ -178,7 +170,7 @@ def map2set(data, relation): if isinstance(relation, Data): Log.error("Does not accept a Data") - if isinstance(relation, Mapping): + if is_data(relation): try: # relation[d] is expected to be a list # return set(cod for d in data for cod in relation[d]) @@ -215,20 +207,20 @@ def tuple(data, field_name): if isinstance(data, FlatList): Log.error("not supported yet") - if isinstance(field_name, Mapping) and "value" in field_name: + if is_data(field_name) and "value" in field_name: # SIMPLIFY {"value":value} AS STRING field_name = field_name["value"] # SIMPLE PYTHON ITERABLE ASSUMED - if isinstance(field_name, text_type): + if is_text(field_name): if len(split_field(field_name)) == 1: - return [(d[field_name], ) for d in data] + return [(d[field_name],) for d in data] else: path = split_field(field_name) output = [] flat_list._tuple1(data, path, 0, output) return output - elif isinstance(field_name, list): + elif is_list(field_name): paths = [_select_a_field(f) for f in field_name] output = FlatList() _tuple((), unwrap(data), paths, 0, output) @@ -265,16 +257,16 @@ def _tuple_deep(v, field, depth, record): field = {"name":name, "value":["attribute", "path"]} r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH """ - if hasattr(field.value, '__call__'): - 
return 0, None, record + (field.value(v), ) + if hasattr(field.value, "__call__"): + return 0, None, record + (field.value(v),) - for i, f in enumerate(field.value[depth:len(field.value) - 1:]): + for i, f in enumerate(field.value[depth : len(field.value) - 1 :]): v = v.get(f) - if isinstance(v, list): + if is_list(v): return depth + i + 1, v, record f = field.value.last() - return 0, None, record + (v.get(f), ) + return 0, None, record + (v.get(f),) def select(data, field_name): @@ -288,12 +280,14 @@ def select(data, field_name): return data.select(field_name) if isinstance(data, UniqueIndex): - data = data._data.values() # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING + data = ( + data._data.values() + ) # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING - if isinstance(data, Mapping): + if is_data(data): return select_one(data, field_name) - if isinstance(field_name, Mapping): + if is_data(field_name): field_name = wrap(field_name) if field_name.value in ["*", "."]: return data @@ -303,7 +297,7 @@ def select(data, field_name): field_name = field_name.value # SIMPLE PYTHON ITERABLE ASSUMED - if isinstance(field_name, text_type): + if is_text(field_name): path = split_field(field_name) if len(path) == 1: return FlatList([d[field_name] for d in data]) @@ -311,7 +305,7 @@ def select(data, field_name): output = FlatList() flat_list._select1(data, path, 0, output) return output - elif isinstance(field_name, list): + elif is_list(field_name): keys = [_select_a_field(wrap(f)) for f in field_name] return _select(Data(), unwrap(data), keys, 0) else: @@ -320,9 +314,9 @@ def select(data, field_name): def _select_a_field(field): - if isinstance(field, text_type): + if is_text(field): return wrap({"name": field, "value": split_field(field)}) - elif isinstance(wrap(field).value, text_type): + elif is_text(wrap(field).value): field = wrap(field) return wrap({"name": field.name, "value": split_field(field.value)}) else: @@ -334,8 +328,8 @@ def _select(template, data, fields, depth): deep_path = [] deep_fields = UniqueIndex(["name"]) for d in data: - if isinstance(d, Data): - Log.error("programmer error, _select can not handle Data") + if d.__class__ is Data: + Log.error("programmer error, _select can not handle Data, only dict") record = template.copy() children = None @@ -364,18 +358,18 @@ def _select_deep(v, field, depth, record): field = {"name":name, "value":["attribute", "path"]} r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH """ - if hasattr(field.value, '__call__'): + if hasattr(field.value, "__call__"): try: record[field.name] = field.value(wrap(v)) except Exception as e: record[field.name] = None return 0, None - for i, f in enumerate(field.value[depth:len(field.value) - 1:]): + for i, f in enumerate(field.value[depth : len(field.value) - 1 :]): v = v.get(f) if v is None: return 0, None - if isinstance(v, list): + if is_list(v): return depth + i + 1, v f = field.value.last() @@ -385,7 +379,9 @@ def _select_deep(v, field, depth, record): else: record[field.name] = v.get(f) except Exception as e: - Log.error("{{value}} does not have {{field}} property", value= v, field=f, cause=e) + Log.error( + "{{value}} does not have {{field}} property", value=v, field=f, cause=e + ) return 0, None @@ -396,26 +392,31 @@ def _select_deep_meta(field, depth): RETURN FUNCTION THAT PERFORMS THE MAPPING """ name = field.name - if hasattr(field.value, '__call__'): + if hasattr(field.value, "__call__"): try: + def assign(source, destination): 
destination[name] = field.value(wrap(source)) return 0, None + return assign except Exception as e: + def assign(source, destination): destination[name] = None return 0, None + return assign - prefix = field.value[depth:len(field.value) - 1:] + prefix = field.value[depth : len(field.value) - 1 :] if prefix: + def assign(source, destination): for i, f in enumerate(prefix): source = source.get(f) if source is None: return 0, None - if isinstance(source, list): + if is_list(source): return depth + i + 1, source f = field.value.last() @@ -425,23 +426,38 @@ def _select_deep_meta(field, depth): else: destination[name] = source.get(f) except Exception as e: - Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e) + Log.error( + "{{value}} does not have {{field}} property", + value=source, + field=f, + cause=e, + ) return 0, None + return assign else: f = field.value[0] if not f: # NO NAME FIELD INDICATES SELECT VALUE + def assign(source, destination): destination[name] = source return 0, None + return assign else: + def assign(source, destination): try: destination[name] = source.get(f) except Exception as e: - Log.error("{{value}} does not have {{field}} property", value= source, field=f, cause=e) + Log.error( + "{{value}} does not have {{field}} property", + value=source, + field=f, + cause=e, + ) return 0, None + return assign @@ -450,7 +466,12 @@ def get_columns(data, leaves=False): if not leaves: return wrap([{"name": n} for n in UNION(set(d.keys()) for d in data)]) else: - return wrap([{"name": leaf} for leaf in set(leaf for row in data for leaf, _ in row.leaves())]) + return wrap( + [ + {"name": leaf} + for leaf in set(leaf for row in data for leaf, _ in row.leaves()) + ] + ) _ = """ @@ -460,7 +481,7 @@ THE columns DO NOT GET MARKED WITH NESTED (AS THEY SHOULD) type_to_name = { int: "long", str: "string", - text_type: "string", + text: "string", float: "double", Number: "double", Data: "object", @@ -490,23 +511,23 @@ def _deeper_iterator(columns, nested_path, path, data): c = columns.get(leaf) if not c: c = columns[leaf] = _Column(name=leaf, type=type_to_name[v.__class__], table=None, es_column=leaf) - c.type = _merge_type[c.type][type_to_name[v.__class__]] - if c.type == "nested" and not nested_path[0].startswith(leaf + "."): + c.jx_type = _merge_type[c.jx_type][type_to_name[v.__class__]] + if c.jx_type == "nested" and not nested_path[0].startswith(leaf + "."): if leaf.startswith(nested_path[0] + ".") or leaf == nested_path[0] or not nested_path[0]: nested_path[0] = leaf else: Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=nested_path[0]) - if isinstance(v, list) and v: + if is_list(v) and v: if deep_leaf: Log.error("nested path conflict: {{leaf}} vs {{nested}}", leaf=leaf, nested=deep_leaf) deep_leaf = leaf deep_v = v - elif isinstance(v, Mapping): + elif is_data(v): for o in _deeper_iterator(columns, nested_path, leaf, [v]): set_default(output, o) else: - if c.type not in ["object", "nested"]: + if c.jx_type not in ["object", "nested"]: output[leaf] = v if deep_leaf: @@ -517,6 +538,7 @@ def _deeper_iterator(columns, nested_path, path, data): yield output """ + def sort(data, fieldnames=None, already_normalized=False): """ PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction} @@ -525,15 +547,18 @@ def sort(data, fieldnames=None, already_normalized=False): if data == None: return Null - if not fieldnames: - return wrap(sort_using_cmp(data, value_compare)) - - if 
already_normalized: - formal = fieldnames + if isinstance(fieldnames, int): + funcs = [(lambda t: t[fieldnames], 1)] else: - formal = query._normalize_sort(fieldnames) + if not fieldnames: + return wrap(sort_using_cmp(data, value_compare)) - funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal] + if already_normalized: + formal = fieldnames + else: + formal = query._normalize_sort(fieldnames) + + funcs = [(get(f.value), f.sort) for f in formal] def comparer(left, right): for func, sort_ in funcs: @@ -545,101 +570,25 @@ def sort(data, fieldnames=None, already_normalized=False): Log.error("problem with compare", e) return 0 - if isinstance(data, list): + if is_list(data): output = FlatList([unwrap(d) for d in sort_using_cmp(data, cmp=comparer)]) + elif is_text(data): + Log.error("Do not know how to handle") elif hasattr(data, "__iter__"): - output = FlatList([unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)]) + output = FlatList( + [unwrap(d) for d in sort_using_cmp(list(data), cmp=comparer)] + ) else: Log.error("Do not know how to handle") output = None return output except Exception as e: - Log.error("Problem sorting\n{{data}}", data=data, cause=e) + Log.error("Problem sorting\n{{data}}", data=data, cause=e) def count(values): - return sum((1 if v!=None else 0) for v in values) - - -def value_compare(left, right, ordering=1): - """ - SORT VALUES, NULL IS THE LEAST VALUE - :param left: LHS - :param right: RHS - :param ordering: (-1, 0, 0) TO AFFECT SORT ORDER - :return: The return value is negative if x < y, zero if x == y and strictly positive if x > y. - """ - - try: - if isinstance(left, list) or isinstance(right, list): - if left == None: - return ordering - elif right == None: - return - ordering - - left = listwrap(left) - right = listwrap(right) - for a, b in zip(left, right): - c = value_compare(a, b) * ordering - if c != 0: - return c - - if len(left) < len(right): - return - ordering - elif len(left) > len(right): - return ordering - else: - return 0 - - ltype = type(left) - rtype = type(right) - ltype_num = TYPE_ORDER.get(ltype, 10) - rtype_num = TYPE_ORDER.get(rtype, 10) - type_diff = ltype_num - rtype_num - if type_diff != 0: - return ordering if type_diff > 0 else -ordering - - if ltype_num == 9: - return 0 - elif ltype is builtin_tuple: - for a, b in zip(left, right): - c = value_compare(a, b) - if c != 0: - return c * ordering - return 0 - elif ltype in (dict, Data): - for k in sorted(set(left.keys()) | set(right.keys())): - c = value_compare(left.get(k), right.get(k)) * ordering - if c != 0: - return c - return 0 - elif left > right: - return ordering - elif left < right: - return -ordering - else: - return 0 - except Exception as e: - Log.error("Can not compare values {{left}} to {{right}}", left=left, right=right, cause=e) - -TYPE_ORDER = { - boolean_type: 0, - int: 1, - float: 1, - Date: 1, - text_type: 2, - list: 3, - builtin_tuple: 3, - dict: 4, - Data: 4, - none_type: 9, - NullType: 9, - NullOp: 9 -} - -if PY2: - TYPE_ORDER[long] = 1 + return sum((1 if v != None else 0) for v in values) def pairwise(values): @@ -654,6 +603,7 @@ def pairwise(values): yield (a, b) a = b + pairs = pairwise @@ -667,30 +617,62 @@ def filter(data, where): if isinstance(data, Container): return data.filter(where) - if isinstance(data, (list, set)): - temp = jx_expression_to_function(where) + if is_container(data): + temp = get(where) dd = wrap(data) return wrap([unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)]) else: - Log.error("Do not know how to 
handle type {{type}}", type=data.__class__.__name__) + Log.error( + "Do not know how to handle type {{type}}", type=data.__class__.__name__ + ) try: return drill_filter(where, data) except Exception as _: # WOW! THIS IS INEFFICIENT! - return wrap([unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])]) + return wrap( + [unwrap(d) for d in drill_filter(where, [DataObject(d) for d in data])] + ) + + +def drill(data, path): + """ + ITERATE THROUGH ALL OBJECTS FOUND ALONG path + :param data: SOME DATA, OR ITERABLE + :param path: DOT-DELIMITED PATH TO REACH INTO + :return: + """ + def _drill(d, p): + if p: + if is_many(d): + for dd in d: + for v in _drill(dd, p): + yield v + else: + for v in _drill(listwrap(d[p[0]]), p[1:]): + yield v + elif is_many(d): + for dd in d: + for v in _drill(dd, p): + yield v + else: + yield d + + return _drill(data, split_field(path)) def drill_filter(esfilter, data): """ PARTIAL EVALUATE THE FILTER BASED ON DATA GIVEN - TODO: FIX THIS MONUMENALLY BAD IDEA + TODO: FIX THIS MONUMENTALLY BAD IDEA """ esfilter = unwrap(esfilter) primary_nested = [] # track if nested, changes if not primary_column = [] # only one path allowed - primary_branch = [] # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree + primary_branch = ( + [] + ) # CONTAINS LISTS OF RECORDS TO ITERATE: constantly changing as we dfs the tree def parse_field(fieldname, data, depth): """ @@ -703,21 +685,21 @@ def drill_filter(esfilter, data): d = d[c] except Exception as e: Log.error("{{name}} does not exist", name=fieldname) - if isinstance(d, list) and len(col) > 1: - if len(primary_column) <= depth+i: + if is_list(d) and len(col) > 1: + if len(primary_column) <= depth + i: primary_nested.append(True) primary_column.append(c) primary_branch.append(d) - elif primary_nested[depth] and primary_column[depth+i] != c: + elif primary_nested[depth] and primary_column[depth + i] != c: Log.error("only one branch of tree allowed") else: - primary_nested[depth+i] = True - primary_column[depth+i] = c - primary_branch[depth+i] = d + primary_nested[depth + i] = True + primary_column[depth + i] = c + primary_branch[depth + i] = d - return c, join_field(col[i+1:]) + return c, join_field(col[i + 1 :]) else: - if len(primary_column) <= depth+i: + if len(primary_column) <= depth + i: primary_nested.append(False) primary_column.append(c) primary_branch.append([d]) @@ -737,7 +719,7 @@ def drill_filter(esfilter, data): if filter["and"]: result = True output = FlatList() - for a in filter[u"and"]: + for a in filter["and"]: f = pe_filter(a, data, depth) if f is False: result = False @@ -749,7 +731,7 @@ def drill_filter(esfilter, data): return result elif filter["or"]: output = FlatList() - for o in filter[u"or"]: + for o in filter["or"]: f = pe_filter(o, data, depth) if f is True: return True @@ -843,7 +825,7 @@ def drill_filter(esfilter, data): else: return result elif filter.missing: - if isinstance(filter.missing, text_type): + if is_text(filter.missing): field = filter["missing"] else: field = filter["missing"]["field"] @@ -863,7 +845,7 @@ def drill_filter(esfilter, data): first, rest = parse_field(col, data, depth) d = data[first] if not rest: - if d==None or not d.startswith(val): + if d == None or not d.startswith(val): result = False else: output[rest] = val @@ -873,7 +855,7 @@ def drill_filter(esfilter, data): return result elif filter.exists: - if isinstance(filter["exists"], text_type): + if is_text(filter["exists"]): field = filter["exists"] else: field = 
filter["exists"]["field"] @@ -887,7 +869,7 @@ def drill_filter(esfilter, data): else: return {"exists": rest} else: - Log.error(u"Can not interpret esfilter: {{esfilter}}", {u"esfilter": filter}) + Log.error("Can not interpret esfilter: {{esfilter}}", {"esfilter": filter}) output = [] # A LIST OF OBJECTS MAKING THROUGH THE FILTER @@ -912,7 +894,7 @@ def drill_filter(esfilter, data): # OUTPUT for i, d in enumerate(data): - if isinstance(d, Mapping): + if is_data(d): main([], esfilter, wrap(d), 0) else: Log.error("filter is expecting a dict, not {{type}}", type=d.__class__) @@ -927,6 +909,7 @@ def drill_filter(esfilter, data): # OUTPUT IS A LIST OF ROWS, # WHERE EACH ROW IS A LIST OF VALUES SEEN DURING A WALK DOWN A PATH IN THE HIERARCHY uniform_output = FlatList() + def recurse(row, depth): if depth == max: uniform_output.append(row) @@ -957,21 +940,24 @@ def wrap_function(func): """ RETURN A THREE-PARAMETER WINDOW FUNCTION TO MATCH """ - if isinstance(func, text_type): + if is_text(func): return compile_expression(func) numarg = func.__code__.co_argcount if numarg == 0: + def temp(row, rownum, rows): return func() return temp elif numarg == 1: + def temp(row, rownum, rows): return func(row) return temp elif numarg == 2: + def temp(row, rownum, rows): return func(row, rownum) @@ -985,13 +971,17 @@ def window(data, param): MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy) data - list of records """ - name = param.name # column to assign window function result - edges = param.edges # columns to gourp by - where = param.where # DO NOT CONSIDER THESE VALUES - sortColumns = param.sort # columns to sort by - calc_value = jx_expression_to_function(param.value) # function that takes a record and returns a value (for aggregation) + name = param.name # column to assign window function result + edges = param.edges # columns to gourp by + where = param.where # DO NOT CONSIDER THESE VALUES + sortColumns = param.sort # columns to sort by + calc_value = get( + param.value + ) # function that takes a record and returns a value (for aggregation) aggregate = param.aggregate # WindowFunction to apply - _range = param.range # of form {"min":-10, "max":0} to specify the size and relative position of window + _range = ( + param.range + ) # of form {"min":-10, "max":0} to specify the size and relative position of window data = filter(data, where) @@ -1014,7 +1004,7 @@ def window(data, param): if not aggregate or aggregate == "none": for _, values in groupby(data, edge_values): if not values: - continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE + continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE if sortColumns: sequence = sort(values, sortColumns, already_normalized=True) @@ -1027,7 +1017,7 @@ def window(data, param): for keys, values in groupby(data, edge_values): if not values: - continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE + continue # CAN DO NOTHING WITH THIS ZERO-SAMPLE sequence = sort(values, sortColumns) @@ -1052,11 +1042,6 @@ def window(data, param): r["__temp__"] = None # CLEANUP - - - - - def intervals(_min, _max=None, size=1): """ RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE @@ -1066,8 +1051,8 @@ def intervals(_min, _max=None, size=1): if _max == None: _max = _min _min = 0 - _max = int(Math.ceiling(_max)) - _min = int(Math.floor(_min)) + _max = int(mo_math.ceiling(_max)) + _min = int(mo_math.floor(_min)) output = ((x, min(x + size, _max)) for x in _range(_min, _max, size)) return output @@ -1076,10 +1061,10 @@ def intervals(_min, _max=None, 
size=1): def prefixes(vals): """ :param vals: iterable - :return: vals[:1], vals[:1], ... , vals[:n] + :return: vals[:1], vals[:2], ... , vals[:n] """ for i in range(len(vals)): - yield vals[:i + 1] + yield vals[: i + 1] def accumulate(vals): @@ -1092,6 +1077,7 @@ def accumulate(vals): yield sum, v sum += v + def reverse(vals): # TODO: Test how to do this fastest if not hasattr(vals, "len"): @@ -1105,11 +1091,10 @@ def reverse(vals): return wrap(output) + def countdown(vals): remaining = len(vals) - 1 return [(remaining - i, v) for i, v in enumerate(vals)] - - from jx_python.lists.aggs import is_aggs, list_aggs diff --git a/vendor/jx_python/jx_usingDataset.py b/vendor/jx_python/jx_usingDataset.py deleted file mode 100644 index 8a4b024..0000000 --- a/vendor/jx_python/jx_usingDataset.py +++ /dev/null @@ -1,30 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -import dataset - -from jx_python.containers.Table_usingDataset import Table_usingDataset - - -class Dataset(object): - - - def __init__(self): - self.db = dataset.connect('sqlite:///:memory:') - - - def get_or_create_table(self, name, uid): - return Table_usingDataset(name, self.db, primary_id=uid) - - - diff --git a/vendor/jx_python/lists/__init__.py b/vendor/jx_python/lists/__init__.py index 9358683..e69de29 100644 --- a/vendor/jx_python/lists/__init__.py +++ b/vendor/jx_python/lists/__init__.py @@ -1 +0,0 @@ -__author__ = 'kyle' diff --git a/vendor/jx_python/lists/aggs.py b/vendor/jx_python/lists/aggs.py index 137a278..a552091 100644 --- a/vendor/jx_python/lists/aggs.py +++ b/vendor/jx_python/lists/aggs.py @@ -5,29 +5,24 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http:# mozilla.org/MPL/2.0/. 
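# A quick behavioral sketch of the small generator utilities above, in plain
# Python. intervals() covers [_min, _max) with half-open windows, clamping the
# last pair; accumulate() pairs each value with the running total seen so far:
import math

def intervals_sketch(_min, _max=None, size=1):
    if _max is None:
        _min, _max = 0, _min
    _max = int(math.ceil(_max))
    _min = int(math.floor(_min))
    return [(x, min(x + size, _max)) for x in range(_min, _max, size)]

assert intervals_sketch(7, size=3) == [(0, 3), (3, 6), (6, 7)]

def accumulate_sketch(vals):
    total = 0
    for v in vals:
        yield total, v  # running total BEFORE adding v, paired with v
        total += v

assert list(accumulate_sketch([1, 2, 3])) == [(0, 1), (1, 2), (3, 3)]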
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import itertools -from jx_base.query import _normalize_domain - +from jx_base.domains import DefaultDomain, SimpleSetDomain from jx_python import windows -from mo_dots import listwrap, wrap, coalesce -from mo_logs import Log -from mo_math import UNION - -from jx_base.domains import SimpleSetDomain, DefaultDomain -from jx_python.expression_compiler import compile_expression from jx_python.expressions import jx_expression_to_function from mo_collections.matrix import Matrix +from mo_dots import coalesce, listwrap, wrap +from mo_logs import Log +from mo_math import UNION from mo_times.dates import Date _ = Date + def is_aggs(query): if query.edges or query.groupby or any(a != None and a != "none" for a in listwrap(query.select).aggregate): return True @@ -49,7 +44,7 @@ def list_aggs(frum, query): else: pass - s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select] + s_accessors = [(ss.name, jx_expression_to_function(ss.value)) for ss in select] result = { s.name: Matrix( diff --git a/vendor/jx_python/lists/util.py b/vendor/jx_python/lists/util.py deleted file mode 100644 index 8b13789..0000000 --- a/vendor/jx_python/lists/util.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/vendor/jx_python/meta.py b/vendor/jx_python/meta.py deleted file mode 100644 index 83778ec..0000000 --- a/vendor/jx_python/meta.py +++ /dev/null @@ -1,547 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http:# mozilla.org/MPL/2.0/. 
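# list_aggs now builds its select accessors with
# jx_expression_to_function(ss.value) instead of hand-compiling
# ss.value.to_python(). A sketch of the accessor contract with a simplified
# stand-in; the (row, rownum, rows) signature is an assumption, matching
# wrap_function() elsewhere in this changeset:
def make_accessor(field):
    return lambda row, rownum=None, rows=None: row.get(field)

acc = make_accessor("x")
assert acc({"x": 3}, 0, [{"x": 3}]) == 3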
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from collections import Mapping -from datetime import date -from datetime import datetime -from decimal import Decimal - -import jx_base -from jx_base import Column, Table -from jx_base.schema import Schema -from jx_python import jx -from mo_collections import UniqueIndex -from mo_dots import Data, concat_field, listwrap, unwraplist, NullType, FlatList, set_default, split_field, join_field, ROOT_PATH, wrap, coalesce -from mo_future import none_type, text_type, long, PY2 -from mo_json.typed_encoder import untype_path, unnest_path, python_type_to_json_type, STRUCT -from mo_logs import Log -from mo_threads import Lock -from mo_times.dates import Date - -singlton = None - - -class ColumnList(Table, jx_base.Container): - """ - OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED - """ - - def __init__(self): - Table.__init__(self, "meta.columns") - self.data = {} # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS) - self.locker = Lock() - self._schema = None - self.extend(METADATA_COLUMNS) - - def find(self, es_index, abs_column_name): - with self.locker: - if es_index.startswith("meta."): - self._update_meta() - - if not abs_column_name: - return [c for cs in self.data.get(es_index, {}).values() for c in cs] - else: - return self.data.get(es_index, {}).get(abs_column_name, []) - - def extend(self, columns): - self.dirty = True - with self.locker: - for column in columns: - self._add(column) - - def add(self, column): - self.dirty = True - with self.locker: - return self._add(column) - - def _add(self, column): - columns_for_table = self.data.setdefault(column.es_index, {}) - existing_columns = columns_for_table.setdefault(column.names["."], []) - - for canonical in existing_columns: - if canonical is column: - return canonical - if canonical.es_type == column.es_type: - set_default(column.names, canonical.names) - for key in Column.__slots__: - canonical[key] = column[key] - return canonical - existing_columns.append(column) - return column - - def _update_meta(self): - if not self.dirty: - return - - for mcl in self.data.get("meta.columns").values(): - for mc in mcl: - count = 0 - values = set() - objects = 0 - multi = 1 - for column in self._all_columns(): - value = column[mc.names["."]] - if value == None: - pass - else: - count += 1 - if isinstance(value, list): - multi = max(multi, len(value)) - try: - values |= set(value) - except Exception: - objects += len(value) - elif isinstance(value, Mapping): - objects += 1 - else: - values.add(value) - mc.count = count - mc.cardinality = len(values) + objects - mc.partitions = jx.sort(values) - mc.multi = multi - mc.last_updated = Date.now() - self.dirty = False - - def _all_columns(self): - return [ - column - for t, cs in self.data.items() - for _, css in cs.items() - for column in css - ] - - def __iter__(self): - with self.locker: - self._update_meta() - return iter(self._all_columns()) - - def __len__(self): - return self.data['meta.columns']['es_index'].count - - def update(self, command): - self.dirty = True - try: - command = wrap(command) - eq = command.where.eq - if eq.es_index: - all_columns = self.data.get(eq.es_index, {}).values() - if len(eq) == 1: - # FASTEST - with self.locker: - columns = [ - c - for cs in all_columns - for c in cs - ] - elif eq.es_column and len(eq) == 2: - # FASTER - with self.locker: - columns = [ - c - for cs in all_columns - for c in cs - if 
c.es_column == eq.es_column - ] - - else: - # SLOWER - with self.locker: - columns = [ - c - for cs in all_columns - for c in cs - if all(c[k] == v for k, v in eq.items()) # THIS LINE IS VERY SLOW - ] - else: - columns = list(self) - columns = jx.filter(columns, command.where) - - with self.locker: - for col in columns: - for k in command["clear"]: - if k == ".": - lst = self.data[col.es_index] - cols = lst[col.names['.']] - cols.remove(col) - if len(cols) == 0: - del lst[col.names['.']] - if len(lst) == 0: - del self.data[col.es_index] - else: - col[k] = None - - for k, v in command.set.items(): - col[k] = v - except Exception as e: - Log.error("should not happen", cause=e) - - def query(self, query): - # NOT EXPECTED TO BE RUN - Log.error("not") - with self.locker: - self._update_meta() - if not self._schema: - self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs]) - snapshot = self._all_columns() - - from jx_python.containers.list_usingPythonList import ListContainer - query.frum = ListContainer("meta.columns", snapshot, self._schema) - return jx.run(query) - - def groupby(self, keys): - with self.locker: - self._update_meta() - return jx.groupby(self.__iter__(), keys) - - @property - def schema(self): - if not self._schema: - with self.locker: - self._update_meta() - self._schema = Schema(".", [c for cs in self.data["meta.columns"].values() for c in cs]) - return self._schema - - @property - def namespace(self): - return self - - def get_table(self, table_name): - if table_name != "meta.columns": - Log.error("this container has only the meta.columns") - return self - - def denormalized(self): - """ - THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM - THE DENORMALIZED PERSPECITVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES - """ - with self.locker: - self._update_meta() - output = [ - { - "table": concat_field(c.es_index, untype_path(table)), - "name": untype_path(name), - "cardinality": c.cardinality, - "es_column": c.es_column, - "es_index": c.es_index, - "last_updated": c.last_updated, - "count": c.count, - "nested_path": [unnest_path(n) for n in c.nested_path], - "es_type": c.es_type, - "type": c.jx_type - } - for tname, css in self.data.items() - for cname, cs in css.items() - for c in cs - if c.jx_type not in STRUCT # and c.es_column != "_id" - for table, name in c.names.items() - ] - - from jx_python.containers.list_usingPythonList import ListContainer - return ListContainer( - self.name, - data=output, - schema=jx_base.Schema( - "meta.columns", - SIMPLE_METADATA_COLUMNS - ) - ) - - -def get_schema_from_list(table_name, frum): - """ - SCAN THE LIST FOR COLUMN TYPES - """ - columns = UniqueIndex(keys=("names.\\.",)) - _get_schema_from_list(frum, ".", parent=".", nested_path=ROOT_PATH, columns=columns) - return Schema(table_name=table_name, columns=list(columns)) - - -def _get_schema_from_list(frum, table_name, parent, nested_path, columns): - """ - :param frum: The list - :param table_name: Name of the table this list holds records for - :param prefix_path: parent path - :param nested_path: each nested array, in reverse order - :param columns: map from full name to column definition - :return: - """ - - for d in frum: - row_type = _type_to_name[d.__class__] - - if row_type != "object": - full_name = parent - column = columns[full_name] - if not column: - column = Column( - names={table_name: full_name}, - es_column=full_name, - es_index=".", - jx_type=python_type_to_json_type[d.__class__], - es_type=row_type, - 
nested_path=nested_path - ) - columns.add(column) - column.es_type = _merge_type[column.es_type][row_type] - column.jx_type = _merge_type[coalesce(column.jx_type, "undefined")][row_type] - else: - for name, value in d.items(): - full_name = concat_field(parent, name) - column = columns[full_name] - if not column: - column = Column( - names={table_name: full_name}, - es_column=full_name, - es_index=".", - es_type="undefined", - nested_path=nested_path - ) - columns.add(column) - if isinstance(value, (list, set)): # GET TYPE OF MULTIVALUE - v = list(value) - if len(v) == 0: - this_type = "undefined" - elif len(v) == 1: - this_type = _type_to_name[v[0].__class__] - else: - this_type = _type_to_name[v[0].__class__] - if this_type == "object": - this_type = "nested" - else: - this_type = _type_to_name[value.__class__] - new_type = _merge_type[column.es_type][this_type] - column.es_type = new_type - - if this_type == "object": - _get_schema_from_list([value], table_name, full_name, nested_path, columns) - elif this_type == "nested": - np = listwrap(nested_path) - newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np) - _get_schema_from_list(value, table_name, full_name, newpath, columns) - - -METADATA_COLUMNS = ( - [ - Column( - names={".": c}, - es_index="meta.columns", - es_column=c, - es_type="string", - nested_path=ROOT_PATH - ) - for c in ["es_type", "jx_type", "nested_path", "es_column", "es_index"] - ] + [ - Column( - es_index="meta.columns", - names={".": c}, - es_column=c, - es_type="object", - nested_path=ROOT_PATH - ) - for c in ["names", "partitions"] - ] + [ - Column( - names={".": c}, - es_index="meta.columns", - es_column=c, - es_type="long", - nested_path=ROOT_PATH - ) - for c in ["count", "cardinality", "multi"] - ] + [ - Column( - names={".": "last_updated"}, - es_index="meta.columns", - es_column="last_updated", - es_type="time", - nested_path=ROOT_PATH - ) - ] -) - -SIMPLE_METADATA_COLUMNS = ( - [ - Column( - names={".": c}, - es_index="meta.columns", - es_column=c, - es_type="string", - nested_path=ROOT_PATH - ) - for c in ["table", "name", "type", "nested_path"] - ] + [ - Column( - names={".": c}, - es_index="meta.columns", - es_column=c, - es_type="long", - nested_path=ROOT_PATH - ) - for c in ["count", "cardinality", "multi"] - ] + [ - Column( - names={".": "last_updated"}, - es_index="meta.columns", - es_column="last_updated", - es_type="time", - nested_path=ROOT_PATH - ) - ] -) - - -_type_to_name = { - none_type: "undefined", - NullType: "undefined", - bool: "boolean", - str: "string", - text_type: "string", - int: "integer", - float: "double", - Data: "object", - dict: "object", - set: "nested", - list: "nested", - FlatList: "nested", - Date: "double", - Decimal: "double", - datetime: "double", - date: "double" -} - -if PY2: - _type_to_name[long] = "integer" - -_merge_type = { - "undefined": { - "undefined": "undefined", - "boolean": "boolean", - "integer": "integer", - "long": "long", - "float": "float", - "double": "double", - "number": "number", - "string": "string", - "object": "object", - "nested": "nested" - }, - "boolean": { - "undefined": "boolean", - "boolean": "boolean", - "integer": "integer", - "long": "long", - "float": "float", - "double": "double", - "number": "number", - "string": "string", - "object": None, - "nested": None - }, - "integer": { - "undefined": "integer", - "boolean": "integer", - "integer": "integer", - "long": "long", - "float": "float", - "double": "double", - "number": "number", - "string": "string", - "object": None, - 
"nested": None - }, - "long": { - "undefined": "long", - "boolean": "long", - "integer": "long", - "long": "long", - "float": "double", - "double": "double", - "number": "number", - "string": "string", - "object": None, - "nested": None - }, - "float": { - "undefined": "float", - "boolean": "float", - "integer": "float", - "long": "double", - "float": "float", - "double": "double", - "number": "number", - "string": "string", - "object": None, - "nested": None - }, - "double": { - "undefined": "double", - "boolean": "double", - "integer": "double", - "long": "double", - "float": "double", - "double": "double", - "number": "number", - "string": "string", - "object": None, - "nested": None - }, - "number": { - "undefined": "number", - "boolean": "number", - "integer": "number", - "long": "number", - "float": "number", - "double": "number", - "number": "number", - "string": "string", - "object": None, - "nested": None - }, - "string": { - "undefined": "string", - "boolean": "string", - "integer": "string", - "long": "string", - "float": "string", - "double": "string", - "number": "string", - "string": "string", - "object": None, - "nested": None - }, - "object": { - "undefined": "object", - "boolean": None, - "integer": None, - "long": None, - "float": None, - "double": None, - "number": None, - "string": None, - "object": "object", - "nested": "nested" - }, - "nested": { - "undefined": "nested", - "boolean": None, - "integer": None, - "long": None, - "float": None, - "double": None, - "number": None, - "string": None, - "object": "nested", - "nested": "nested" - } -} diff --git a/vendor/jx_python/namespace/normal.py b/vendor/jx_python/namespace/normal.py index bef4bbd..9ba904a 100644 --- a/vendor/jx_python/namespace/normal.py +++ b/vendor/jx_python/namespace/normal.py @@ -5,28 +5,25 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping +from jx_base.expressions import Variable +from jx_base.language import is_op +from mo_future import is_text, is_binary from copy import copy -from mo_dots import Data -from mo_dots import FlatList -from mo_dots import coalesce, Null -from mo_dots import wrap, listwrap -from mo_logs import Log -from mo_math import Math - from jx_base.dimensions import Dimension from jx_base.domains import Domain +from jx_base.query import QueryOp, get_all_vars from jx_python.containers import Container from jx_python.expressions import TRUE from jx_python.namespace import Namespace, convert_list -from jx_base.query import QueryOp, get_all_vars +from mo_dots import Data, FlatList, Null, coalesce, is_data, is_list, listwrap, wrap +from mo_future import text +from mo_logs import Log +import mo_math DEFAULT_LIMIT = 10 @@ -37,7 +34,7 @@ class Normal(Namespace): """ def convert(self, expr): - if isinstance(expr, Mapping) and expr["from"]: + if is_data(expr) and expr["from"]: return self._convert_query(expr) return expr @@ -47,7 +44,7 @@ class Normal(Namespace): # Log.error('Expecting from clause to be a Container') query = wrap(query) - output = QueryOp("from", None) + output = QueryOp(None) output["from"] = self._convert_from(query["from"]) output.format = query.format @@ -77,11 +74,9 @@ class Normal(Namespace): output.sort = self._convert_sort(query.sort) output.limit = coalesce(query.limit, DEFAULT_LIMIT) - if not Math.is_integer(output.limit) or output.limit < 0: + if not mo_math.is_integer(output.limit) or output.limit < 0: Log.error("Expecting limit >= 0") - output.isLean = query.isLean - # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN # THE from SOURCE IS. 
vars = get_all_vars(output, exclude_where=True) # WE WILL EXCLUDE where VARIABLES @@ -89,20 +84,18 @@ class Normal(Namespace): if c.name in vars and len(c.nested_path) != 1: Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name) - output.having = convert_list(self._convert_having, query.having) - return output def _convert_from(self, frum): - if isinstance(frum, text_type): + if is_text(frum): return Data(name=frum) - elif isinstance(frum, (Container, QueryOp)): + elif is_op(frum, (Container, Variable)): return frum else: Log.error("Expecting from clause to be a name, or a container") def _convert_select(self, select): - if isinstance(select, text_type): + if is_text(select): return Data( name=select.rstrip("."), # TRAILING DOT INDICATES THE VALUE, BUT IS INVALID FOR THE NAME value=select, @@ -111,7 +104,7 @@ class Normal(Namespace): else: select = wrap(select) output = copy(select) - if not select.value or isinstance(select.value, text_type): + if not select.value or is_text(select.value): if select.value == ".": output.name = coalesce(select.name, select.aggregate) else: @@ -126,7 +119,7 @@ class Normal(Namespace): return output def _convert_edge(self, edge): - if isinstance(edge, text_type): + if is_text(edge): return Data( name=edge, value=edge, @@ -134,10 +127,10 @@ class Normal(Namespace): ) else: edge = wrap(edge) - if not edge.name and not isinstance(edge.value, text_type): + if not edge.name and not is_text(edge.value): Log.error("You must name compound edges: {{edge}}", edge= edge) - if isinstance(edge.value, (Mapping, list)) and not edge.domain: + if edge.value.__class__ in (Data, dict, list, FlatList) and not edge.domain: # COMPLEX EDGE IS SHORT HAND domain =self._convert_domain() domain.dimension = Data(fields=edge.value) @@ -158,7 +151,7 @@ class Normal(Namespace): ) def _convert_group(self, column): - if isinstance(column, text_type): + if is_text(column): return wrap({ "name": column, "value": column, @@ -169,7 +162,7 @@ class Normal(Namespace): if (column.domain and column.domain.type != "default") or column.allowNulls != None: Log.error("groupby does not accept complicated domains") - if not column.name and not isinstance(column.value, text_type): + if not column.name and not is_text(column.value): Log.error("You must name compound edges: {{edge}}", edge= column) return wrap({ @@ -191,7 +184,7 @@ class Normal(Namespace): domain = domain.copy() domain.name = domain.type - if not isinstance(domain.partitions, list): + if not is_list(domain.partitions): domain.partitions = list(domain.partitions) return Domain(**domain) @@ -237,7 +230,7 @@ def normalize_sort(sort=None): output = FlatList() for s in listwrap(sort): - if isinstance(s, text_type) or Math.is_integer(s): + if is_text(s) or mo_math.is_integer(s): output.append({"value": s, "sort": 1}) elif not s.field and not s.value and s.sort==None: #ASSUME {name: sort} FORM @@ -255,8 +248,7 @@ sort_direction = { 1: 1, 0: 0, -1: -1, - None: 1, - Null: 1 + None: 1 } canonical_aggregates = { diff --git a/vendor/jx_python/namespace/rename.py b/vendor/jx_python/namespace/rename.py index a3cb1d1..9d91d86 100644 --- a/vendor/jx_python/namespace/rename.py +++ b/vendor/jx_python/namespace/rename.py @@ -5,24 +5,22 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
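`normalize_sort()` above accepts several shorthands for a sort clause and normalizes all of them to `{"value": ..., "sort": ...}` objects. A sketch of that contract using plain dicts; the string directions (`"asc"`/`"desc"`) and the simplified `{name: sort}` detection are assumptions, since the visible part of the `sort_direction` table only shows the numeric keys and `None`:

```python
# Sketch of the normalize_sort() contract, with plain dicts instead of
# mo_dots wrappers. Not the vendored function.
SORT_DIRECTION = {"asc": 1, "desc": -1, "none": 0, 1: 1, 0: 0, -1: -1, None: 1}

def normalize_sort(sort):
    if sort is None:
        return []
    if not isinstance(sort, list):
        sort = [sort]
    output = []
    for s in sort:
        if isinstance(s, (str, int)):
            # BARE FIELD NAME (OR COLUMN INDEX) MEANS ASCENDING
            output.append({"value": s, "sort": 1})
        elif isinstance(s, dict) and "value" in s:
            output.append({"value": s["value"], "sort": SORT_DIRECTION[s.get("sort", 1)]})
        elif isinstance(s, dict) and len(s) == 1:
            # THE {name: sort} SHORTHAND
            (name, direction), = s.items()
            output.append({"value": name, "sort": SORT_DIRECTION[direction]})
    return output

assert normalize_sort("age") == [{"value": "age", "sort": 1}]
assert normalize_sort([{"age": "desc"}]) == [{"value": "age", "sort": -1}]
```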
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping from copy import copy -from mo_dots import set_default, wrap, coalesce, Data, listwrap, unwraplist -from mo_logs import Log -from mo_math import Math -from mo_times.dates import Date - from jx_base.dimensions import Dimension -from jx_base.queries import is_variable_name -from jx_python.namespace import Namespace, convert_list +from jx_base.utils import is_variable_name from jx_base.query import QueryOp +from jx_base.language import is_op +from jx_python.namespace import Namespace, convert_list +from mo_dots import Data, coalesce, is_data, is_list, listwrap, set_default, unwraplist, wrap, is_many +from mo_future import is_text +from mo_logs import Log +from mo_math import is_number +from mo_times.dates import Date class Rename(Namespace): @@ -32,7 +30,7 @@ class Rename(Namespace): EXPECTING A LIST OF {"name":name, "value":value} OBJECTS TO PERFORM A MAPPING """ dimensions = wrap(dimensions) - if isinstance(dimensions, Mapping) and dimensions.name == None: + if is_data(dimensions) and dimensions.name == None: # CONVERT TO A REAL DIMENSION DEFINITION dimensions = {"name": ".", "type": "set", "edges":[{"name": k, "field": v} for k, v in dimensions.items()]} @@ -44,19 +42,19 @@ class Rename(Namespace): """ if expr is True or expr == None or expr is False: return expr - elif Math.is_number(expr): + elif is_number(expr): return expr elif expr == ".": return "." elif is_variable_name(expr): return coalesce(self.dimensions[expr], expr) - elif isinstance(expr, text_type): + elif is_text(expr): Log.error("{{name|quote}} is not a valid variable name", name=expr) elif isinstance(expr, Date): return expr - elif isinstance(expr, QueryOp): + elif is_op(expr, QueryOp): return self._convert_query(expr) - elif isinstance(expr, Mapping): + elif is_data(expr): if expr["from"]: return self._convert_query(expr) elif len(expr) >= 2: @@ -66,7 +64,7 @@ class Rename(Namespace): # ASSUME SINGLE-CLAUSE EXPRESSION k, v = expr.items()[0] return converter_map.get(k, self._convert_bop)(self, k, v) - elif isinstance(expr, (list, set, tuple)): + elif is_many(expr): return wrap([self.convert(value) for value in expr]) else: return expr @@ -77,7 +75,6 @@ class Rename(Namespace): output.where = self.convert(query.where) output.frum = self._convert_from(query.frum) output.edges = convert_list(self._convert_edge, query.edges) - output.having = convert_list(self._convert_having, query.having) output.window = convert_list(self._convert_window, query.window) output.sort = self._convert_clause(query.sort) output.format = query.format @@ -88,16 +85,16 @@ class Rename(Namespace): def _convert_bop(self, op, term): - if isinstance(term, list): - return {op: map(self.convert, term)} + if is_list(term): + return {op: list(map(self.convert, term))} return {op: {self.convert(var): val for var, val in term.items()}} def _convert_many(self, k, v): - return {k: map(self.convert, v)} + return {k: list(map(self.convert, v))} def _convert_from(self, frum): - if isinstance(frum, Mapping): + if is_data(frum): return Data(name=self.convert(frum.name)) else: return self.convert(frum) @@ -126,7 +123,7 @@ class Rename(Namespace): if clause == None: return None - elif isinstance(clause, Mapping): + elif is_data(clause): return 
set_default({"value": self.convert(clause.value)}, clause) else: return [set_default({"value": self.convert(c.value)}, c) for c in clause] diff --git a/vendor/jx_python/records.py b/vendor/jx_python/records.py index d32eb24..2836c11 100644 --- a/vendor/jx_python/records.py +++ b/vendor/jx_python/records.py @@ -5,11 +5,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals + from mo_dots import listwrap diff --git a/vendor/jx_python/table.py b/vendor/jx_python/table.py index 445a3df..ca67d7e 100644 --- a/vendor/jx_python/table.py +++ b/vendor/jx_python/table.py @@ -5,11 +5,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals import jx_base from mo_dots import Data diff --git a/vendor/jx_python/windows.py b/vendor/jx_python/windows.py index cfbcf4f..a214394 100644 --- a/vendor/jx_python/windows.py +++ b/vendor/jx_python/windows.py @@ -5,23 +5,19 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import functools from copy import copy +import functools -import mo_math from mo_collections.multiset import Multiset -from mo_dots.lists import FlatList +from mo_dots import FlatList from mo_logs import Log -from mo_math import MIN -from mo_math import Math -from mo_math import stats +import mo_math +from mo_math import MIN, stats from mo_math.stats import ZeroMoment, ZeroMoment2Stats @@ -147,7 +143,7 @@ class _Stats(WindowFunction): Log.error("Do not know how to handle") def end(self): - ignore = Math.ceiling(len(self.samples) * (1 - self.middle) / 2) + ignore = mo_math.ceiling(len(self.samples) * (1 - self.middle) / 2) if ignore * 2 >= len(self.samples): return stats.Stats() output = stats.Stats(samples=sorted(self.samples)[ignore:len(self.samples) - ignore:]) @@ -323,6 +319,9 @@ class List(WindowFunction): return copy(self.agg) +def median(*args, **kwargs): + return Percentile(0.5, *args, **kwargs) + name2accumulator = { "count": Count, "sum": Sum, @@ -332,6 +331,7 @@ name2accumulator = { "list": List, "min": Min, "minimum": Min, + "median": median, "percentile": Percentile, "one": One } diff --git a/vendor/mo_collections/__init__.py b/vendor/mo_collections/__init__.py index d079fc9..b65f53a 100644 --- a/vendor/mo_collections/__init__.py +++ b/vendor/mo_collections/__init__.py @@ -5,15 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - +from __future__ import absolute_import, division, unicode_literals from mo_collections.unique_index import UniqueIndex + def reverse(values): """ REVERSE - WITH NO SIDE EFFECTS! @@ -23,3 +21,41 @@ def reverse(values): return output +def right(values, num): + """ + KEEP num ELEMENTS FROM THE RIGHT + """ + if num <= 0: + return values[:0] + else: + return values[-num:] + + +def not_right(values, num): + """ + REMOVE num ELEMENTS FROM THE RIGHT + """ + if num <= 0: + return values + else: + return values[:-num] + + +def left(values, num): + """ + KEEP num ELEMENTS FROM THE LEFT + """ + if num <= 0: + return values[:0] + else: + return values[:num] + + +def not_left(values, num): + """ + REMOVE num ELEMENTS FROM THE LEFT + """ + if num <= 0: + return values + else: + return values[num:] diff --git a/vendor/mo_collections/array.py b/vendor/mo_collections/array.py index 8c49639..7446d84 100644 --- a/vendor/mo_collections/array.py +++ b/vendor/mo_collections/array.py @@ -5,16 +5,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # REPLACE NUMPY ARRAY FUNCTIONS # THIS CODE IS FASTER THAN NUMPY WHEN USING PYPY *AND* THE ARRAYS ARE SMALL -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text from mo_logs import Log @@ -40,7 +39,9 @@ def ones(dim): def _apply(func): def output(value): - if hasattr(value, "__iter__"): + if is_text(value): + return func(value) + elif hasattr(value, "__iter__"): return [output(v) for v in value] else: return func(value) @@ -53,7 +54,9 @@ def _reduce(func): if depth == axis: return func - if hasattr(values[0], "__iter__"): + if is_text(values[0]): + return func(values) + elif hasattr(values[0], "__iter__"): return [func(v) for v in values] else: return func(values) @@ -100,6 +103,7 @@ MORE_MATH = { "subtract": lambda a, b: a - b, "sub": lambda a, b: a - b, "multiply": lambda a, b: a * b, + "mul": lambda a, b: a * b, "mult": lambda a, b: a * b, "divide": lambda a, b: a / b, "div": lambda a, b: a / b @@ -141,6 +145,8 @@ def seterr(*args, **kwargs): def allclose(a, b): try: + from mo_testing.fuzzytestcase import assertAlmostEqual + assertAlmostEqual(a, b) return True except Exception as e: diff --git a/vendor/mo_collections/index.py b/vendor/mo_collections/index.py index 36f638a..92c2a53 100644 --- a/vendor/mo_collections/index.py +++ b/vendor/mo_collections/index.py @@ -5,17 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
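The four slicing helpers added to `mo_collections/__init__.py` above are small enough to restate and exercise directly; this copy only drops the docstrings:

```python
# The left/right helpers from the patch, restated so their edge cases
# can be checked without the vendored package.
def right(values, num):
    return values[:0] if num <= 0 else values[-num:]

def not_right(values, num):
    return values if num <= 0 else values[:-num]

def left(values, num):
    return values[:0] if num <= 0 else values[:num]

def not_left(values, num):
    return values if num <= 0 else values[num:]

v = [1, 2, 3, 4]
assert right(v, 2) == [3, 4]
assert not_right(v, 1) == [1, 2, 3]
assert left(v, 0) == []          # GUARD AVOIDS THE values[-0:] SURPRISE
assert not_left(v, 2) == [3, 4]
```

The explicit `num <= 0` guards matter: without them, `values[-num:]` with `num == 0` would return the whole list instead of the empty one.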
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals

-from collections import Mapping
 from copy import copy

-from mo_dots import wrap, unwrap, tuplewrap, get_attr
+from mo_dots import get_attr, is_data, is_sequence, tuplewrap, unwrap, wrap
 from mo_logs import Log

@@ -36,7 +33,7 @@ class Index(object):

     def __getitem__(self, key):
         try:
-            if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
+            if is_sequence(key) and len(key) < len(self._keys):
                 # RETURN ANOTHER Index
                 raise NotImplementedError()

@@ -67,7 +64,7 @@ class Index(object):

     def _test_contains(self, key):
         try:
-            if isinstance(key, (list, tuple)) and len(key) < len(self._keys):
+            if is_sequence(key) and len(key) < len(self._keys):
                 # RETURN ANOTHER Index
                 length = len(key)
                 key = value2key(self._keys[0:length:], key)

@@ -158,15 +155,15 @@ class Index(object):

 def value2key(keys, val):
     if len(keys) == 1:
-        if isinstance(val, Mapping):
+        if is_data(val):
             return get_attr(val, keys[0]),
-        elif isinstance(val, (list, tuple)):
+        elif is_sequence(val):
             return val[0],
         return val,
     else:
-        if isinstance(val, Mapping):
+        if is_data(val):
             return tuple(val[k] for k in keys)
-        elif isinstance(val, (list, tuple)):
+        elif is_sequence(val):
             return tuple(val)
         else:
             Log.error("do not know what to do here")
diff --git a/vendor/mo_collections/matrix.py b/vendor/mo_collections/matrix.py
index 32251b2..5a35cc9 100644
--- a/vendor/mo_collections/matrix.py
+++ b/vendor/mo_collections/matrix.py
@@ -5,17 +5,13 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
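`value2key()` in `index.py` above normalizes a record into a hashable tuple key, whatever form the caller passes. A plain-dict sketch of that contract (the vendored version goes through `get_attr` and builds one-tuples with a trailing comma):

```python
# Sketch: normalize a record (dict, sequence, or scalar) into a tuple
# key suitable for use in an index. Stand-in for the vendored function.
def value2key(keys, val):
    if len(keys) == 1:
        if isinstance(val, dict):
            return (val.get(keys[0]),)
        elif isinstance(val, (list, tuple)):
            return (val[0],)
        return (val,)
    else:
        if isinstance(val, dict):
            return tuple(val.get(k) for k in keys)
        elif isinstance(val, (list, tuple)):
            return tuple(val)
        raise ValueError("do not know what to do here")

assert value2key(("a",), {"a": 1, "b": 2}) == (1,)
assert value2key(("a", "b"), {"a": 1, "b": 2}) == (1, 2)
assert value2key(("a", "b"), [1, 2]) == (1, 2)
```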
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from mo_future import text_type, xrange, transpose -from mo_dots import Null, Data, coalesce, get_module -from mo_kwargs import override +from mo_dots import Data, Null, coalesce, get_module, is_sequence +from mo_future import text, transpose, xrange from mo_logs import Log -from mo_logs.exceptions import suppress_exception class Matrix(object): @@ -40,7 +36,7 @@ class Matrix(object): self.num = len(dims) self.dims = tuple(dims) if zeros != None: - if self.num == 0 or any(d == 0 for d in dims): #NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE + if self.num == 0 or any(d == 0 for d in dims): # NO DIMS, OR HAS A ZERO DIM, THEN IT IS A NULL CUBE if hasattr(zeros, "__call__"): self.cube = zeros() else: @@ -61,7 +57,7 @@ class Matrix(object): return output def __getitem__(self, index): - if not isinstance(index, (list, tuple)): + if not is_sequence(index): if isinstance(index, slice): sub = self.cube[index] output = Matrix() @@ -171,10 +167,11 @@ class Matrix(object): def __iter__(self): if not self.dims: - return [self.value].__iter__() + yield (tuple(), self.value) else: # TODO: MAKE THIS FASTER BY NOT CALLING __getitem__ (MAKES CUBE OBJECTS) - return ((c, self[c]) for c in self._all_combos()) + for c in self._all_combos(): + yield (c, self[c]) def __float__(self): return self.value @@ -382,10 +379,10 @@ def index_to_coordinate(dims): coords = [] for i in domain: if i == num_dims - 1: - commands.append("\tc" + text_type(i) + " = index") + commands.append("\tc" + text(i) + " = index") else: - commands.append("\tc" + text_type(i) + ", index = divmod(index, " + text_type(prod[i]) + ")") - coords.append("c" + text_type(i)) + commands.append("\tc" + text(i) + ", index = divmod(index, " + text(prod[i]) + ")") + coords.append("c" + text(i)) output = None if num_dims == 1: code = ( @@ -400,8 +397,9 @@ def index_to_coordinate(dims): "\treturn " + ", ".join(coords) ) - exec(code) - return output + fake_locals = {} + exec(code, globals(), fake_locals) + return fake_locals["output"] def _product(values): diff --git a/vendor/mo_collections/multiset.py b/vendor/mo_collections/multiset.py index d10dc62..e41f92f 100644 --- a/vendor/mo_collections/multiset.py +++ b/vendor/mo_collections/multiset.py @@ -5,13 +5,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
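`index_to_coordinate()` in `matrix.py` above generates and `exec`s code that turns a flat cube index into a coordinate tuple by repeated `divmod`. The patch also routes `exec` through an explicit `fake_locals` dict because, in Python 3, `exec(code)` cannot rebind a local variable of the enclosing function. A direct (non-codegen) equivalent of what the generated function computes; a hypothetical re-implementation, not the vendored code:

```python
# Peel off one coordinate per dimension with divmod, using the product
# of the trailing dimensions as the step.
from functools import reduce
from operator import mul

def index_to_coordinate(dims):
    def convert(index):
        coords = []
        for i in range(len(dims) - 1):
            step = reduce(mul, dims[i + 1:], 1)  # CELLS PER UNIT OF COORDINATE i
            c, index = divmod(index, step)
            coords.append(c)
        coords.append(index)
        return tuple(coords)
    return convert

ind2coord = index_to_coordinate((2, 3, 4))
assert ind2coord(0) == (0, 0, 0)
assert ind2coord(23) == (1, 2, 3)   # LAST CELL OF THE 2x3x4 CUBE
```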
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - +from __future__ import absolute_import, division, unicode_literals class Multiset(object): @@ -27,6 +24,7 @@ class Multiset(object): | No | No | Multiset | +------------+---------+----------+ """ + def __new__(cls, list=None, key_field=None, count_field=None, allow_negative=False): try: if allow_negative: @@ -49,11 +47,9 @@ class Multiset(object): class _Multiset(Multiset): - def __new__(cls, *args): return object.__new__(cls) - def __init__(self, list=None, key_field=None, count_field=None, **kwargs): if not key_field and not count_field: self.dic = dict() @@ -64,13 +60,11 @@ class _Multiset(Multiset): else: self.dic = {i[key_field]: i[count_field] for i in list} - def __iter__(self): for k, m in self.dic.items(): for i in range(m): yield k - def items(self): return self.dic.items() @@ -92,7 +86,7 @@ class _Multiset(Multiset): if value not in self.dic: from mo_logs import Log - Log.error("{{value}} is not in multiset", value= value) + Log.error("{{value}} is not in multiset", value=value) self._remove(value) def copy(self): @@ -100,7 +94,6 @@ class _Multiset(Multiset): output.dic = self.dic.copy() return output - def _remove(self, value): count = self.dic.get(value) if count == None: @@ -108,7 +101,7 @@ class _Multiset(Multiset): count -= 1 if count == 0: - del (self.dic[value]) + del self.dic[value] else: self.dic[value] = count @@ -140,7 +133,6 @@ class _Multiset(Multiset): return True return False - def count(self, value): if value in self.dic: return self.dic[value] @@ -150,7 +142,7 @@ class _Multiset(Multiset): class _NegMultiset(Multiset): def __new__(cls, *args, **kwargs): - return object.__new__(cls) + return object.__new__(cls) def __init__(self, list=None, key_field=None, count_field=None, **kwargs): if not key_field and not count_field: @@ -162,13 +154,11 @@ class _NegMultiset(Multiset): else: self.dic = {i[key_field]: i[count_field] for i in list} - # def __iter__(self): # for k, m in self.dic.items(): # for i in range(m): # yield k - def items(self): return self.dic.items() @@ -185,7 +175,7 @@ class _NegMultiset(Multiset): if not count: self.dic[value] = amount elif count == -amount: - del (self.dic[value]) + del self.dic[value] else: self.dic[value] = count + amount @@ -198,13 +188,11 @@ class _NegMultiset(Multiset): def remove(self, value): return self.add(value, -1) - def copy(self): output = _NegMultiset() output.dic = self.dic.copy() return output - def __add__(self, other): output = self.copy() @@ -237,7 +225,6 @@ class _NegMultiset(Multiset): return True return False - def count(self, value): if value in self.dic: return self.dic[value] diff --git a/vendor/mo_collections/persistent_queue.py b/vendor/mo_collections/persistent_queue.py index 96d0f4f..d449af6 100644 --- a/vendor/mo_collections/persistent_queue.py +++ b/vendor/mo_collections/persistent_queue.py @@ -5,16 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
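A minimal dict-of-counts sketch of the `Multiset` behaviour above: counts live in a plain dict, and a key is deleted outright when its count reaches zero (the `del self.dic[value]` cleanup in the patch), so membership tests stay accurate. This is not the vendored class, which also supports `key_field`/`count_field` construction and, via `allow_negative`, the `_NegMultiset` variant:

```python
class Multiset:
    def __init__(self):
        self.dic = {}

    def add(self, value):
        self.dic[value] = self.dic.get(value, 0) + 1

    def remove(self, value):
        count = self.dic.get(value)
        if count is None:
            raise KeyError(value)
        if count == 1:
            del self.dic[value]    # DROP THE KEY, AS THE PATCH DOES
        else:
            self.dic[value] = count - 1

    def count(self, value):
        return self.dic.get(value, 0)

    def __contains__(self, value):
        return value in self.dic

m = Multiset()
m.add("a")
m.add("a")
m.add("b")
m.remove("a")
assert m.count("a") == 1 and "b" in m
m.remove("b")
assert "b" not in m
```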
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import mo_json from mo_dots import Data, wrap from mo_files import File +import mo_json from mo_logs import Log from mo_logs.exceptions import suppress_exception from mo_math.randoms import Random diff --git a/vendor/mo_collections/queue.py b/vendor/mo_collections/queue.py index c81340f..9f19adf 100644 --- a/vendor/mo_collections/queue.py +++ b/vendor/mo_collections/queue.py @@ -6,14 +6,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary from collections import deque +from copy import copy class Queue(object): @@ -50,6 +50,14 @@ class Queue(object): return other - self.set return set(o for o in other if o not in self.set) + def __add__(self, other): + output = Queue() + output.set = copy(self.set) + output.list = copy(self.list) + for v in other: + output.add(v) + return output + def __data__(self): return list(self.list) diff --git a/vendor/mo_collections/relation.py b/vendor/mo_collections/relation.py index b848608..9e381c8 100644 --- a/vendor/mo_collections/relation.py +++ b/vendor/mo_collections/relation.py @@ -5,13 +5,12 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary from mo_logs import Log diff --git a/vendor/mo_collections/unique_index.py b/vendor/mo_collections/unique_index.py index 81aa14a..8b08350 100644 --- a/vendor/mo_collections/unique_index.py +++ b/vendor/mo_collections/unique_index.py @@ -5,18 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
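The new `Queue.__add__` above copies the backing set and deque, then pushes the right-hand side's values through `add()`. A stand-in sketch of that behaviour; the dedup-on-add logic is assumed from the `self.set` membership bookkeeping visible in the class:

```python
# Ordered, deduplicating queue: __add__ copies the backing structures,
# then re-uses add() so duplicates from the other side are dropped.
from collections import deque
from copy import copy

class Queue:
    def __init__(self):
        self.set = set()
        self.list = deque()

    def add(self, value):
        if value not in self.set:      # MEMBERSHIP CHECK GIVES THE DEDUP
            self.set.add(value)
            self.list.append(value)
        return self

    def __add__(self, other):
        output = Queue()
        output.set = copy(self.set)
        output.list = copy(self.list)
        for v in other:
            output.add(v)
        return output

    def __iter__(self):
        return iter(self.list)

q = Queue()
q.add(1)
q.add(2)
merged = q + [2, 3]
assert list(merged) == [1, 2, 3]   # THE 2 IS NOT DUPLICATED
```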
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals

-from collections import Mapping, Iterable, Set
-
-from mo_dots import unwrap, tuplewrap, wrap
+from mo_dots import is_data, is_sequence, tuplewrap, unwrap, wrap
 from mo_dots.objects import datawrap
-from mo_future import PY2, iteritems
+from mo_future import PY2, iteritems, Set, Mapping, Iterable, first
 from mo_logs import Log
 from mo_logs.exceptions import suppress_exception

@@ -73,7 +69,7 @@ class UniqueIndex(Set, Mapping):
         return self._data.keys()

     def pop(self):
-        output = iteritems(self._data).next()[1]
+        output = first(iteritems(self._data))[1]
         self.remove(output)
         return wrap(output)

@@ -83,7 +79,12 @@ class UniqueIndex(Set, Mapping):
         if key == None:
             Log.error("Expecting key to be not None")

-        d = self._data.get(key)
+        try:
+            d = self._data.get(key)
+        except Exception as e:
+            key = value2key(self._keys, val)
+            d = self._data.get(key)
+
         if d is None:
             self._data[key] = unwrap(val)
             self.count += 1

@@ -175,16 +176,16 @@
 def value2key(keys, val):
     if len(keys) == 1:
-        if isinstance(val, Mapping):
+        if is_data(val):
             return val[keys[0]]
-        elif isinstance(val, (list, tuple)):
+        elif is_sequence(val):
             return val[0]
         else:
             return val
     else:
-        if isinstance(val, Mapping):
+        if is_data(val):
             return datawrap({k: val[k] for k in keys})
-        elif isinstance(val, (list, tuple)):
+        elif is_sequence(val):
             return datawrap(dict(zip(keys, val)))
         else:
             Log.error("do not know what to do here")
diff --git a/vendor/mo_dots/__init__.py b/vendor/mo_dots/__init__.py
index 4dcab2a..41f63b9 100644
--- a/vendor/mo_dots/__init__.py
+++ b/vendor/mo_dots/__init__.py
@@ -4,26 +4,23 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals

 import sys
-from collections import Mapping

-from mo_dots.utils import get_logger, get_module
-from mo_future import text_type, binary_type, generator_types
+from mo_future import binary_type, generator_types, is_binary, is_text, text, OrderedDict
+
+from mo_dots.utils import CLASS, OBJ, get_logger, get_module

 none_type = type(None)
 ModuleType = type(sys.modules[__name__])

 _builtin_zip = zip

-SELF_PATH = "."
-ROOT_PATH = [SELF_PATH]
+ROOT_PATH = ["."]

 _get = object.__getattribute__

@@ -63,6 +60,14 @@ def zip(keys, values):
     return output


+def missing(value):
+    return value == None or value == ''
+
+
+def exists(value):
+    return value != None and value != ''
+
+
 def literal_field(field):
     """
     RETURN SAME WITH DOTS (`.`) ESCAPED

@@ -88,13 +93,34 @@ def unliteral_field(field):
     return field.replace("\\.", ".")


+def tail_field(field):
+    """
+    RETURN THE FIRST STEP IN PATH, ALONG WITH THE REMAINING TAIL
+    IN (first, rest) PAIR
+    """
+    if field == "." or field==None:
+        return ".", "."
+    elif "." in field:
+        if "\\." in field:
+            path = field.replace("\\.", "\a").split(".", 1)
+            if len(path) == 1:
+                return path[0].replace("\a", "."), "."
+            else:
+                return tuple(k.replace("\a", ".") for k in path)
+        else:
+            return field.split(".", 1)
+    else:
+        return field, "."
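`tail_field()` above is the complement of `split_field()`: it peels off only the first dot-separated step and returns it with the remaining tail, honouring `\.` as an escaped literal dot. Restated verbatim so its contract is easy to check:

```python
# tail_field() from the hunk above, exercised on the three cases it
# distinguishes: a path, a single step, and an escaped dot.
def tail_field(field):
    if field == "." or field is None:
        return ".", "."
    elif "." in field:
        if "\\." in field:
            path = field.replace("\\.", "\a").split(".", 1)
            if len(path) == 1:
                return path[0].replace("\a", "."), "."
            else:
                return tuple(k.replace("\a", ".") for k in path)
        else:
            return field.split(".", 1)
    else:
        return field, "."

assert tail_field("a.b.c") == ("a", "b.c")
assert tail_field("a") == ("a", ".")
assert tail_field("a\\.b.c") == ("a.b", "c")   # ESCAPED DOT STAYS IN THE STEP
```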
+ + + def split_field(field): """ RETURN field AS ARRAY OF DOT-SEPARATED FIELDS """ if field == "." or field==None: return [] - elif isinstance(field, text_type) and "." in field: + elif is_text(field) and "." in field: if field.startswith(".."): remainder = field.lstrip(".") back = len(field) - len(remainder) - 1 @@ -105,14 +131,17 @@ def split_field(field): return [field] -def join_field(field): +def join_field(path): """ RETURN field SEQUENCE AS STRING """ - potent = [f for f in field if f != "."] - if not potent: - return "." - return ".".join([f.replace(".", "\\.") for f in potent]) + output = ".".join([f.replace(".", "\\.") for f in path if f != None]) + return output if output else "." + + # potent = [f for f in path if f != "."] + # if not potent: + # return "." + # return ".".join([f.replace(".", "\\.") for f in potent]) def concat_field(prefix, suffix): @@ -132,8 +161,16 @@ def startswith_field(field, prefix): """ RETURN True IF field PATH STRING STARTS WITH prefix PATH STRING """ - if prefix == ".": + if prefix == None: + return False + if prefix.startswith("."): return True + # f_back = len(field) - len(field.strip(".")) + # p_back = len(prefix) - len(prefix.strip(".")) + # if f_back > p_back: + # return False + # else: + # return True if field.startswith(prefix): if len(field) == len(prefix) or field[len(prefix)] == ".": @@ -164,34 +201,26 @@ def relative_field(field, parent): def hash_value(v): - if isinstance(v, (set, tuple, list)): + if is_many(v): return hash(tuple(hash_value(vv) for vv in v)) - elif not isinstance(v, Mapping): + elif _get(v, CLASS) not in data_types: return hash(v) else: return hash(tuple(sorted(hash_value(vv) for vv in v.values()))) - -def _setdefault(obj, key, value): - """ - DO NOT USE __dict__.setdefault(obj, key, value), IT DOES NOT CHECK FOR obj[key] == None - """ - v = obj.get(key) - if v == None: - obj[key] = value - return value - return v - - def set_default(*params): """ - INPUT dicts IN PRIORITY ORDER UPDATES FIRST dict WITH THE MERGE RESULT, WHERE MERGE RESULT IS DEFINED AS: FOR EACH LEAF, RETURN THE HIGHEST PRIORITY LEAF VALUE + + RECURSIVE VERSION OF params[0].update(*reversed(params)); + + :param params: dicts IN PRIORITY ORDER, FIRST IS HIGHES PRIORITY + :return: FIRST dict OR NEW dict WITH PROPERTIES SET """ p0 = params[0] - agg = p0 if p0 or isinstance(p0, Mapping) else {} + agg = p0 if p0 or _get(p0, CLASS) in data_types else {} for p in params[1:]: p = unwrap(p) if p is None: @@ -207,18 +236,21 @@ def _all_default(d, default, seen=None): """ if default is None: return - if isinstance(default, Data): + if _get(default, CLASS) is Data: default = object.__getattribute__(default, SLOT) # REACH IN AND GET THE dict # Log = _late_import() - # Log.error("strictly dict (or object) allowed: got {{type}}", type=default.__class__.__name__) + # Log.error("strictly dict (or object) allowed: got {{type}}", type=_get(default, CLASS).__name__) for k, default_value in default.items(): default_value = unwrap(default_value) # TWO DIFFERENT Dicts CAN SHARE id() BECAUSE THEY ARE SHORT LIVED - existing_value = _get_attr(d, [k]) + if is_data(d): + existing_value = d.get(k) + else: + existing_value = _get_attr(d, [k]) if existing_value == None: if default_value != None: - if isinstance(default_value, Mapping): + if _get(default_value, CLASS) in data_types: df = seen.get(id(default_value)) if df is not None: _set_attr(d, [k], df) @@ -234,10 +266,10 @@ def _all_default(d, default, seen=None): except Exception as e: if PATH_NOT_FOUND not in e: 
get_logger().error("Can not set attribute {{name}}", name=k, cause=e) - elif isinstance(existing_value, list) or isinstance(default_value, list): + elif is_list(existing_value) or is_list(default_value): _set_attr(d, [k], None) _set_attr(d, [k], listwrap(existing_value) + listwrap(default_value)) - elif (hasattr(existing_value, "__setattr__") or isinstance(existing_value, Mapping)) and isinstance(default_value, Mapping): + elif (hasattr(existing_value, "__setattr__") or _get(existing_value, CLASS) in data_types) and _get(default_value, CLASS) in data_types: df = seen.get(id(default_value)) if df is not None: _set_attr(d, [k], df) @@ -246,7 +278,7 @@ def _all_default(d, default, seen=None): _all_default(existing_value, default_value, seen) -def _getdefault(obj, key): +def _get_dict_default(obj, key): """ obj MUST BE A DICT key IS EXPECTED TO BE LITERAL (NO ESCAPING) @@ -258,7 +290,28 @@ def _getdefault(obj, key): pass try: - return getattr(obj, key) + if float(key) == round(float(key), 0): + return obj[int(key)] + except Exception as f: + pass + + return NullType(obj, key) + + +def _getdefault(obj, key): + """ + obj ANY OBJECT + key IS EXPECTED TO BE LITERAL (NO ESCAPING) + TRY BOTH ATTRIBUTE AND ITEM ACCESS, OR RETURN Null + """ + try: + return obj[key] + except Exception as f: + pass + + try: + if obj.__class__ is not dict: + return getattr(obj, key) except Exception as f: pass @@ -272,7 +325,7 @@ def _getdefault(obj, key): # TODO: FIGURE OUT WHY THIS WAS EVER HERE (AND MAKE A TEST) # try: - # return eval("obj."+text_type(key)) + # return eval("obj."+text(key)) # except Exception as f: # pass return NullType(obj, key) @@ -384,14 +437,15 @@ def _set_attr(obj_, path, value): # ACTUAL SETTING OF VALUE try: old_value = _get_attr(obj, [attr_name]) - if old_value == None: + old_type = _get(old_value, CLASS) + if old_value == None or old_type in (bool, int, float, text, binary_type): old_value = None new_value = value elif value == None: new_value = None else: - new_value = old_value.__class__(value) # TRY TO MAKE INSTANCE OF SAME CLASS - except Exception as e: + new_value = _get(old_value, CLASS)(value) # TRY TO MAKE INSTANCE OF SAME CLASS + except Exception: old_value = None new_value = value @@ -403,11 +457,11 @@ def _set_attr(obj_, path, value): obj[attr_name] = new_value return old_value except Exception as f: - get_logger().error(PATH_NOT_FOUND, cause=e) + get_logger().error(PATH_NOT_FOUND, cause=[f, e]) def lower_match(value, candidates): - return [v for v in candidates if v.lower()==value.lower()] + return [v for v in candidates if v.lower() == value.lower()] def wrap(v): @@ -417,9 +471,9 @@ def wrap(v): :return: Data INSTANCE """ - type_ = v.__class__ + type_ = _get(v, CLASS) - if type_ is dict: + if type_ in (dict, OrderedDict): m = object.__new__(Data) _set(m, SLOT, v) return m @@ -443,10 +497,12 @@ def wrap_leaves(value): def _wrap_leaves(value): if value == None: return None - if isinstance(value, (text_type, binary_type, int, float)): + + class_ = _get(value, CLASS) + if class_ in (text, binary_type, int, float): return value - if isinstance(value, Mapping): - if isinstance(value, Data): + if class_ in data_types: + if class_ is Data: value = unwrap(value) output = {} @@ -455,7 +511,7 @@ def _wrap_leaves(value): if key == "": get_logger().error("key is empty string. 
Probably a bad idea") - if isinstance(key, binary_type): + if is_binary(key): key = key.decode("utf8") d = output @@ -487,17 +543,19 @@ def _wrap_leaves(value): def unwrap(v): - _type = _get(v, "__class__") - if _type is Data: + if v is None: + return None + _type = _get(v, CLASS) + if _type is NullType: + return None + elif _type is Data: d = _get(v, SLOT) return d elif _type is FlatList: return v.list - elif _type is NullType: - return None elif _type is DataObject: - d = _get(v, "_obj") - if isinstance(d, Mapping): + d = _get(v, OBJ) + if _get(d, CLASS) in data_types: return d else: return v @@ -514,7 +572,7 @@ def listwrap(value): value -> [value] [...] -> [...] (unchanged list) - ##MOTIVATION## + ## MOTIVATION ## OFTEN IT IS NICE TO ALLOW FUNCTION PARAMETERS TO BE ASSIGNED A VALUE, OR A list-OF-VALUES, OR NULL. CHECKING FOR WHICH THE CALLER USED IS TEDIOUS. INSTEAD WE CAST FROM THOSE THREE CASES TO THE SINGLE CASE @@ -537,9 +595,9 @@ def listwrap(value): """ if value == None: return FlatList() - elif isinstance(value, list): + elif is_list(value): return wrap(value) - elif isinstance(value, set): + elif is_many(value): return wrap(list(value)) else: return wrap([unwrap(value)]) @@ -548,7 +606,7 @@ def unwraplist(v): """ LISTS WITH ZERO AND ONE element MAP TO None AND element RESPECTIVELY """ - if isinstance(v, list): + if is_list(v): if len(v) == 0: return None elif len(v) == 1: @@ -563,12 +621,18 @@ def tuplewrap(value): """ INTENDED TO TURN lists INTO tuples FOR USE AS KEYS """ - if isinstance(value, (list, set, tuple) + generator_types): - return tuple(tuplewrap(v) if isinstance(v, (list, tuple)) else v for v in value) + if is_many(value): + return tuple(tuplewrap(v) if is_sequence(v) else v for v in value) return unwrap(value), +from mo_dots.datas import Data, SLOT, data_types, is_data from mo_dots.nones import Null, NullType -from mo_dots.datas import Data, SLOT -from mo_dots.lists import FlatList +from mo_dots.lists import FlatList, is_list, is_sequence, is_container, is_many from mo_dots.objects import DataObject + +# EXPORT +import mo_dots.nones as temp +temp.wrap = wrap +temp.is_sequence = is_sequence +del temp diff --git a/vendor/mo_dots/datas.py b/vendor/mo_dots/datas.py index c9db542..6a401f8 100644 --- a/vendor/mo_dots/datas.py +++ b/vendor/mo_dots/datas.py @@ -4,21 +4,17 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
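The `listwrap()` docstring above spells out its motivation: let callers pass a scalar, a list, or nothing, and normalize all three to a list so one code path handles them all. A plain-list sketch of that contract (the real function returns wrapped `FlatList`s):

```python
# Sketch of the listwrap() normalization:
#     None  -> []
#     [...] -> [...] (unchanged)
#     value -> [value]
def listwrap(value):
    if value is None:
        return []
    elif isinstance(value, (list, tuple, set)):
        return list(value)
    else:
        return [value]

def total(amounts=None):
    # CALLERS MAY PASS A SCALAR, A LIST, OR NOTHING
    return sum(listwrap(amounts))

assert total() == 0
assert total(5) == 5
assert total([1, 2, 3]) == 6
```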
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import MutableMapping, Mapping -from copy import deepcopy +from copy import copy, deepcopy from decimal import Decimal -from mo_future import text_type, PY2, iteritems, none_type, generator_types, long - -from mo_dots import _getdefault, hash_value, literal_field, coalesce, listwrap, get_logger -from mo_dots.lists import FlatList +from mo_dots import _getdefault, coalesce, get_logger, hash_value, listwrap, literal_field +from mo_dots.utils import CLASS +from mo_future import generator_types, iteritems, long, none_type, text, MutableMapping, OrderedDict _get = object.__getattribute__ _set = object.__setattr__ @@ -27,7 +23,7 @@ SLOT = str("_internal_dict") DEBUG = False -class Data(MutableMapping): +class Data(object): """ Please see README.md """ @@ -46,10 +42,11 @@ class Data(MutableMapping): else: if args: args0 = args[0] - if isinstance(args0, Data): - _set(self, SLOT, _get(args0, SLOT)) - elif isinstance(args0, dict): + class_ = _get(args0, CLASS) + if class_ is dict: _set(self, SLOT, args0) + elif class_ is Data: + _set(self, SLOT, _get(args0, SLOT)) else: _set(self, SLOT, dict(args0)) elif kwargs: @@ -59,21 +56,21 @@ class Data(MutableMapping): def __bool__(self): d = self._internal_dict - if isinstance(d, dict): + if _get(d, CLASS) is dict: return bool(d) else: return d != None def __nonzero__(self): d = self._internal_dict - if isinstance(d, dict): + if _get(d, CLASS) is dict: return True if d else False else: return d != None def __contains__(self, item): value = Data.__getitem__(self, item) - if isinstance(value, Mapping) or value: + if _get(value, CLASS) in data_types or value: return True return False @@ -86,20 +83,20 @@ class Data(MutableMapping): return Null if key == ".": output = self._internal_dict - if isinstance(output, Mapping): + if _get(output, CLASS) in data_types: return self else: return output - key = text_type(key) + key = text(key) d = self._internal_dict if key.find(".") >= 0: seq = _split_field(key) for n in seq: - if isinstance(d, NullType): + if _get(d, CLASS) is NullType: d = NullType(d, n) # OH DEAR, Null TREATS n AS PATH, NOT LITERAL - elif isinstance(d, list): + elif is_list(d): d = [_getdefault(dd, n) for dd in d] else: d = _getdefault(d, n) # EVERYTHING ELSE TREATS n AS LITERAL @@ -148,12 +145,13 @@ class Data(MutableMapping): d[seq[-1]] = value return self except Exception as e: - raise e + from mo_logs import Log + Log.error("can not set key={{key}}", key=key, cause=e) def __getattr__(self, key): d = self._internal_dict v = d.get(key) - t = v.__class__ + t = _get(v, CLASS) # OPTIMIZED wrap() if t is dict: @@ -188,6 +186,48 @@ class Data(MutableMapping): def __iadd__(self, other): return _iadd(self, other) + def __or__(self, other): + """ + RECURSIVE COALESCE OF DATA PROPERTIES + """ + if not _get(other, CLASS) in data_types: + get_logger().error("Expecting Data") + + d = self._internal_dict + output = Data(**d) + output.__ior__(other) + return output + + def __ror__(self, other): + """ + RECURSIVE COALESCE OF DATA PROPERTIES + """ + if not _get(other, CLASS) in data_types: + get_logger().error("Expecting Data") + + return wrap(other).__or__(self) + + def __ior__(self, other): + """ + RECURSIVE COALESCE OF DATA PROPERTIES + """ + if not _get(other, CLASS) 
in data_types: + get_logger().error("Expecting Data") + d = self._internal_dict + for ok, ov in other.items(): + if ov == None: + continue + + sv = d.get(ok) + if sv == None: + d[ok] = ov + elif isinstance(sv, Data): + sv |= ov + elif is_data(sv): + wv = object.__new__(Data) + _set(wv, SLOT, sv) + wv |= ov + return self def __hash__(self): d = self._internal_dict @@ -198,13 +238,13 @@ class Data(MutableMapping): return True d = self._internal_dict - if not isinstance(d, dict): + if _get(d, CLASS) is not dict: return d == other if not d and other == None: return False - if not isinstance(other, Mapping): + if _get(other, CLASS) not in data_types: return False e = unwrap(other) for k, v in d.items(): @@ -224,7 +264,7 @@ class Data(MutableMapping): def items(self): d = self._internal_dict - return [(k, wrap(v)) for k, v in d.items() if v != None or isinstance(v, Mapping)] + return [(k, wrap(v)) for k, v in d.items() if v != None or _get(v, CLASS) in data_types] def leaves(self, prefix=None): """ @@ -253,11 +293,18 @@ class Data(MutableMapping): return dict.__len__(d) def copy(self): - return Data(**self) + d = self._internal_dict + if _get(d, CLASS) is dict: + return Data(**d) + else: + return copy(d) def __copy__(self): d = self._internal_dict - return Data(**d) + if _get(d, CLASS) is dict: + return Data(**self) + else: + return copy(d) def __deepcopy__(self, memo): d = self._internal_dict @@ -276,7 +323,7 @@ class Data(MutableMapping): d.pop(seq[-1], None) def __delattr__(self, key): - key = text_type(key) + key = text(key) d = self._internal_dict d.pop(key, None) @@ -291,6 +338,10 @@ class Data(MutableMapping): except Exception: return "{}" + def __dir__(self): + d = self._internal_dict + return d.keys() + def __repr__(self): try: return "Data("+dict.__repr__(self._internal_dict)+")" @@ -298,6 +349,9 @@ class Data(MutableMapping): return "Data()" +MutableMapping.register(Data) + + def leaves(value, prefix=None): """ LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES @@ -311,7 +365,7 @@ def leaves(value, prefix=None): output = [] for k, v in value.items(): try: - if isinstance(v, Mapping): + if _get(v, CLASS) in data_types: output.extend(leaves(v, prefix=prefix + literal_field(k) + ".")) else: output.append((prefix + literal_field(k), unwrap(v))) @@ -327,213 +381,16 @@ def _split_field(field): return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")] -class _DictUsingSelf(dict): - - def __init__(self, **kwargs): - """ - CALLING Data(**something) WILL RESULT IN A COPY OF something, WHICH - IS UNLIKELY TO BE USEFUL. USE wrap() INSTEAD - """ - dict.__init__(self) - - def __bool__(self): - return True - - def __getitem__(self, key): - if key == None: - return Null - if isinstance(key, str): - key = key.decode("utf8") - - d=self - if key.find(".") >= 0: - seq = _split_field(key) - for n in seq: - d = _getdefault(self, n) - return wrap(d) - else: - o = dict.get(d, None) - - if o == None: - return NullType(d, key) - return wrap(o) - - def __setitem__(self, key, value): - if key == "": - get_logger().error("key is empty string. 
Probably a bad idea") - if isinstance(key, str): - key = key.decode("utf8") - d=self - try: - value = unwrap(value) - if key.find(".") == -1: - if value is None: - dict.pop(d, key, None) - else: - dict.__setitem__(d, key, value) - return self - - seq = _split_field(key) - for k in seq[:-1]: - d = _getdefault(d, k) - if value == None: - dict.pop(d, seq[-1], None) - else: - dict.__setitem__(d, seq[-1], value) - return self - except Exception as e: - raise e - - def __getattr__(self, key): - if isinstance(key, str): - ukey = key.decode("utf8") - else: - ukey = key - - d = self - o = dict.get(d, ukey, None) - if o == None: - return NullType(d, ukey) - return wrap(o) - - def __setattr__(self, key, value): - if isinstance(key, str): - ukey = key.decode("utf8") - else: - ukey = key - - d = self - value = unwrap(value) - if value is None: - dict.pop(d, key, None) - else: - dict.__setitem__(d, ukey, value) - return self - - def __hash__(self): - return hash_value(self) - - def __eq__(self, other): - if self is other: - return True - - d = self - if not d and other == None: - return True - - if not isinstance(other, Mapping): - return False - e = unwrap(other) - for k, v in dict.items(d): - if e.get(k) != v: - return False - for k, v in e.items(): - if dict.get(d, k, None) != v: - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def get(self, key, default=None): - return wrap(dict.get(self, key, default)) - - def items(self): - return [(k, wrap(v)) for k, v in dict.items(self) if v != None or isinstance(v, Mapping)] - - def leaves(self, prefix=None): - """ - LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES - """ - prefix = coalesce(prefix, "") - output = [] - for k, v in self.items(): - if isinstance(v, Mapping): - output.extend(wrap(v).leaves(prefix=prefix + literal_field(k) + ".")) - else: - output.append((prefix + literal_field(k), v)) - return output - - if PY2: - def iteritems(self): - for k, v in dict.iteritems(self): - yield k, wrap(v) - else: - def iteritems(self): - for k, v in dict.items(self): - yield k, wrap(v) - - - def keys(self): - return set(dict.keys(self)) - - def values(self): - return listwrap(dict.values(self)) - - def clear(self): - get_logger().error("clear() not supported") - - def __len__(self): - d = self._internal_dict - return d.__len__() - - def copy(self): - return Data(**self) - - def __copy__(self): - return Data(**self) - - def __deepcopy__(self, memo): - return wrap(dict.__deepcopy__(self, memo)) - - def __delitem__(self, key): - if isinstance(key, str): - key = key.decode("utf8") - - if key.find(".") == -1: - dict.pop(self, key, None) - return - - d = self - seq = _split_field(key) - for k in seq[:-1]: - d = d[k] - d.pop(seq[-1], None) - - def __delattr__(self, key): - if isinstance(key, str): - key = key.decode("utf8") - - dict.pop(self, key, None) - - def setdefault(self, k, d=None): - if self[k] == None: - self[k] = d - return self - - def __str__(self): - try: - return dict.__str__(self) - except Exception as e: - return "{}" - - def __repr__(self): - try: - return "Data("+dict.__repr__(self)+")" - except Exception as e: - return "Data()" - - def _str(value, depth): """ FOR DEBUGGING POSSIBLY RECURSIVE STRUCTURES """ output = [] - if depth >0 and isinstance(value, Mapping): + if depth >0 and _get(value, CLASS) in data_types: for k, v in value.items(): output.append(str(k) + "=" + _str(v, depth - 1)) return "{" + ",\n".join(output) + "}" - elif depth >0 and isinstance(value, list): + elif depth >0 
and is_list(value): for v in value: output.append(_str(v, depth-1)) return "[" + ",\n".join(output) + "]" @@ -542,47 +399,76 @@ def _str(value, depth): def _iadd(self, other): - if not isinstance(other, Mapping): - get_logger().error("Expecting a Mapping") + """ + RECURSIVE ADDITION OF DATA PROPERTIES + * LISTS ARE CONCATENATED + * SETS ARE UNIONED + * NUMBERS ARE ADDED + """ + + if not _get(other, CLASS) in data_types: + get_logger().error("Expecting Data") d = unwrap(self) for ok, ov in other.items(): sv = d.get(ok) if sv == None: d[ok] = deepcopy(ov) elif isinstance(ov, (Decimal, float, long, int)): - if isinstance(sv, Mapping): + if _get(sv, CLASS) in data_types: get_logger().error( "can not add {{stype}} with {{otype}", - stype=sv.__class__.__name__, - otype=ov.__class__.__name__ + stype=_get(sv, CLASS).__name__, + otype=_get(ov, CLASS).__name__ ) - elif isinstance(sv, list): + elif is_list(sv): d[ok].append(ov) else: d[ok] = sv + ov - elif isinstance(ov, list): + elif is_list(ov): d[ok] = listwrap(sv) + ov - elif isinstance(ov, Mapping): - if isinstance(sv, Mapping): + elif _get(ov, CLASS) in data_types: + if _get(sv, CLASS) in data_types: _iadd(sv, ov) - elif isinstance(sv, list): + elif is_list(sv): d[ok].append(ov) else: get_logger().error( "can not add {{stype}} with {{otype}", - stype=sv.__class__.__name__, - otype=ov.__class__.__name__ + stype=_get(sv, CLASS).__name__, + otype=_get(ov, CLASS).__name__ ) else: - if isinstance(sv, Mapping): + if _get(sv, CLASS) in data_types: get_logger().error( "can not add {{stype}} with {{otype}", - stype=sv.__class__.__name__, - otype=ov.__class__.__name__ + stype=_get(sv, CLASS).__name__, + otype=_get(ov, CLASS).__name__ ) else: d[ok].append(ov) return self + +data_types = (Data, dict, OrderedDict) # TYPES TO HOLD DATA + + +def register_data(type_): + """ + :param type_: ADD OTHER TYPE THAT HOLDS DATA + :return: + """ + global data_types + data_types = tuple(set(data_types + (type_,))) + + +def is_data(d): + """ + :param d: + :return: True IF d IS A TYPE THAT HOLDS DATA + """ + return d.__class__ in data_types + + from mo_dots.nones import Null, NullType +from mo_dots.lists import is_list, FlatList from mo_dots import unwrap, wrap diff --git a/vendor/mo_dots/lists.py b/vendor/mo_dots/lists.py index 6bb860e..dddf373 100644 --- a/vendor/mo_dots/lists.py +++ b/vendor/mo_dots/lists.py @@ -4,23 +4,25 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
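A note on the `mo_dots/datas.py` hunk that ends above: `isinstance(..., Mapping)` tests are replaced throughout by exact-class membership in a small `data_types` registry, with `register_data()` as the extension point and `is_data()` as the test. Subclasses are deliberately not matched unless registered. A minimal sketch of the intended use (the `AttrDict` class is hypothetical):

```python
from mo_dots.datas import is_data, register_data


class AttrDict(dict):
    """Hypothetical dict subclass defined by an application."""


print(is_data({}))          # True: dict is registered by default
print(is_data(AttrDict()))  # False: exact-class check, subclasses not implied

register_data(AttrDict)     # opt the subclass into the registry
print(is_data(AttrDict()))  # True
```

The `lists.py` hunks below apply the same pattern with `list_types`, `container_types`, `sequence_types`, and `many_types`.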
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +import types from copy import deepcopy -from mo_dots import wrap, unwrap, coalesce +from mo_future import generator_types, text, first + +from mo_dots import CLASS, coalesce, unwrap, wrap from mo_dots.nones import Null +LIST = text("list") + _get = object.__getattribute__ -_get_list = lambda self: _get(self, "list") +_get_list = lambda self: _get(self, LIST) _set = object.__setattr__ _emit_slice_warning = True - _datawrap = None Log = None @@ -30,6 +32,7 @@ def _late_import(): global Log from mo_dots.objects import datawrap as _datawrap + try: from mo_logs import Log except Exception: @@ -44,6 +47,7 @@ class FlatList(list): ENCAPSULATES FLAT SLICES ([::]) FOR USE IN WINDOW FUNCTIONS https://github.com/klahnakoski/mo-dots/tree/dev/docs#flatlist-is-flat """ + EMPTY = None def __init__(self, vals=None): @@ -51,18 +55,20 @@ class FlatList(list): # list.__init__(self) if vals == None: self.list = [] - elif isinstance(vals, FlatList): + elif vals.__class__ is FlatList: self.list = vals.list else: self.list = vals def __getitem__(self, index): - if isinstance(index, slice): + if _get(index, CLASS) is slice: # IMPLEMENT FLAT SLICES (for i not in range(0, len(self)): assert self[i]==None) if index.step is not None: if not Log: _late_import() - Log.error("slice step must be None, do not know how to deal with values") + Log.error( + "slice step must be None, do not know how to deal with values" + ) length = len(_get_list(self)) i = index.start @@ -77,7 +83,7 @@ class FlatList(list): j = max(min(j, length), 0) return FlatList(_get_list(self)[i:j]) - if index < 0 or len(_get_list(self)) <= index: + if not isinstance(index, int) or index < 0 or len(_get_list(self)) <= index: return Null return wrap(_get_list(self)[index]) @@ -109,8 +115,9 @@ class FlatList(list): """ if not Log: _late_import() - - return FlatList(vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)]) + return FlatList( + vals=[unwrap(coalesce(_datawrap(v), Null)[key]) for v in _get_list(self)] + ) def select(self, key): if not Log: @@ -118,12 +125,16 @@ class FlatList(list): Log.error("Not supported. Use `get()`") def filter(self, _filter): - return FlatList(vals=[unwrap(u) for u in (wrap(v) for v in _get_list(self)) if _filter(u)]) + return FlatList( + vals=[unwrap(u) for u in (wrap(v) for v in _get_list(self)) if _filter(u)] + ) def __delslice__(self, i, j): if not Log: _late_import() - Log.error("Can not perform del on slice: modulo arithmetic was performed on the parameters. You can try using clear()") + Log.error( + "Can not perform del on slice: modulo arithmetic was performed on the parameters. You can try using clear()" + ) def __clear__(self): self.list = [] @@ -152,7 +163,11 @@ class FlatList(list): _emit_slice_warning = False if not Log: _late_import() - Log.warning("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)") + Log.warning( + "slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. 
Use [start:stop:step] (see " + "https://github.com/klahnakoski/mo-dots/tree/dev/docs#the-slice-operator-in-python27-is-inconsistent" + ")" + ) return self[i:j:] def __list__(self): @@ -185,17 +200,18 @@ class FlatList(list): return wrap(_get_list(self).pop(index)) def __eq__(self, other): - if isinstance(other, FlatList): - other = _get_list(other) lst = _get_list(self) if other == None and len(lst) == 0: return True - if not isinstance(other, list): + other_class = _get(other, CLASS) + if other_class is FlatList: + other = _get_list(other) + try: + if len(lst) != len(other): + return False + return all([s == o for s, o in zip(lst, other)]) + except Exception: return False - if len(lst) != len(other): - return False - return all([s == o for s, o in zip(lst, other)]) - def __add__(self, value): if value == None: @@ -215,7 +231,7 @@ class FlatList(list): return FlatList(vals=output) def __iadd__(self, other): - if isinstance(other, list): + if is_list(other): self.extend(other) else: self.append(other) @@ -226,7 +242,7 @@ class FlatList(list): WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:] """ if num == None: - return FlatList([_get_list(self)[-1]]) + return self if num <= 0: return Null @@ -237,7 +253,7 @@ class FlatList(list): NOT REQUIRED, BUT EXISTS AS OPPOSITE OF right() """ if num == None: - return FlatList([_get_list(self)[0]]) + return self if num <= 0: return Null @@ -248,7 +264,7 @@ class FlatList(list): WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:] """ if num == None: - return FlatList([_get_list(self)[:-1:]]) + return self if num <= 0: return FlatList.EMPTY @@ -259,7 +275,7 @@ class FlatList(list): NOT REQUIRED, EXISTS AS OPPOSITE OF not_right() """ if num == None: - return FlatList([_get_list(self)[-1]]) + return self if num <= 0: return self @@ -281,4 +297,67 @@ class FlatList(list): return FlatList([oper(v) for v in _get_list(self) if v != None]) +def last(values): + if is_many(values): + if not values: + return Null + if isinstance(values, FlatList): + return values.last() + elif is_list(values): + if not values: + return Null + return values[-1] + elif is_sequence(values): + l = Null + for i in values: + l = i + return l + else: + return first(values) + + return values + + FlatList.EMPTY = Null + +list_types = (list, FlatList) +container_types = (list, FlatList, set) +sequence_types = (list, FlatList, tuple) + generator_types +many_types = tuple(set(list_types + container_types + sequence_types)) + +not_many_names = ("str", "unicode", "binary", "NullType", "NoneType", "dict", "Data") # ITERATORS THAT ARE CONSIDERED PRIMITIVE + + +def is_list(l): + # ORDERED, AND CAN CHANGE CONTENTS + return l.__class__ in list_types + + +def is_container(l): + # CAN ADD AND REMOVE ELEMENTS + return l.__class__ in container_types + + +def is_sequence(l): + # HAS AN ORDER, INCLUDES GENERATORS + return l.__class__ in sequence_types + + +def is_many(value): + # REPRESENTS MULTIPLE VALUES + # TODO: CLEAN UP THIS LOGIC + # THIS IS COMPLICATED BECAUSE I AM UNSURE ABOUT ALL THE "PRIMITIVE TYPES" + # I WOULD LIKE TO POSITIVELY CATCH many_types, BUT MAYBE IT IS EASIER TO DETECT: Iterable, BUT NOT PRIMITIVE + # UNTIL WE HAVE A COMPLETE LIST, WE KEEP ALL THIS warning() CODE + global many_types + type_ = value.__class__ + if type_ in many_types: + return True + + if issubclass(type_, types.GeneratorType): + if not Log: + _late_import() + many_types = many_types + (type_,) + Log.warning("is_many() can not detect generator {{type}}", 
type=type_.__name__) + return True + return False diff --git a/vendor/mo_dots/nones.py b/vendor/mo_dots/nones.py index 225b3ad..f9e1280 100644 --- a/vendor/mo_dots/nones.py +++ b/vendor/mo_dots/nones.py @@ -4,19 +4,21 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from mo_dots import _setdefault, wrap, split_field -from mo_future import text_type, binary_type +from mo_future import is_binary, text, none_type +from mo_dots.utils import CLASS, OBJ + +wrap = None +is_sequence = None _get = object.__getattribute__ _set = object.__setattr__ _zero_list = [] +_null_hash = hash(None) class NullType(object): @@ -35,7 +37,7 @@ class NullType(object): key - THE dict ITEM REFERENCE (DOT(.) IS NOT ESCAPED) """ d = _get(self, "__dict__") - d["_obj"] = obj + d[OBJ] = obj d["__key__"] = key def __bool__(self): @@ -45,7 +47,7 @@ class NullType(object): return False def __add__(self, other): - if isinstance(other, list): + if is_sequence(other): return other return Null @@ -58,7 +60,7 @@ class NullType(object): def __iadd__(self, other): try: d = _get(self, "__dict__") - o = d["_obj"] + o = d[OBJ] if o is None: return self key = d["__key__"] @@ -108,10 +110,16 @@ class NullType(object): return Null def __eq__(self, other): - return other == None or isinstance(other, NullType) + class_ = _get(other, CLASS) + if class_ in (none_type, NullType): + return True + elif class_ is list and not other: + return True + else: + return other == None def __ne__(self, other): - return other is not None and not isinstance(other, NullType) + return other is not None and _get(other, CLASS) is not NullType and other != None def __or__(self, other): if other is True: @@ -153,7 +161,7 @@ class NullType(object): def __getitem__(self, key): if isinstance(key, slice): return Null - elif isinstance(key, binary_type): + elif is_binary(key): key = key.decode("utf8") elif isinstance(key, int): return NullType(self, key) @@ -165,14 +173,14 @@ class NullType(object): return output def __getattr__(self, key): - key = text_type(key) + key = text(key) d = _get(self, "__dict__") - o = wrap(d["_obj"]) + o = wrap(d[OBJ]) k = d["__key__"] if o is None: return Null - elif isinstance(o, NullType): + elif _get(o, CLASS) is NullType: return NullType(self, key) v = o.get(k) if v == None: @@ -184,10 +192,10 @@ class NullType(object): Log.error("not expected", cause=e) def __setattr__(self, key, value): - key = text_type(key) + key = text(key) d = _get(self, "__dict__") - o = wrap(d["_obj"]) + o = wrap(d[OBJ]) k = d["__key__"] seq = [k] + [key] @@ -195,7 +203,7 @@ class NullType(object): def __setitem__(self, key, value): d = _get(self, "__dict__") - o = d["_obj"] + o = d[OBJ] if o is None: return k = d["__key__"] @@ -225,7 +233,7 @@ class NullType(object): return "Null" def __hash__(self): - return hash(None) + return _null_hash Null = NullType() # INSTEAD OF None!!! 
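The `NullType` changes above are subtle enough to deserve an example: `Null` hashes like `None`, compares equal to `None`, to other `NullType` instances, and now also to the empty list, while chained access on missing paths keeps returning `NullType` placeholders instead of raising. A short sketch of the expected behavior, assuming the package-level exports used elsewhere in this patch:

```python
from mo_dots import Null, wrap

print(Null == None)              # True
print(Null == [])                # True: new __eq__ branch for empty lists
print(Null != None)              # False
print(hash(Null) == hash(None))  # True: precomputed _null_hash

# missing paths are safe to traverse
doc = wrap({"a": {"b": 1}})
print(doc.a.b)            # 1
print(doc.x.y.z == None)  # True: each step yields a NullType placeholder
```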
@@ -240,9 +248,9 @@ def _assign_to_null(obj, path, value, force=True): try: if obj is Null: return - if isinstance(obj, NullType): + if _get(obj, CLASS) is NullType: d = _get(obj, "__dict__") - o = d["_obj"] + o = d[OBJ] p = d["__key__"] s = [p]+path return _assign_to_null(o, s, value) @@ -276,3 +284,16 @@ def _split_field(field): return [] else: return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")] + + +def _setdefault(obj, key, value): + """ + DO NOT USE __dict__.setdefault(obj, key, value), IT DOES NOT CHECK FOR obj[key] == None + """ + v = obj.get(key) + if v == None: + obj[key] = value + return value + return v + + diff --git a/vendor/mo_dots/objects.py b/vendor/mo_dots/objects.py index e7beee0..7b5bd5f 100644 --- a/vendor/mo_dots/objects.py +++ b/vendor/mo_dots/objects.py @@ -4,19 +4,20 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from collections import Mapping from datetime import date, datetime from decimal import Decimal -from mo_dots import wrap, unwrap, Data, FlatList, NullType, get_attr, set_attr, SLOT -from mo_future import text_type, binary_type, get_function_defaults, get_function_arguments, none_type, generator_types +from mo_future import binary_type, generator_types, get_function_arguments, get_function_defaults, none_type, text + +from mo_dots import Data, FlatList, NullType, SLOT, get_attr, set_attr, unwrap, wrap +from mo_dots.datas import register_data +from mo_dots.utils import CLASS, OBJ _get = object.__getattribute__ _set = object.__setattr__ @@ -29,31 +30,31 @@ class DataObject(Mapping): """ def __init__(self, obj): - _set(self, "_obj", obj) + _set(self, OBJ, obj) def __getattr__(self, item): - obj = _get(self, "_obj") + obj = _get(self, OBJ) output = get_attr(obj, item) return datawrap(output) def __setattr__(self, key, value): - obj = _get(self, "_obj") + obj = _get(self, OBJ) set_attr(obj, key, value) def __getitem__(self, item): - obj = _get(self, "_obj") + obj = _get(self, OBJ) output = get_attr(obj, item) return datawrap(output) def keys(self): - obj = _get(self, "_obj") + obj = _get(self, OBJ) try: return obj.__dict__.keys() except Exception as e: raise e def items(self): - obj = _get(self, "_obj") + obj = _get(self, OBJ) try: return obj.__dict__.items() except Exception as e: @@ -64,7 +65,7 @@ class DataObject(Mapping): ] def iteritems(self): - obj = _get(self, "_obj") + obj = _get(self, OBJ) try: return obj.__dict__.iteritems() except Exception as e: @@ -82,43 +83,40 @@ class DataObject(Mapping): return (k for k in self.keys()) def __unicode__(self): - obj = _get(self, "_obj") - return text_type(obj) + obj = _get(self, OBJ) + return text(obj) def __str__(self): - obj = _get(self, "_obj") + obj = _get(self, OBJ) return str(obj) def __len__(self): - obj = _get(self, "_obj") + obj = _get(self, OBJ) return len(obj) def __call__(self, *args, **kwargs): - obj = _get(self, "_obj") + obj = _get(self, OBJ) return obj(*args, **kwargs) +register_data(DataObject) + + def datawrap(v): - type_ = _get(v, "__class__") + type_ = _get(v, CLASS) if type_ is dict: m = Data() _set(m, SLOT, v) # INJECT m.__dict__=v SO THERE IS NO COPY return m - elif type_ is Data: - return v - elif type_ 
is DataObject: - return v - elif type_ is none_type: - return None # So we allow `is None` elif type_ is list: return FlatList(v) + elif type_ in (Data, DataObject, none_type, FlatList, text, binary_type, int, float, Decimal, datetime, date, NullType, none_type): + return v elif type_ in generator_types: return (wrap(vv) for vv in v) - elif isinstance(v, (text_type, binary_type, int, float, Decimal, datetime, date, Data, FlatList, NullType, none_type)): + elif isinstance(v, (text, binary_type, int, float, Decimal, datetime, date, FlatList, NullType, Mapping, none_type)): return v - elif isinstance(v, Mapping): - return DataObject(v) elif hasattr(v, "__data__"): return v.__data__() else: @@ -156,7 +154,7 @@ def params_pack(params, *args): settings = {} for a in args: for k, v in a.items(): - k = text_type(k) + k = text(k) if k in settings: continue settings[k] = v diff --git a/vendor/mo_dots/utils.py b/vendor/mo_dots/utils.py index acf4071..6687ac4 100644 --- a/vendor/mo_dots/utils.py +++ b/vendor/mo_dots/utils.py @@ -4,17 +4,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import importlib import sys -from mo_future import PY2 +from mo_future import PY2, text + +OBJ = text("_obj") +CLASS = text("__class__") _Log = None diff --git a/vendor/mo_fabric/__init__.py b/vendor/mo_fabric/__init__.py index 449628d..84b4128 100644 --- a/vendor/mo_fabric/__init__.py +++ b/vendor/mo_fabric/__init__.py @@ -6,28 +6,26 @@ # # Author: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from datetime import datetime import os import sys -from contextlib import contextmanager -from datetime import datetime -from fabric2 import Config -from fabric2 import Connection as _Connection -from mo_math.randoms import Random +from fabric2 import Config, Connection as _Connection, Result +from mo_logs.exceptions import Except -from mo_dots import set_default, unwrap, wrap -from mo_files import File, TempFile -from mo_future import text_type +from mo_dots import set_default, unwrap, wrap, listwrap, coalesce +from mo_files import File +from mo_future import text, is_text from mo_kwargs import override from mo_logs import Log, exceptions, machine_metadata +from mo_math.randoms import Random +from mo_threads import Thread +from mo_threads.threads import RegisterThread class Connection(object): - @override def __init__( self, @@ -41,59 +39,87 @@ class Connection(object): connect_kwargs=None, inline_ssh_env=None, key_filename=None, # part of connect_kwargs - kwargs=None + kwargs=None, ): - connect_kwargs = set_default({}, connect_kwargs, {"key_filename": File(key_filename).abspath}) + connect_kwargs = set_default( + {}, connect_kwargs, {"key_filename": File(key_filename).abspath} + ) + + key_filenames = listwrap(coalesce(connect_kwargs.key_filename, key_filename)) self.stdout = LogStream(host, "stdout") self.stderr = LogStream(host, "stderr") - config = Config(**unwrap(set_default({}, config, {"overrides": {"run": { - # "hide": True, - "out_stream": self.stdout, - "err_stream": self.stderr - 
}}}))) + config = Config(**unwrap(set_default( + {}, + config, + {"overrides": {"run": { + # "hide": True, + "out_stream": self.stdout, + "err_stream": self.stderr, + }}}, + ))) self.warn = False - self.conn = _Connection( - host, - user, - port, - config, - gateway, - forward_agent, - connect_timeout, - connect_kwargs, - inline_ssh_env - ) + cause = Except("expecting some private key to connect") + for key_file in key_filenames: + try: + connect_kwargs.key_filename=File(key_file).abspath + self.conn = _Connection( + host, + user, + port, + config, + gateway, + forward_agent, + connect_timeout, + connect_kwargs, + inline_ssh_env, + ) + self.conn.run("echo") # verify we can connect + return + except Exception as e: + cause = e + + Log.error("could not connect", cause = cause) def exists(self, path): - with TempFile() as t: - try: - result = self.conn.get(path, t.abspath) - return t.exists - except IOError: + try: + result = self.conn.run("ls " + path) + if "No such file or directory" in result: return False + else: + return True + except Exception as e: + return False def warn_only(self): """ IGNORE WARNING IN THIS CONTEXT """ - @contextmanager - def warning_set(): - old, self.warn = self.warn, True - yield - self.warn = old - return warning_set + return Warning(self) - def get(self, remote, local): - self.conn.get(remote, File(local).abspath) + def get(self, remote, local, use_sudo=False): + if self.conn.command_cwds and not remote.startswith(("/", "~")): + remote = self.conn.command_cwds[-1].rstrip("/'") + "/" + remote + + if use_sudo: + filename = "/tmp/" + Random.hex(20) + self.sudo("cp " + remote + " " + filename) + self.sudo("chmod a+r " + filename) + self.conn.get(filename, File(local).abspath) + self.sudo("rm " + filename) + else: + self.conn.get(remote, File(local).abspath) def put(self, local, remote, use_sudo=False): + if self.conn.command_cwds and not remote.startswith(("/", "~")): + remote = self.conn.command_cwds[-1].rstrip("/'") + "/" + remote + if use_sudo: - filename = "/tmp/"+Random.hex(20) + filename = "/tmp/" + Random.hex(20) self.conn.put(File(local).abspath, filename) - self.sudo("cp "+filename+" "+remote) - self.sudo("rm "+filename) + self.sudo("cp " + filename + " " + remote) + self.sudo("rm " + filename) else: self.conn.put(File(local).abspath, remote) @@ -113,62 +139,92 @@ class Connection(object): return getattr(self.conn, item) +class Warning(object): + def __init__(self, conn): + self.conn = conn + self.old = None + + def __enter__(self): + self.old, self.conn.warn = self.conn.warn, True + + def __exit__(self, exc_type, exc_val, exc_tb): + self.conn.warn = self.old + + +# EXTEND Result WITH __contains__ SO WE CAN PERFORM +# if some_text in result: +def __contains__(self, value): + return value in self.stdout or value in self.stderr + + +setattr(Result, "__contains__", __contains__) +del __contains__ + + EMPTY = str("") CR = str("\n") class LogStream(object): - def __init__(self, name, type): self.name = name self.type = type self.part_line = EMPTY def write(self, value): - lines = value.split(CR) - if len(lines) == 1: - self.part_line += lines[0] - return + with RegisterThread(name=self.name): + lines = value.split(CR) + if len(lines) == 1: + self.part_line += lines[0] + return - prefix = self.part_line - for line in lines[0:-1]: - full_line = prefix + line - note(u"{{name}} ({{type}}): {{line}}", name=self.name, type=self.type, line=full_line) - prefix = EMPTY - self.part_line = lines[-1] + prefix = self.part_line + for line in lines[0:-1]: + full_line = prefix 
+ line + note( + "{{name}} ({{type}}): {{line}}", + name=self.name, + type=self.type, + line=full_line, + ) + prefix = EMPTY + self.part_line = lines[-1] def flush(self): pass -def note( - template, - **params -): - if not isinstance(template, text_type): +def note(template, **params): + if not is_text(template): Log.error("Log.note was expecting a unicode template") if len(template) > 10000: template = template[:10000] - log_params = wrap({ - "template": template, - "params": params, - "timestamp": datetime.utcnow(), - "machine": machine_metadata, - "context": exceptions.NOTE - }) + log_params = wrap( + { + "template": template, + "params": params, + "timestamp": datetime.utcnow(), + "machine": machine_metadata, + "context": exceptions.NOTE, + "thread": Thread.current() + } + ) if not template.startswith("\n") and template.find("\n") > -1: template = "\n" + template if Log.trace: - log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.") + log_template = ( + '{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - "{{location.file}}:{{location.line}}" ({{location.method}}) - ' + + template.replace("{{", "{{params.") + ) f = sys._getframe(1) log_params.location = { "line": f.f_lineno, - "file": text_type(f.f_code.co_filename.split(os.sep)[-1]), - "method": text_type(f.f_code.co_name) + "file": text(f.f_code.co_filename.split(os.sep)[-1]), + "method": text(f.f_code.co_name), } else: log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.") diff --git a/vendor/mo_files/__init__.py b/vendor/mo_files/__init__.py index 2a7b921..7f7658e 100644 --- a/vendor/mo_files/__init__.py +++ b/vendor/mo_files/__init__.py @@ -5,7 +5,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
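Summarizing the `mo_fabric` rework that ends above, before moving on to `mo_files`: the constructor now takes one or more candidate private keys and verifies each with a throwaway `echo` until a connection succeeds, `warn_only()` returns a real context manager, `get()`/`put()` learn `use_sudo`, and `Result` gains `__contains__` so command output can be probed with `in`. A hedged usage sketch (host, user, and key path are placeholders):

```python
from mo_fabric import Connection

conn = Connection(
    host="ec2-xx-xx-xx-xx.us-west-2.compute.amazonaws.com",  # placeholder
    user="ec2-user",
    key_filename="~/.ssh/spot_manager.pem",  # a list of candidate keys also works
)

result = conn.run("ls /data1")
if "logs" in result:  # Result.__contains__ checks stdout and stderr
    conn.get("/data1/logs/es.log", "./es.log", use_sudo=True)

with conn.warn_only():  # sets the connection's warn flag for this block
    conn.sudo("service elasticsearch stop")
```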
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # import base64 import io @@ -14,15 +14,14 @@ import re import shutil from datetime import datetime from mimetypes import MimeTypes -from tempfile import mkdtemp, NamedTemporaryFile +from tempfile import NamedTemporaryFile, mkdtemp -from mo_dots import get_module, coalesce, Null -from mo_future import text_type, binary_type, PY3 -from mo_logs import Log, Except -from mo_logs.exceptions import extract_stack -from mo_threads import Thread, Till - -mime = MimeTypes() +from mo_dots import Null, coalesce, get_module, is_list +from mo_files import mimetype +from mo_files.url import URL +from mo_future import PY3, binary_type, text, is_text +from mo_logs import Except, Log +from mo_logs.exceptions import get_stacktrace class File(object): @@ -42,12 +41,12 @@ class File(object): """ YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES """ - self._mime_type = mime_type - if filename == None: - Log.error(u"File must be given a filename") - elif isinstance(filename, File): + if isinstance(filename, File): return - elif isinstance(filename, (binary_type, text_type)): + + self._mime_type = mime_type + + if isinstance(filename, (binary_type, text)): try: self.key = None if filename==".": @@ -75,7 +74,7 @@ class File(object): self.buffering = buffering if suffix: - self._filename = File.add_suffix(self._filename, suffix) + self._filename = add_suffix(self._filename, suffix) @classmethod def new_instance(cls, *path): @@ -115,17 +114,11 @@ class File(object): else: return os.path.abspath(self._filename) - @staticmethod - def add_suffix(filename, suffix): + def add_suffix(self, suffix): """ ADD suffix TO THE filename (NOT INCLUDING THE FILE EXTENSION) """ - path = filename.split("/") - parts = path[-1].split(".") - i = max(len(parts) - 2, 0) - parts[i] = parts[i] + suffix - path[-1] = ".".join(parts) - return "/".join(path) + return File(add_suffix(self._filename, suffix)) @property def extension(self): @@ -151,8 +144,9 @@ class File(object): elif self.abspath.endswith(".css"): self._mime_type = "text/css" elif self.abspath.endswith(".json"): - self._mime_type = "application/json" + self._mime_type = mimetype.JSON else: + mime = MimeTypes() self._mime_type, _ = mime.guess_type(self.abspath) if not self._mime_type: self._mime_type = "application/binary" @@ -188,6 +182,12 @@ class File(object): path[-1] = ".".join(parts) return File("/".join(path)) + def add_extension(self, ext): + """ + RETURN NEW FILE WITH EXTENSION ADDED (OLD EXTENSION IS A SUFFIX) + """ + return File(self._filename + "." 
+ text(ext)) + def set_name(self, name): """ RETURN NEW FILE WITH GIVEN EXTENSION @@ -230,15 +230,16 @@ class File(object): for num, zip_name in enumerate(zipped.namelist()): return zipped.open(zip_name).read().decode(encoding) - def read_lines(self, encoding="utf8"): with open(self._filename, "rb") as f: for line in f: yield line.decode(encoding).rstrip() def read_json(self, encoding="utf8", flexible=True, leaves=True): + from mo_json import json2value + content = self.read(encoding=encoding) - value = get_module(u"mo_json").json2value(content, flexible=flexible, leaves=leaves) + value = json2value(content, flexible=flexible, leaves=leaves) abspath = self.abspath if os.sep == "\\": abspath = "/" + abspath.replace(os.sep, "/") @@ -272,21 +273,21 @@ class File(object): if not self.parent.exists: self.parent.create() with open(self._filename, "wb") as f: - if isinstance(data, list) and self.key: + if is_list(data) and self.key: Log.error(u"list of data and keys are not supported, encrypt before sending to file") - if isinstance(data, list): + if is_list(data): pass - elif isinstance(data, (binary_type, text_type)): + elif isinstance(data, (binary_type, text)): data=[data] elif hasattr(data, "__iter__"): pass for d in data: - if not isinstance(d, text_type): + if not is_text(d): Log.error(u"Expecting unicode data only") if self.key: - from mo_math.crypto import encrypt + from mo_math.aes_crypto import encrypt f.write(encrypt(d, self.key).encode("utf8")) else: f.write(d.encode("utf8")) @@ -317,7 +318,7 @@ class File(object): if not self.parent.exists: self.parent.create() with open(self._filename, "ab") as output_file: - if not isinstance(content, text_type): + if not is_text(content): Log.error(u"expecting to write unicode only") output_file.write(content.encode(encoding)) output_file.write(b"\n") @@ -334,7 +335,7 @@ class File(object): self.parent.create() with open(self._filename, "ab") as output_file: for c in content: - if isinstance(c, str): + if not isinstance(c, text): Log.error(u"expecting to write unicode only") output_file.write(c.encode("utf8")) @@ -440,7 +441,7 @@ class TempDirectory(File): WILL BE DELETED WHEN EXITED """ def __new__(cls): - return File.__new__(cls, None) + return object.__new__(cls) def __init__(self): File.__init__(self, mkdtemp()) @@ -449,7 +450,9 @@ class TempDirectory(File): return self def __exit__(self, exc_type, exc_val, exc_tb): - Thread.run("delete dir " + self.name, delete_daemon, file=self, caller_stack=extract_stack(1)) + from mo_threads import Thread + + Thread.run("delete dir " + self.name, delete_daemon, file=self, caller_stack=get_stacktrace(1)).release() class TempFile(File): @@ -460,7 +463,9 @@ class TempFile(File): def __new__(cls, *args, **kwargs): return object.__new__(cls) - def __init__(self): + def __init__(self, filename=None): + if isinstance(filename, File): + return self.temp = NamedTemporaryFile(delete=False) self.temp.close() File.__init__(self, self.temp.name) @@ -469,8 +474,9 @@ class TempFile(File): return self def __exit__(self, exc_type, exc_val, exc_tb): - Thread.run("delete file " + self.name, delete_daemon, file=self, caller_stack=extract_stack(1)) + from mo_threads import Thread + Thread.run("delete file " + self.name, delete_daemon, file=self, caller_stack=get_stacktrace(1)).release() def _copy(from_, to_): if from_.is_directory(): @@ -557,6 +563,8 @@ def join_path(*path): def delete_daemon(file, caller_stack, please_stop): # WINDOWS WILL HANG ONTO A FILE FOR A BIT AFTER WE CLOSED IT + from mo_threads import Till + while 
not please_stop: try: file.delete() @@ -567,3 +575,16 @@ def delete_daemon(file, caller_stack, please_stop): Log.warning(u"problem deleting file {{file}}", file=file.abspath, cause=e) (Till(seconds=10)|please_stop).wait() + + +def add_suffix(filename, suffix): + """ + ADD suffix TO THE filename (NOT INCLUDING THE FILE EXTENSION) + """ + path = filename.split("/") + parts = path[-1].split(".") + i = max(len(parts) - 2, 0) + parts[i] = parts[i] + "." + text(suffix).strip(".") + path[-1] = ".".join(parts) + return File("/".join(path)) + diff --git a/vendor/mo_files/mimetype.py b/vendor/mo_files/mimetype.py new file mode 100644 index 0000000..0dbc51e --- /dev/null +++ b/vendor/mo_files/mimetype.py @@ -0,0 +1,4 @@ +from mo_future import text + +JSON = text("application/json") +ZIP = text("application/zip") diff --git a/vendor/mo_files/url.py b/vendor/mo_files/url.py index c208ef7..1c50e37 100644 --- a/vendor/mo_files/url.py +++ b/vendor/mo_files/url.py @@ -4,14 +4,11 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from collections import Mapping - -from mo_dots import wrap, Data, coalesce, Null -from mo_future import urlparse, text_type, PY2, unichr -from mo_json import value2json, json2value +from mo_dots import Data, Null, coalesce, is_data, is_list, wrap +from mo_future import PY2, is_text, text, unichr, urlparse, is_binary from mo_logs import Log @@ -22,7 +19,15 @@ class URL(object): [1] https://docs.python.org/3/library/urllib.parse.html """ + def __new__(cls, value, *args, **kwargs): + if isinstance(value, URL): + return value + else: + return object.__new__(cls) + def __init__(self, value, port=None, path=None, query=None, fragment=None): + if isinstance(value, URL): + return try: self.scheme = None self.host = None @@ -62,7 +67,7 @@ class URL(object): return False def __truediv__(self, other): - if not isinstance(other, text_type): + if not is_text(other): Log.error(u"Expecting text path") output = self.__copy__() output.path = output.path.rstrip('/') + "/" + other.lstrip('/') @@ -81,6 +86,9 @@ class URL(object): output.fragment = self.fragment return output + def decode(self, encoding=''): + return text(self).decode(encoding) + def __data__(self): return str(self) @@ -93,7 +101,7 @@ class URL(object): if self.port: url = url + ":" + str(self.port) if self.path: - if self.path[0] == text_type("/"): + if self.path[0] == text("/"): url += str(self.path) else: url += "/" + str(self.path) @@ -109,8 +117,10 @@ def int2hex(value, size): def hex2chr(hex): - return unichr(int(hex, 16)) - + try: + return unichr(int(hex, 16)) + except Exception as e: + raise e if PY2: _map2url = {chr(i): chr(i) for i in range(32, 128)} @@ -165,8 +175,10 @@ def url_param2value(param): output.append(c) i += 1 - output = text_type("".join(output)) + output = text("".join(output)) try: + from mo_json import json2value + return json2value(output) except Exception: pass @@ -186,7 +198,7 @@ def url_param2value(param): u = query.get(k) if u is None: query[k] = v - elif isinstance(u, list): + elif is_list(u): u += [v] else: query[k] = [u, v] @@ -202,15 +214,17 @@ def value2url_param(value): if value == None: Log.error("Can not encode None into a URL") - if isinstance(value, Mapping): + if is_data(value): + from mo_json import value2json + value_ = wrap(value) output = "&".join([ - value2url_param(k) + "=" + (value2url_param(v) if 
isinstance(v, text_type) else value2url_param(value2json(v))) + value2url_param(k) + "=" + (value2url_param(v) if is_text(v) else value2url_param(value2json(v))) for k, v in value_.leaves() ]) - elif isinstance(value, text_type): + elif is_text(value): output = "".join(_map2url[c] for c in value.encode('utf8')) - elif isinstance(value, str): + elif is_binary(value): output = "".join(_map2url[c] for c in value) elif hasattr(value, "__iter__"): output = ",".join(value2url_param(v) for v in value) diff --git a/vendor/mo_future/__init__.py b/vendor/mo_future/__init__.py index 567f59d..e5a6d42 100644 --- a/vendor/mo_future/__init__.py +++ b/vendor/mo_future/__init__.py @@ -4,17 +4,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import json import sys - PY3 = sys.version_info[0] == 3 PY2 = sys.version_info[0] == 2 @@ -31,18 +28,22 @@ boolean_type = type(True) if PY3: import itertools - import collections - from functools import cmp_to_key + from collections import OrderedDict, UserDict + from collections.abc import Callable, Iterable, Mapping, Set, MutableMapping + from functools import cmp_to_key, reduce, update_wrapper from configparser import ConfigParser from itertools import zip_longest + import builtins as __builtin__ + from builtins import input izip = zip zip_longest = itertools.zip_longest - text_type = str + text = str + text = str string_types = str binary_type = bytes - integer_types = int + integer_types = (int, ) number_types = (int, float) long = int unichr = chr @@ -55,7 +56,9 @@ if PY3: type(_gen()), type(filter(lambda x: True, [])), type({}.items()), - type({}.values()) + type({}.values()), + type(map(lambda: 0, iter([]))), + type(reversed([])) ) unichr = chr @@ -93,6 +96,27 @@ if PY3: def sort_using_key(data, key): return sorted(data, key=key) + def first(values): + try: + return iter(values).__next__() + except StopIteration: + return None + + def NEXT(_iter): + """ + RETURN next() FUNCTION, DO NOT CALL + """ + return _iter.__next__ + + def next(_iter): + return _iter.__next__() + + def is_text(t): + return t.__class__ is str + + def is_binary(b): + return b.__class__ is bytes + utf8_json_encoder = json.JSONEncoder( skipkeys=False, ensure_ascii=False, # DIFF FROM DEFAULTS @@ -104,18 +128,22 @@ if PY3: sort_keys=True # <-- IMPORTANT! 
sort_keys==True ).encode - UserDict = collections.UserDict -else: - import collections +else: # PY2 + from collections import Callable, Iterable, Mapping, Set, MutableMapping, OrderedDict + from functools import cmp_to_key, reduce, update_wrapper + import __builtin__ from types import GeneratorType from ConfigParser import ConfigParser from itertools import izip_longest as zip_longest from __builtin__ import zip as transpose from itertools import izip + from __builtin__ import raw_input as input - text_type = __builtin__.unicode + reduce = __builtin__.reduce + text = __builtin__.unicode + text = __builtin__.unicode string_types = (str, unicode) binary_type = str integer_types = (int, long) @@ -124,7 +152,7 @@ else: unichr = __builtin__.unichr xrange = __builtin__.xrange - generator_types = (GeneratorType,) + generator_types = (GeneratorType, type(reversed([]))) unichr = __builtin__.unichr round = __builtin__.round @@ -162,6 +190,27 @@ else: # lambda a, b: (1 if (a[0]>b[0]) else (-1 if (a[0] too_old] - - num_recent = len(recent_requests) - if num_recent >= max_requests: - space_free_at = recent_requests[0] + self.amortization_period - (please_stop | Till(till=space_free_at.unix)).wait() - continue - for _ in xrange(num_recent, max_requests): - request = self.todo.pop() - now = Date.now() - recent_requests.append(now) - self.requests.add(request) - except Exception as e: - Log.warning("failure", cause=e) - - def _cache_cleaner(self, please_stop): - while not please_stop: - now = Date.now() - too_old = now-CACHE_RETENTION - - remove = set() - with self.cache_locker: - for path, (ready, headers, response, timestamp) in self.cache: - if timestamp < too_old: - remove.add(path) - for r in remove: - del self.cache[r] - (please_stop | Till(seconds=CACHE_RETENTION.seconds / 2)).wait() - - def please_cache(self, path): - """ - :return: False if `path` is not to be cached - """ - if path.endswith("/tip"): - return False - if any(k in path for k in ["/json-annotate/", "/json-info/", "/json-log/", "/json-rev/", "/rev/", "/raw-rev/", "/raw-file/", "/json-pushes", "/pushloghtml", "/file/"]): - return True - - return False - - def request(self, method, path, headers): - now = Date.now() - self.inbound_rate.add(now) - ready = Signal(path) - - # TEST CACHE - with self.cache_locker: - pair = self.cache.get(path) - if pair is None: - self.cache[path] = (ready, None, None, now) - - - if pair is not None: - # REQUEST IS IN THE QUEUE ALREADY, WAIT - ready, headers, response, then = pair - if response is None: - ready.wait() - with self.cache_locker: - ready, headers, response, timestamp = self.cache.get(path) - with self.db.transaction() as t: - t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now)) - return Response( - response, - status=200, - headers=json.loads(headers) - ) - - # TEST DB - db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data - if db_response: - headers, response = db_response[0] - with self.db.transaction() as t: - t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now)) - with self.cache_locker: - self.cache[path] = (ready, headers, response.encode('latin1'), now) - ready.go() - - return Response( - response, - status=200, - headers=json.loads(headers) - ) - - # MAKE A NETWORK REQUEST - self.todo.add((ready, method, path, headers, now)) - ready.wait() - with self.cache_locker: - ready, 
headers, response, timestamp = self.cache[path] - return Response( - response, - status=200, - headers=json.loads(headers) - ) - - def _worker(self, please_stop): - while not please_stop: - pair = self.requests.pop(till=please_stop) - if please_stop: - break - ready, method, path, req_headers, timestamp = pair - - try: - url = self.url / path - self.outbound_rate.add(Date.now()) - response = http.request(method, url, req_headers) - - del response.headers['transfer-encoding'] - resp_headers = value2json(response.headers) - resp_content = response.raw.read() - - please_cache = self.please_cache(path) - if please_cache: - with self.db.transaction() as t: - t.execute("INSERT INTO cache (path, headers, response, timestamp) VALUES" + quote_list((path, resp_headers, resp_content.decode('latin1'), timestamp))) - with self.cache_locker: - self.cache[path] = (ready, resp_headers, resp_content, timestamp) - except Exception as e: - Log.warning("problem with request to {{path}}", path=path, cause=e) - with self.cache_locker: - ready, headers, response = self.cache[path] - del self.cache[path] - finally: - ready.go() - - - diff --git a/vendor/mo_hg/hg_branches.py b/vendor/mo_hg/hg_branches.py deleted file mode 100644 index ecf331d..0000000 --- a/vendor/mo_hg/hg_branches.py +++ /dev/null @@ -1,231 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import unicode_literals - -import jx_elasticsearch -from bs4 import BeautifulSoup - -from mo_collections import UniqueIndex -from mo_dots import Data, set_default, FlatList -from mo_hg.hg_mozilla_org import DEFAULT_LOCALE -from mo_kwargs import override -from mo_logs import Log, Except -from mo_logs import startup, constants -from mo_math import MAX -from mo_times.dates import Date -from mo_times.durations import SECOND, DAY -from pyLibrary.env import elasticsearch, http - -EXTRA_WAIT_TIME = 20 * SECOND # WAIT TIME TO SEND TO AWS, IF WE wait_forever -OLD_BRANCH = DAY -BRANCH_WHITELIST = None - - -@override -def get_branches(hg, branches, kwargs=None): - # TRY ES - cluster = elasticsearch.Cluster(branches) - try: - es = cluster.get_index(kwargs=branches, read_only=False) - esq = jx_elasticsearch.new_instance(branches) - found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data - - # IF IT IS TOO OLD, THEN PULL FROM HG - oldest = Date(MAX(found_branches.etl.timestamp)) - if oldest == None or Date.now() - oldest > OLD_BRANCH: - found_branches = _get_branches_from_hg(hg) - es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches) - es.flush() - - try: - return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False) - except Exception as e: - Log.error("Bad branch in ES index", cause=e) - except Exception as e: - e = Except.wrap(e) - if "Can not find index " in e: - set_default(branches, {"schema": branches_schema}) - es = cluster.get_or_create_index(branches) - es.add_alias() - return get_branches(kwargs) - Log.error("problem getting branches", cause=e) - - -@override -def _get_branches_from_hg(kwarg): - # GET MAIN PAGE - response = http.get(kwarg.url) - doc = BeautifulSoup(response.all_content, "html.parser") - - all_repos = doc("table")[1] - branches = UniqueIndex(["name", "locale"], fail_on_dup=False) - for i, r in enumerate(all_repos("tr")): - 
dir, name = [v.text.strip() for v in r("td")] - - b = _get_single_branch_from_hg(kwarg, name, dir.lstrip("/")) - branches.extend(b) - - # branches.add(set_default({"name": "release-mozilla-beta"}, branches["mozilla-beta", DEFAULT_LOCALE])) - for b in list(branches["mozilla-beta", ]): - branches.add(set_default({"name": "release-mozilla-beta"}, b)) # THIS IS THE l10n "name" - b.url = "https://hg.mozilla.org/releases/mozilla-beta" # THIS IS THE - - for b in list(branches["mozilla-release", ]): - branches.add(set_default({"name": "release-mozilla-release"}, b)) - - for b in list(branches["mozilla-aurora", ]): - if b.locale == "en-US": - continue - branches.add(set_default({"name": "comm-aurora"}, b)) - # b.url = "https://hg.mozilla.org/releases/mozilla-aurora" - - for b in list(branches): - if b.name.startswith("mozilla-esr"): - branches.add(set_default({"name": "release-" + b.name}, b)) # THIS IS THE l10n "name" - b.url = "https://hg.mozilla.org/releases/" + b.name - - #CHECKS - for b in branches: - if b.name != b.name.lower(): - Log.error("Expecting lowercase name") - if not b.locale: - Log.error("Not expected") - if not b.url.startswith("http"): - Log.error("Expecting a valid url") - if not b.etl.timestamp: - Log.error("Expecting a timestamp") - - return branches - - -def _get_single_branch_from_hg(settings, description, dir): - if dir == "users": - return [] - response = http.get(settings.url + "/" + dir) - doc = BeautifulSoup(response.all_content, "html.parser") - - output = [] - try: - all_branches = doc("table")[0] - except Exception: - return [] - - for i, b in enumerate(all_branches("tr")): - if i == 0: - continue # IGNORE HEADER - columns = b("td") - - try: - path = columns[0].a.get('href') - if path == "/": - continue - - name, desc, last_used = [c.text.strip() for c in columns][0:3] - - if last_used.startswith('at'): - last_used = last_used[2:] - - detail = Data( - name=name.lower(), - locale=DEFAULT_LOCALE, - parent_name=description, - url=settings.url + path, - description=desc, - last_used=Date(last_used), - etl={"timestamp": Date.now()} - ) - if detail.description == "unknown": - detail.description = None - - # SOME BRANCHES HAVE NAME COLLISIONS, IGNORE LEAST POPULAR - if path in [ - "/projects/dxr/", # moved to webtools - "/build/compare-locales/", # ?build team likes to clone? - "/build/puppet/", # ?build team likes to clone? 
- "/SeaMonkey/puppet/", # looses the popularity contest - "/releases/gaia-l10n/v1_2/en-US/", # use default branch - "/releases/gaia-l10n/v1_3/en-US/", # use default branch - "/releases/gaia-l10n/v1_4/en-US/", # use default branch - "/releases/gaia-l10n/v2_0/en-US/", # use default branch - "/releases/gaia-l10n/v2_1/en-US/", # use default branch - "/build/autoland/" - ]: - continue - - # MARKUP BRANCH IF LOCALE SPECIFIC - if path.startswith("/l10n-central"): - _path = path.strip("/").split("/") - detail.locale = _path[-1] - detail.name = "mozilla-central" - elif path.startswith("/releases/l10n/"): - _path = path.strip("/").split("/") - detail.locale = _path[-1] - detail.name = _path[-2].lower() - elif path.startswith("/releases/gaia-l10n/"): - _path = path.strip("/").split("/") - detail.locale = _path[-1] - detail.name = "gaia-" + _path[-2][1::] - elif path.startswith("/weave-l10n"): - _path = path.strip("/").split("/") - detail.locale = _path[-1] - detail.name = "weave" - - if BRANCH_WHITELIST is not None: - found = False - for br in BRANCH_WHITELIST: - if br in str(detail.name): - found = True - break - if not found: - continue - - Log.note("Branch {{name}} {{locale}}", name=detail.name, locale=detail.locale) - output.append(detail) - except Exception as e: - Log.warning("branch digestion problem", cause=e) - - return output - - -branches_schema = { - "settings": { - "index.number_of_replicas": 1, - "index.number_of_shards": 1 - }, - "mappings": { - "branch": { - "_all": { - "enabled": False - } - } - } -} - - -def main(): - - try: - settings = startup.read_settings() - constants.set(settings.constants) - Log.start(settings.debug) - - branches = _get_branches_from_hg(settings.hg) - - es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches) - es.add_alias() - es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches) - Log.alert("DONE!") - except Exception as e: - Log.error("Problem with etl", e) - finally: - Log.stop() - - -if __name__ == "__main__": - main() diff --git a/vendor/mo_hg/hg_mozilla_org.py b/vendor/mo_hg/hg_mozilla_org.py deleted file mode 100644 index 8eba0cb..0000000 --- a/vendor/mo_hg/hg_mozilla_org.py +++ /dev/null @@ -1,733 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import re -from collections import Mapping -from copy import copy - -import mo_threads -from mo_dots import set_default, Null, coalesce, unwraplist, listwrap, wrap, Data -from mo_future import text_type, binary_type -from mo_hg.parse import diff_to_json, diff_to_moves -from mo_hg.repos.changesets import Changeset -from mo_hg.repos.pushs import Push -from mo_hg.repos.revisions import Revision, revision_schema -from mo_json import json2value -from mo_kwargs import override -from mo_logs import Log, strings, machine_metadata -from mo_logs.exceptions import Explanation, assert_no_exception, Except, suppress_exception -from mo_logs.strings import expand_template -from mo_math.randoms import Random -from mo_threads import Thread, Lock, Queue, THREAD_STOP, Till -from mo_times.dates import Date -from mo_times.durations import SECOND, Duration, HOUR, MINUTE, DAY -from pyLibrary.env import http, elasticsearch -from pyLibrary.meta import cache - -_hg_branches = None -_OLD_BRANCH = None - - -def _count(values): - return len(list(values)) - - -def _late_imports(): - global _hg_branches - global _OLD_BRANCH - - from mo_hg import hg_branches as _hg_branches - from mo_hg.hg_branches import OLD_BRANCH as _OLD_BRANCH - - _ = _hg_branches - _ = _OLD_BRANCH - - -DEFAULT_LOCALE = "en-US" -DEBUG = False -DAEMON_DEBUG = False -DAEMON_HG_INTERVAL = 30 * SECOND # HOW LONG TO WAIT BETWEEN HG REQUESTS (MAX) -DAEMON_WAIT_AFTER_TIMEOUT = 10 * MINUTE # IF WE SEE A TIMEOUT, THEN WAIT -WAIT_AFTER_NODE_FAILURE = 10 * MINUTE # IF WE SEE A NODE FAILURE OR CLUSTER FAILURE, THEN WAIT -WAIT_AFTER_CACHE_MISS = 30 * SECOND # HOW LONG TO WAIT BETWEEN CACHE MISSES -DAEMON_DO_NO_SCAN = ["try"] # SOME BRANCHES ARE NOT WORTH SCANNING -DAEMON_QUEUE_SIZE = 2 ** 15 -DAEMON_RECENT_HG_PULL = 2 * SECOND # DETERMINE IF WE GOT DATA FROM HG (RECENT), OR ES (OLDER) -MAX_TODO_AGE = DAY # THE DAEMON WILL NEVER STOP SCANNING; DO NOT ADD OLD REVISIONS TO THE todo QUEUE -MIN_ETL_AGE = Date("03may2018").unix # ARTIFACTS OLDER THAN THIS IN ES ARE REPLACED -UNKNOWN_PUSH = "Unknown push {{revision}}" - -MAX_DIFF_SIZE = 1000 -DIFF_URL = "{{location}}/raw-rev/{{rev}}" -FILE_URL = "{{location}}/raw-file/{{rev}}{{path}}" - - -last_called_url = {} - - -class HgMozillaOrg(object): - """ - USE hg.mozilla.org FOR REPO INFORMATION - USE ES AS A FASTER CACHE FOR THE SAME - """ - - @override - def __init__( - self, - hg=None, # CONNECT TO hg - repo=None, # CONNECTION INFO FOR ES CACHE - branches=None, # CONNECTION INFO FOR ES CACHE - use_cache=False, # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES - timeout=30 * SECOND, - kwargs=None - ): - if not _hg_branches: - _late_imports() - - self.es_locker = Lock() - self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE) - - self.settings = kwargs - self.timeout = Duration(timeout) - - # VERIFY CONNECTIVITY - with Explanation("Test connect with hg"): - response = http.head(self.settings.hg.url) - - if branches == None: - self.branches = _hg_branches.get_branches(kwargs=kwargs) - self.es = None - return - - self.last_cache_miss = Date.now() - - set_default(repo, {"schema": revision_schema}) - self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo) - - def setup_es(please_stop): - with suppress_exception: - self.es.add_alias() - - with suppress_exception: - self.es.set_refresh_interval(seconds=1) - - 
Thread.run("setup_es", setup_es) - self.branches = _hg_branches.get_branches(kwargs=kwargs) - self.timeout = timeout - Thread.run("hg daemon", self._daemon) - - def _daemon(self, please_stop): - while not please_stop: - with Explanation("looking for work"): - try: - branch, revisions = self.todo.pop(till=please_stop) - except Exception as e: - if please_stop: - break - else: - raise e - if branch.name in DAEMON_DO_NO_SCAN: - continue - revisions = set(revisions) - - # FIND THE REVSIONS ON THIS BRANCH - for r in list(revisions): - try: - rev = self.get_revision(Revision(branch=branch, changeset={"id": r})) - if DAEMON_DEBUG: - Log.note("found revision with push date {{date|datetime}}", date=rev.push.date) - revisions.discard(r) - - if rev.etl.timestamp > Date.now()-DAEMON_RECENT_HG_PULL: - # SOME PUSHES ARE BIG, RUNNING THE RISK OTHER MACHINES ARE - # ALSO INTERESTED AND PERFORMING THE SAME SCAN. THIS DELAY - # WILL HAVE SMALL EFFECT ON THE MAJORITY OF SMALL PUSHES - # https://bugzilla.mozilla.org/show_bug.cgi?id=1417720 - Till(seconds=Random.float(DAEMON_HG_INTERVAL).seconds*2).wait() - - except Exception as e: - Log.warning( - "Scanning {{branch}} {{revision|left(12)}}", - branch=branch.name, - revision=r, - cause=e - ) - if "Read timed out" in e: - Till(seconds=DAEMON_WAIT_AFTER_TIMEOUT.seconds).wait() - - - # FIND ANY BRANCH THAT MAY HAVE THIS REVISION - for r in list(revisions): - self._find_revision(r) - - @cache(duration=HOUR, lock=True) - def get_revision(self, revision, locale=None, get_diff=False, get_moves=True): - """ - EXPECTING INCOMPLETE revision OBJECT - RETURNS revision - """ - rev = revision.changeset.id - if not rev: - return Null - elif rev == "None": - return Null - elif revision.branch.name == None: - return Null - locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE) - output = self._get_from_elasticsearch(revision, locale=locale, get_diff=get_diff) - if output: - if not get_diff: # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED - output.changeset.diff = None - if not get_moves: - output.changeset.moves = None - DEBUG and Log.note("Got hg ({{branch}}, {{locale}}, {{revision}}) from ES", branch=output.branch.name, locale=locale, revision=output.changeset.id) - if output.push.date >= Date.now()-MAX_TODO_AGE: - self.todo.add((output.branch, listwrap(output.parents))) - self.todo.add((output.branch, listwrap(output.children))) - if output.push.date: - return output - - # RATE LIMIT CALLS TO HG (CACHE MISSES) - next_cache_miss = self.last_cache_miss + (Random.float(WAIT_AFTER_CACHE_MISS.seconds * 2) * SECOND) - self.last_cache_miss = Date.now() - if next_cache_miss > self.last_cache_miss: - Log.note("delaying next hg call for {{seconds|round(decimal=1)}}", seconds=next_cache_miss - self.last_cache_miss) - Till(till=next_cache_miss.unix).wait() - - found_revision = copy(revision) - if isinstance(found_revision.branch, (text_type, binary_type)): - lower_name = found_revision.branch.lower() - else: - lower_name = found_revision.branch.name.lower() - - if not lower_name: - Log.error("Defective revision? 
{{rev|json}}", rev=found_revision.branch) - - b = found_revision.branch = self.branches[(lower_name, locale)] - if not b: - b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)] - if not b: - Log.warning("can not find branch ({{branch}}, {{locale}})", branch=lower_name, locale=locale) - return Null - - if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH: - self.branches = _hg_branches.get_branches(kwargs=self.settings) - - push = self._get_push(found_revision.branch, found_revision.changeset.id) - - url1 = found_revision.branch.url.rstrip("/") + "/json-info?node=" + found_revision.changeset.id[0:12] - url2 = found_revision.branch.url.rstrip("/") + "/json-rev/" + found_revision.changeset.id[0:12] - with Explanation("get revision from {{url}}", url=url1, debug=DEBUG): - raw_rev2 = Null - try: - raw_rev1 = self._get_raw_json_info(url1, found_revision.branch) - raw_rev2 = self._get_raw_json_rev(url2, found_revision.branch) - except Exception as e: - if "Hg denies it exists" in e: - raw_rev1 = Data(node=revision.changeset.id) - else: - raise e - output = self._normalize_revision(set_default(raw_rev1, raw_rev2), found_revision, push, get_diff, get_moves) - if output.push.date >= Date.now()-MAX_TODO_AGE: - self.todo.add((output.branch, listwrap(output.parents))) - self.todo.add((output.branch, listwrap(output.children))) - - if not get_diff: # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED - output.changeset.diff = None - if not get_moves: - output.changeset.moves = None - return output - - def _get_from_elasticsearch(self, revision, locale=None, get_diff=False, get_moves=True): - rev = revision.changeset.id - if self.es.cluster.version.startswith("1.7."): - query = { - "query": {"filtered": { - "query": {"match_all": {}}, - "filter": {"and": [ - {"term": {"changeset.id12": rev[0:12]}}, - {"term": {"branch.name": revision.branch.name}}, - {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]} - }}, - "size": 20 - } - else: - query = { - "query": {"bool": {"must": [ - {"term": {"changeset.id12": rev[0:12]}}, - {"term": {"branch.name": revision.branch.name}}, - {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]}}, - "size": 20 - } - - for attempt in range(3): - try: - with self.es_locker: - docs = self.es.search(query).hits.hits - if len(docs) == 0: - return None - best = docs[0]._source - if len(docs) > 1: - for d in docs: - if d._id.endswith(d._source.branch.locale): - best = d._source - Log.warning("expecting no more than one document") - return best - except Exception as e: - e = Except.wrap(e) - if "EsRejectedExecutionException[rejected execution (queue capacity" in e: - (Till(seconds=Random.int(30))).wait() - continue - else: - Log.warning("Bad ES call, waiting for {{num}} seconds", num=WAIT_AFTER_NODE_FAILURE.seconds, cause=e) - Till(seconds=WAIT_AFTER_NODE_FAILURE.seconds).wait() - continue - - Log.warning("ES did not deliver, fall back to HG") - return None - - - @cache(duration=HOUR, lock=True) - def _get_raw_json_info(self, url, branch): - raw_revs = self._get_and_retry(url, branch) - if "(not in 'served' subset)" in raw_revs: - Log.error("Tried {{url}}. Hg denies it exists.", url=url) - if isinstance(raw_revs, text_type) and raw_revs.startswith("unknown revision '"): - Log.error("Tried {{url}}. 
Hg denies it exists.", url=url) - if len(raw_revs) != 1: - Log.error("do not know what to do") - return raw_revs.values()[0] - - @cache(duration=HOUR, lock=True) - def _get_raw_json_rev(self, url, branch): - raw_rev = self._get_and_retry(url, branch) - return raw_rev - - @cache(duration=HOUR, lock=True) - def _get_push(self, branch, changeset_id): - if self.es.cluster.version.startswith("1.7."): - query = { - "query": {"filtered": { - "query": {"match_all": {}}, - "filter": {"and": [ - {"term": {"branch.name": branch.name}}, - {"prefix": {"changeset.id": changeset_id[0:12]}} - ]} - }}, - "size": 1 - } - else: - query = { - "query": {"bool": {"must": [ - {"term": {"branch.name": branch.name}}, - {"prefix": {"changeset.id": changeset_id[0:12]}} - ]}}, - "size": 1 - } - - try: - # ALWAYS TRY ES FIRST - with self.es_locker: - response = self.es.search(query) - json_push = response.hits.hits[0]._source.push - if json_push: - return json_push - except Exception: - pass - - url = branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + changeset_id - with Explanation("Pulling pushlog from {{url}}", url=url, debug=DEBUG): - Log.note( - "Reading pushlog from {{url}}", - url=url, - changeset=changeset_id - ) - data = self._get_and_retry(url, branch) - # QUEUE UP THE OTHER CHANGESETS IN THE PUSH - self.todo.add((branch, [c.node for cs in data.values().changesets for c in cs])) - pushes = [ - Push(id=int(index), date=_push.date, user=_push.user) - for index, _push in data.items() - ] - - if len(pushes) == 0: - return Null - elif len(pushes) == 1: - return pushes[0] - else: - Log.error("do not know what to do") - - def _normalize_revision(self, r, found_revision, push, get_diff, get_moves): - new_names = set(r.keys()) - KNOWN_TAGS - if new_names and not r.tags: - Log.warning( - "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}", - names=new_names, - changeset=r.node, - url=found_revision.branch.url - ) - - changeset = Changeset( - id=r.node, - id12=r.node[0:12], - author=r.user, - description=strings.limit(coalesce(r.description, r.desc), 2000), - date=parse_hg_date(r.date), - files=r.files, - backedoutby=r.backedoutby if r.backedoutby else None, - bug=self._extract_bug_id(r.description) - ) - rev = Revision( - branch=found_revision.branch, - index=r.rev, - changeset=changeset, - parents=unwraplist(list(set(r.parents))), - children=unwraplist(list(set(r.children))), - push=push, - phase=r.phase, - bookmarks=unwraplist(r.bookmarks), - landingsystem=r.landingsystem, - etl={"timestamp": Date.now().unix, "machine": machine_metadata} - ) - - r.pushuser = None - r.pushdate = None - r.pushid = None - r.node = None - r.user = None - r.desc = None - r.description = None - r.date = None - r.files = None - r.backedoutby = None - r.parents = None - r.children = None - r.bookmarks = None - r.landingsystem = None - - set_default(rev, r) - - # ADD THE DIFF - if get_diff: - rev.changeset.diff = self._get_json_diff_from_hg(rev) - if get_moves: - rev.changeset.moves = self._get_moves_from_hg(rev) - - try: - _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE) - with self.es_locker: - self.es.add({"id": _id, "value": rev}) - except Exception as e: - e = Except.wrap(e) - Log.warning("Did not save to ES, waiting {{duration}}", duration=WAIT_AFTER_NODE_FAILURE, cause=e) - Till(seconds=WAIT_AFTER_NODE_FAILURE.seconds).wait() - if "FORBIDDEN/12/index read-only" in e: - pass # KNOWN FAILURE MODE - - return rev - - def 
_get_and_retry(self, url, branch, **kwargs): - """ - requests 2.5.0 HTTPS IS A LITTLE UNSTABLE - """ - kwargs = set_default(kwargs, {"timeout": self.timeout.seconds}) - try: - output = _get_url(url, branch, **kwargs) - return output - except Exception as e: - if UNKNOWN_PUSH in e: - Log.error("Tried {{url}} and failed", {"url": url}, cause=e) - - try: - (Till(seconds=5)).wait() - return _get_url(url.replace("https://", "http://"), branch, **kwargs) - except Exception as f: - pass - - path = url.split("/") - if path[3] == "l10n-central": - # FROM https://hg.mozilla.org/l10n-central/tr/json-pushes?full=1&changeset=a6eeb28458fd - # TO https://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=a6eeb28458fd - path = path[0:3] + ["mozilla-central"] + path[5:] - return self._get_and_retry("/".join(path), branch, **kwargs) - elif len(path) > 5 and path[5] == "mozilla-aurora": - # FROM https://hg.mozilla.org/releases/l10n/mozilla-aurora/pt-PT/json-pushes?full=1&changeset=b44a8c68fc60 - # TO https://hg.mozilla.org/releases/mozilla-aurora/json-pushes?full=1&changeset=b44a8c68fc60 - path = path[0:4] + ["mozilla-aurora"] + path[7:] - return self._get_and_retry("/".join(path), branch, **kwargs) - elif len(path) > 5 and path[5] == "mozilla-beta": - # FROM https://hg.mozilla.org/releases/l10n/mozilla-beta/lt/json-pushes?full=1&changeset=03fbf7556c94 - # TO https://hg.mozilla.org/releases/mozilla-beta/json-pushes?full=1&changeset=b44a8c68fc60 - path = path[0:4] + ["mozilla-beta"] + path[7:] - return self._get_and_retry("/".join(path), branch, **kwargs) - elif len(path) > 7 and path[5] == "mozilla-release": - # FROM https://hg.mozilla.org/releases/l10n/mozilla-release/en-GB/json-pushes?full=1&changeset=57f513ab03308adc7aa02cc2ea8d73fe56ae644b - # TO https://hg.mozilla.org/releases/mozilla-release/json-pushes?full=1&changeset=57f513ab03308adc7aa02cc2ea8d73fe56ae644b - path = path[0:4] + ["mozilla-release"] + path[7:] - return self._get_and_retry("/".join(path), branch, **kwargs) - elif len(path) > 5 and path[4] == "autoland": - # FROM https://hg.mozilla.org/build/autoland/json-pushes?full=1&changeset=3ccccf8e5036179a3178437cabc154b5e04b333d - # TO https://hg.mozilla.org/integration/autoland/json-pushes?full=1&changeset=3ccccf8e5036179a3178437cabc154b5e04b333d - path = path[0:3] + ["try"] + path[5:] - return self._get_and_retry("/".join(path), branch, **kwargs) - - Log.error("Tried {{url}} twice. 
Both failed.", {"url": url}, cause=[e, f]) - - @cache(duration=HOUR, lock=True) - def _find_revision(self, revision): - please_stop = False - locker = Lock() - output = [] - queue = Queue("branches", max=2000) - queue.extend(b for b in self.branches if b.locale == DEFAULT_LOCALE and b.name in ["try", "mozilla-inbound", "autoland"]) - queue.add(THREAD_STOP) - - problems = [] - def _find(please_stop): - for b in queue: - if please_stop: - return - try: - url = b.url + "json-info?node=" + revision - rev = self.get_revision(Revision(branch=b, changeset={"id": revision})) - with locker: - output.append(rev) - Log.note("Revision found at {{url}}", url=url) - except Exception as f: - problems.append(f) - - threads = [] - for i in range(3): - threads.append(Thread.run("find changeset " + text_type(i), _find, please_stop=please_stop)) - - for t in threads: - with assert_no_exception: - t.join() - - return output - - def _extract_bug_id(self, description): - """ - LOOK INTO description to FIND bug_id - """ - if description == None: - return None - match = re.findall(r'[Bb](?:ug)?\s*([0-9]{5,7})', description) - if match: - return int(match[0]) - return None - - def _get_json_diff_from_hg(self, revision): - """ - :param revision: INCOMPLETE REVISION OBJECT - :return: - """ - @cache(duration=MINUTE, lock=True) - def inner(changeset_id): - if self.es.cluster.version.startswith("1.7."): - query = { - "query": {"filtered": { - "query": {"match_all": {}}, - "filter": {"and": [ - {"prefix": {"changeset.id": changeset_id}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]} - }}, - "size": 1 - } - else: - query = { - "query": {"bool": {"must": [ - {"prefix": {"changeset.id": changeset_id}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]}}, - "size": 1 - } - - try: - # ALWAYS TRY ES FIRST - with self.es_locker: - response = self.es.search(query) - json_diff = response.hits.hits[0]._source.changeset.diff - if json_diff: - return json_diff - except Exception as e: - pass - - url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) - DEBUG and Log.note("get unified diff from {{url}}", url=url) - try: - response = http.get(url) - diff = response.content.decode("utf8") - json_diff = diff_to_json(diff) - num_changes = _count(c for f in json_diff for c in f.changes) - if json_diff: - if revision.changeset.description.startswith("merge "): - return None # IGNORE THE MERGE CHANGESETS - elif num_changes < MAX_DIFF_SIZE: - return json_diff - else: - Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes) - for file in json_diff: - file.changes = None - return json_diff - except Exception as e: - Log.warning("could not get unified diff from {{url}}", url=url, cause=e) - - return inner(revision.changeset.id) - - def _get_moves_from_hg(self, revision): - """ - :param revision: INCOMPLETE REVISION OBJECT - :return: - """ - @cache(duration=MINUTE, lock=True) - def inner(changeset_id): - if self.es.cluster.version.startswith("1.7."): - query = { - "query": {"filtered": { - "query": {"match_all": {}}, - "filter": {"and": [ - {"prefix": {"changeset.id": changeset_id}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]} - }}, - "size": 1 - } - else: - query = { - "query": {"bool": {"must": [ - {"prefix": {"changeset.id": changeset_id}}, - {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} - ]}}, - "size": 1 - } - - try: - # ALWAYS TRY ES FIRST - with self.es_locker: - response = self.es.search(query) - moves = 
response.hits.hits[0]._source.changeset.moves - if moves: - return moves - except Exception as e: - pass - - url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) - DEBUG and Log.note("get unified diff from {{url}}", url=url) - try: - moves = http.get(url).content.decode('latin1') # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF - return diff_to_moves(text_type(moves)) - except Exception as e: - Log.warning("could not get unified diff from {{url}}", url=url, cause=e) - - return inner(revision.changeset.id) - - def _get_source_code_from_hg(self, revision, file_path): - response = http.get(expand_template(FILE_URL, {"location": revision.branch.url, "rev": revision.changeset.id, "path": file_path})) - return response.content.decode("utf8", "replace") - - -def _trim(url): - return url.split("/json-pushes?")[0].split("/json-info?")[0].split("/json-rev/")[0] - - -def _get_url(url, branch, **kwargs): - with Explanation("get push from {{url}}", url=url, debug=DEBUG): - response = http.get(url, **kwargs) - data = json2value(response.content.decode("utf8")) - if isinstance(data, (text_type, str)) and data.startswith("unknown revision"): - Log.error(UNKNOWN_PUSH, revision=strings.between(data, "'", "'")) - branch.url = _trim(url) # RECORD THIS SUCCESS IN THE BRANCH - return data - - -def parse_hg_date(date): - if isinstance(date, text_type): - return Date(date) - elif isinstance(date, list): - # FIRST IN TUPLE (timestamp, time_zone) TUPLE, WHERE timestamp IS GMT - return Date(date[0]) - else: - Log.error("Can not deal with date like {{date|json}}", date=date) - - -def minimize_repo(repo): - """ - RETURN A MINIMAL VERSION OF THIS CHANGESET - """ - if repo == None: - return Null - output = wrap(_copy_but(repo, _exclude_from_repo)) - output.changeset.description = strings.limit(output.changeset.description, 1000) - return output - - -_exclude_from_repo = Data() -for k in [ - "changeset.files", - "changeset.diff", - "changeset.moves", - "etl", - "branch.last_used", - "branch.description", - "branch.etl", - "branch.parent_name", - "children", - "parents", - "phase", - "bookmarks", - "tags" -]: - _exclude_from_repo[k] = True -_exclude_from_repo = _exclude_from_repo - - -def _copy_but(value, exclude): - output = {} - for k, v in value.items(): - e = exclude.get(k, {}) - if e!=True: - if isinstance(v, Mapping): - v2 = _copy_but(v, e) - if v2 != None: - output[k] = v2 - elif v != None: - output[k] = v - return output if output else None - - -KNOWN_TAGS = { - "rev", - "node", - "user", - "description", - "desc", - "date", - "files", - "backedoutby", - "parents", - "children", - "branch", - "tags", - "pushuser", - "pushdate", - "pushid", - "phase", - "bookmarks", - "landingsystem" -} diff --git a/vendor/mo_hg/parse.py b/vendor/mo_hg/parse.py deleted file mode 100644 index 1fd2c88..0000000 --- a/vendor/mo_hg/parse.py +++ /dev/null @@ -1,161 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
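# A minimal, self-contained sketch of the cache discipline used by
# get_revision() above: consult the local store (Elasticsearch) first, and
# rate-limit the fallback calls to hg.mozilla.org by spacing out cache
# misses with a randomized delay, mirroring the next_cache_miss logic.
# `store`, `fetch_upstream`, and the 2-second mean spacing are illustrative
# assumptions, not values taken from this module; locking is omitted,
# though the original guards shared state with locks.
import random
import time

class RateLimitedCache(object):
    def __init__(self, store, fetch_upstream, mean_spacing=2.0):
        self.store = store            # dict-like local cache (ES in the original)
        self.fetch = fetch_upstream   # expensive call to the origin server
        self.mean_spacing = mean_spacing
        self.next_allowed = 0.0       # earliest time for the next upstream call

    def get(self, key):
        hit = self.store.get(key)
        if hit is not None:
            return hit
        # cache miss: wait out the randomized quiet period, so misses
        # average one upstream call per mean_spacing seconds
        delay = self.next_allowed - time.time()
        if delay > 0:
            time.sleep(delay)
        # uniform(0, 2*mean) has mean == mean_spacing, as in the original's
        # Random.float(WAIT_AFTER_CACHE_MISS.seconds * 2)
        self.next_allowed = time.time() + random.uniform(0, 2 * self.mean_spacing)
        value = self.fetch(key)
        self.store[key] = value
        return value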
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import re - -from jx_base import DataClass -from mo_dots import wrap -from mo_logs import Log, strings - -MAX_CONTENT_LENGTH = 500 # SOME "lines" FOR CODE ARE REALLY TOO LONG - -GET_DIFF = "{{location}}/rev/{{rev}}" -GET_FILE = "{{location}}/file/{{rev}}{{path}}" - -HUNK_HEADER = re.compile(r"^-(\d+),(\d+) \+(\d+),(\d+) @@.*") -FILE_SEP = re.compile(r"^--- ", re.MULTILINE) -HUNK_SEP = re.compile(r"^@@ ", re.MULTILINE) - -MOVE = { - ' ': lambda c: (c[0]+1, c[1]+1), - '\\': lambda c: c, # FOR "\ no newline at end of file - '+': lambda c: (c[0]+1, c[1]), - '-': lambda c: (c[0], c[1]+1) -} -no_change = MOVE[' '] - - -def diff_to_json(unified_diff): - """ - CONVERT UNIFIED DIFF TO EASY-TO-STORE JSON FORMAT - :param unified_diff: text - :return: JSON details - """ - output = [] - files = FILE_SEP.split(unified_diff)[1:] - for file_ in files: - changes = [] - old_file_header, new_file_header, file_diff = file_.split("\n", 2) - old_file_path = old_file_header[1:] # eg old_file_header == "a/testing/marionette/harness/marionette_harness/tests/unit/unit-tests.ini" - new_file_path = new_file_header[5:] # eg new_file_header == "+++ b/tests/resources/example_file.py" - - c = 0, 0 - hunks = HUNK_SEP.split(file_diff)[1:] - for hunk in hunks: - line_diffs = hunk.split("\n") - old_start, old_length, new_start, new_length = HUNK_HEADER.match(line_diffs[0]).groups() - next_c = max(0, int(new_start)-1), max(0, int(old_start)-1) - if next_c[0] - next_c[1] != c[0] - c[1]: - Log.error("expecting a skew of {{skew}}", skew=next_c[0] - next_c[1]) - if c[0] > next_c[0]: - Log.error("can not handle out-of-order diffs") - while c[0] != next_c[0]: - c = no_change(c) - - for line in line_diffs[1:]: - if not line: - continue - if ( - line.startswith("new file mode") or - line.startswith("deleted file mode") or - line.startswith("index ") or - line.startswith("diff --git") - ): - # HAPPENS AT THE TOP OF NEW FILES - # diff --git a/security/sandbox/linux/SandboxFilter.cpp b/security/sandbox/linux/SandboxFilter.cpp - # u'new file mode 100644' - # u'deleted file mode 100644' - # index a763e390731f5379ddf5fa77090550009a002d13..798826525491b3d762503a422b1481f140238d19 - # GIT binary patch - # literal 30804 - break - d = line[0] - if d == '+': - changes.append({"new": {"line": int(c[0]), "content": strings.limit(line[1:], MAX_CONTENT_LENGTH)}}) - elif d == '-': - changes.append({"old": {"line": int(c[1]), "content": strings.limit(line[1:], MAX_CONTENT_LENGTH)}}) - try: - c = MOVE[d](c) - except Exception as e: - Log.warning("bad line {{line|quote}}", line=line, cause=e) - - output.append({ - "new": {"name": new_file_path}, - "old": {"name": old_file_path}, - "changes": changes - }) - return wrap(output) - - -def diff_to_moves(unified_diff): - """ - TODO: WE SHOULD BE ABLE TO STREAM THE RAW DIFF SO WE HANDLE LARGE ONES - FOR EACH FILE, RETURN AN ARRAY OF (line, action) PAIRS - :param unified_diff: raw diff - :return: (file, line, action) triples - """ - output = [] - files = FILE_SEP.split(unified_diff)[1:] - for file_ in files: - changes = [] - old_file_header, new_file_header, file_diff = file_.split("\n", 2) - old_file_path = old_file_header[1:] # eg old_file_header == "a/testing/marionette/harness/marionette_harness/tests/unit/unit-tests.ini" - new_file_path = new_file_header[5:] # eg new_file_header == "+++ b/tests/resources/example_file.py" - - c = 0, 0 - 
hunks = HUNK_SEP.split(file_diff)[1:] - for hunk in hunks: - line_diffs = hunk.split("\n") - old_start, old_length, new_start, new_length = HUNK_HEADER.match(line_diffs[0]).groups() - next_c = max(0, int(new_start)-1), max(0, int(old_start)-1) - if next_c[0] - next_c[1] != c[0] - c[1]: - Log.error("expecting a skew of {{skew}}", skew=next_c[0] - next_c[1]) - if c[0] > next_c[0]: - Log.error("can not handle out-of-order diffs") - while c[0] != next_c[0]: - c = no_change(c) - - for line in line_diffs[1:]: - if not line: - continue - if ( - line.startswith("new file mode") or - line.startswith("deleted file mode") or - line.startswith("index ") or - line.startswith("diff --git") - ): - # HAPPENS AT THE TOP OF NEW FILES - # diff --git a/security/sandbox/linux/SandboxFilter.cpp b/security/sandbox/linux/SandboxFilter.cpp - # u'new file mode 100644' - # u'deleted file mode 100644' - # index a763e390731f5379ddf5fa77090550009a002d13..798826525491b3d762503a422b1481f140238d19 - # GIT binary patch - # literal 30804 - break - d = line[0] - if d != ' ': - changes.append(Action(line=int(c[0]), action=d)) - c = MOVE[d](c) - - output.append({ - "new": {"name": new_file_path}, - "old": {"name": old_file_path}, - "changes": changes - }) - return wrap(output) - - -Action = DataClass( - "Action", - ["line", "action"], - constraint=True # TODO: remove when constrain=None is the same as True -) diff --git a/vendor/mo_hg/rate_logger.py b/vendor/mo_hg/rate_logger.py deleted file mode 100644 index a5f7b52..0000000 --- a/vendor/mo_hg/rate_logger.py +++ /dev/null @@ -1,45 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from mo_logs import Log -from mo_threads import Till, Thread, Lock -from mo_times import Date, SECOND - -METRIC_DECAY_RATE = 0.9 # PER-SECOND DECAY RATE FOR REPORTING REQUEST RATE -METRIC_REPORT_PERIOD = 10 * SECOND - - -class RateLogger(object): - - def __init__(self, name): - self.name = name - self.lock = Lock("rate locker") - self.request_rate = 0.0 - self.last_request = Date.now() - - Thread.run("rate logger", self._daemon) - - def add(self, timestamp): - with self.lock: - decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds - self.request_rate = decay*self.request_rate + 1 - self.last_request = timestamp - - def _daemon(self, please_stop): - while not please_stop: - timestamp = Date.now() - with self.lock: - decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds - request_rate = self.request_rate = decay * self.request_rate - self.last_request = timestamp - - Log.note("{{name}} request rate: {{rate|round(places=2)}} requests per second", name=self.name, rate=request_rate) - (please_stop | Till(seconds=METRIC_REPORT_PERIOD.seconds)).wait() - diff --git a/vendor/mo_hg/relay_app.py b/vendor/mo_hg/relay_app.py deleted file mode 100644 index 20fa0e6..0000000 --- a/vendor/mo_hg/relay_app.py +++ /dev/null @@ -1,107 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
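# A condensed sketch of the line-cursor technique that diff_to_json() and
# diff_to_moves() above share: walk a unified-diff hunk while tracking a
# (new_line, old_line) pair, advancing one or both counters depending on
# the leading character of each diff line (the MOVE table). The helper
# name and yielded tuples are illustrative, not part of this module.
def hunk_changes(hunk_lines, new_start, old_start):
    """YIELD (kind, line_number, content) FOR EACH +/- LINE IN A HUNK"""
    new_line, old_line = new_start, old_start
    for line in hunk_lines:
        if not line:
            continue
        d, content = line[0], line[1:]
        if d == '+':
            yield ('new', new_line, content)
            new_line += 1
        elif d == '-':
            yield ('old', old_line, content)
            old_line += 1
        elif d == ' ':
            new_line += 1
            old_line += 1
        # '\' ("no newline at end of file") advances neither counter

# e.g. list(hunk_changes([" a", "-b", "+B", " c"], 10, 10))
# -> [('old', 11, 'b'), ('new', 11, 'B')]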
-# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import os - -import flask -from flask import Flask, Response - -from mo_hg.cache import Cache -from mo_json import value2json -from mo_logs import Log, constants, startup, Except -from mo_logs.strings import unicode2utf8 -from pyLibrary.env.flask_wrappers import cors_wrapper - -APP_NAME = "HG Relay" - - -class RelayApp(Flask): - - def run(self, *args, **kwargs): - # ENSURE THE LOGGING IS CLEANED UP - try: - Flask.run(self, *args, **kwargs) - except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT - Log.warning(APP_NAME + " service shutdown!", cause=e) - finally: - Log.stop() - - -flask_app = None -config = None -cache = None - - -@cors_wrapper -def relay_get(path): - try: - return cache.request("get", path, flask.request.headers) - except Exception as e: - e = Except.wrap(e) - Log.warning("could not handle request", cause=e) - return Response( - unicode2utf8(value2json(e, pretty=True)), - status=400, - headers={ - "Content-Type": "text/html" - } - ) - - -@cors_wrapper -def relay_post(path): - try: - return cache.request("post", path, flask.request.headers) - except Exception as e: - e = Except.wrap(e) - Log.warning("could not handle request", cause=e) - return Response( - unicode2utf8(value2json(e, pretty=True)), - status=400, - headers={ - "Content-Type": "text/html" - } - ) - - -def add(any_flask_app): - global cache - - cache = Cache(config.cache) - any_flask_app.add_url_rule(str('/'), None, relay_get, methods=[str('GET')]) - any_flask_app.add_url_rule(str('/'), None, relay_post, methods=[str('POST')]) - any_flask_app.add_url_rule(str('/'), None, relay_get, methods=[str('GET')]) - any_flask_app.add_url_rule(str('/'), None, relay_post, methods=[str('POST')]) - - -if __name__ in ("__main__",): - Log.note("Starting " + APP_NAME + " Service App...") - flask_app = RelayApp(__name__) - - try: - config = startup.read_settings( - filename=os.environ.get('HG_RELAY_CONFIG') - ) - constants.set(config.constants) - Log.start(config.debug) - - add(flask_app) - Log.note("Started " + APP_NAME + " Service") - except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT - try: - Log.error("Serious problem with " + APP_NAME + " service construction! Shutdown!", cause=e) - finally: - Log.stop() - - if config.flask: - if config.flask.port and config.args.process_num: - config.flask.port += config.args.process_num - Log.note("Running Flask...") - flask_app.run(**config.flask) diff --git a/vendor/mo_hg/repos/__init__.py b/vendor/mo_hg/repos/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/vendor/mo_hg/repos/changesets.py b/vendor/mo_hg/repos/changesets.py deleted file mode 100644 index d1d5277..0000000 --- a/vendor/mo_hg/repos/changesets.py +++ /dev/null @@ -1,26 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
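# A minimal sketch of the decayed request-rate metric kept by the
# RateLogger class above (rate_logger.py): each event multiplies the
# running total by decay**seconds_since_last_event and adds 1, so the
# counter approximates recent requests-per-second without storing
# individual timestamps. The 0.9-per-second decay matches
# METRIC_DECAY_RATE above; the lock and reporting daemon are omitted
# for brevity.
import time

class DecayedRate(object):
    def __init__(self, decay=0.9):
        self.decay = decay
        self.rate = 0.0
        self.last = time.time()

    def add(self, timestamp=None):
        timestamp = time.time() if timestamp is None else timestamp
        self.rate = self.rate * (self.decay ** (timestamp - self.last)) + 1
        self.last = timestamp

    def current(self, timestamp=None):
        # apply the decay for the idle period, but do not count an event
        timestamp = time.time() if timestamp is None else timestamp
        return self.rate * (self.decay ** (timestamp - self.last))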
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from mo_dots import Data - - -class Changeset(Data): - - def __hash__(self): - return hash(self.id) - - def __eq__(self, other): - if other==None: - return False - return self.id == other.id - diff --git a/vendor/mo_hg/repos/pushs.py b/vendor/mo_hg/repos/pushs.py deleted file mode 100644 index c856949..0000000 --- a/vendor/mo_hg/repos/pushs.py +++ /dev/null @@ -1,18 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from mo_dots import Data - - -class Push(Data): - pass diff --git a/vendor/mo_hg/repos/revisions.py b/vendor/mo_hg/repos/revisions.py deleted file mode 100644 index 6bd4ca6..0000000 --- a/vendor/mo_hg/repos/revisions.py +++ /dev/null @@ -1,110 +0,0 @@ -# encoding: utf-8 -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from mo_dots import Data - - -class Revision(Data): - def __hash__(self): - return hash((self.branch.name.lower(), self.changeset.id[:12])) - - def __eq__(self, other): - if other == None: - return False - return (self.branch.name.lower(), self.changeset.id[:12]) == (other.branch.name.lower(), other.changeset.id[:12]) - - -revision_schema = { - - - "settings": { - "index.number_of_replicas": 1, - "index.number_of_shards": 6, - "analysis": { - "tokenizer": { - "left250": { - "type": "pattern", - "pattern": "^.{1,250}" - } - }, - "analyzer": { - "description_limit": { - "type": "custom", - "tokenizer": "left250", - "filter": [ - "lowercase", - "asciifolding" - ] - } - } - } - }, - "mappings": { - "revision": { - "_all": { - "enabled": False - }, - "properties": { - "changeset": { - "type": "object", - "properties": { - "description": { - "store": True, - "index": True, - "type": "text", - "fields": { - "raw": { - "type": "text", - "analyzer": "description_limit" - } - } - }, - "diff": { - "type": "nested", - "dynamic": True, - "properties": { - "changes": { - "type": "nested", - "dynamic": True, - "properties": { - "new": { - "type": "object", - "dynamic": True, - "properties": { - "content": { - "store": True, - "type": "keyword" - } - } - }, - "old": { - "type": "object", - "dynamic": True, - "properties": { - "content": { - "store": True, - "type": "keyword" - } - } - } - } - } - - } - } - } - } - } - } - } -} diff --git a/vendor/jx_elasticsearch/es09/__init__.py b/vendor/mo_http/__init__.py similarity index 100% rename from vendor/jx_elasticsearch/es09/__init__.py rename to vendor/mo_http/__init__.py diff --git a/vendor/pyLibrary/env/big_data.py b/vendor/mo_http/big_data.py similarity index 90% rename from vendor/pyLibrary/env/big_data.py rename to vendor/mo_http/big_data.py index 99b12a5..662674f 100644 --- a/vendor/pyLibrary/env/big_data.py +++ b/vendor/mo_http/big_data.py @@ -4,26 +4,22 @@ # License, v. 2.0. 
If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals import gzip import struct -from io import BytesIO -from tempfile import TemporaryFile +import time import zipfile import zlib +from io import BytesIO +from tempfile import TemporaryFile -import time - -from mo_future import text_type, PY3, long - -from mo_logs.exceptions import suppress_exception +import mo_math +from mo_future import PY3, long, text, next from mo_logs import Log -from mo_math import Math +from mo_logs.exceptions import suppress_exception # LIBRARY TO DEAL WITH BIG DATA ARRAYS AS ITERATORS OVER (IR)REGULAR SIZED # BLOCKS, OR AS ITERATORS OVER LINES @@ -33,7 +29,7 @@ MIN_READ_SIZE = 8 * 1024 MAX_STRING_SIZE = 1 * 1024 * 1024 -class FileString(text_type): +class FileString(text): """ ACTS LIKE A STRING, BUT IS A FILE """ @@ -48,7 +44,7 @@ class FileString(text_type): return self def split(self, sep): - if sep != "\n": + if sep not in (b"\n", u"\n"): Log.error("Can only split by lines") self.file.seek(0) return LazyLines(self.file) @@ -61,7 +57,7 @@ class FileString(text_type): return file_length def __getslice__(self, i, j): - j = Math.min(j, len(self)) + j = mo_math.min(j, len(self)) if j - 1 > 2 ** 28: Log.error("Slice of {{num}} bytes is too big", num=j - i) try: @@ -203,7 +199,7 @@ class LazyLines(object): try: if item == self._next: self._next += 1 - return self._iter.next() + return next(self._iter) elif item == self._next - 1: return self._last else: @@ -249,7 +245,7 @@ class CompressedLines(LazyLines): def __getitem__(self, item): try: if item == self._next: - self._last = self._iter.next() + self._last = next(self._iter) self._next += 1 return self._last elif item == self._next - 1: @@ -279,7 +275,7 @@ def compressed_bytes2ibytes(compressed, size): decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS) - for i in range(0, Math.ceiling(len(compressed), size), size): + for i in range(0, mo_math.ceiling(len(compressed), size), size): try: block = compressed[i: i + size] yield decompressor.decompress(block) @@ -298,13 +294,13 @@ def ibytes2ilines(generator, encoding="utf8", flexible=False, closer=None): :return: """ decode = get_decoder(encoding=encoding, flexible=flexible) - _buffer = generator.next() + _buffer = next(generator) s = 0 e = _buffer.find(b"\n") while True: while e == -1: try: - next_block = generator.next() + next_block = next(generator) _buffer = _buffer[s:] + next_block s = 0 e = _buffer.find(b"\n") @@ -387,7 +383,7 @@ def icompressed2ibytes(source): except Exception as e: Log.error("problem", cause=e) bytes_count += len(data) - if Math.floor(last_bytes_count, 1000000) != Math.floor(bytes_count, 1000000): + if mo_math.floor(last_bytes_count, 1000000) != mo_math.floor(bytes_count, 1000000): last_bytes_count = bytes_count DEBUG and Log.note("bytes={{bytes}}", bytes=bytes_count) yield data @@ -462,3 +458,35 @@ def get_decoder(encoding, flexible=False): def do_decode2(v): return v.decode(encoding) return do_decode2 + + +def zip2bytes(compressed): + """ + UNZIP DATA + """ + if hasattr(compressed, "read"): + return gzip.GzipFile(fileobj=compressed, mode='r') + + buff = BytesIO(compressed) + archive = gzip.GzipFile(fileobj=buff, mode='r') + return safe_size(archive) + + 
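# The two helpers at this point (zip2bytes above, bytes2zip just below)
# are thin wrappers over gzip; this standard-library sketch shows the
# round trip they implement for the simple all-in-memory case, leaving
# out the streaming/TemporaryFile branches.
import gzip
from io import BytesIO

def roundtrip(payload):
    buff = BytesIO()
    with gzip.GzipFile(fileobj=buff, mode='w') as archive:  # like bytes2zip
        archive.write(payload)
    compressed = buff.getvalue()
    with gzip.GzipFile(fileobj=BytesIO(compressed), mode='r') as archive:  # like zip2bytes
        return archive.read()

assert roundtrip(b"some body") == b"some body"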
+def bytes2zip(bytes): + """ + RETURN COMPRESSED BYTES + """ + if hasattr(bytes, "read"): + buff = TemporaryFile() + archive = gzip.GzipFile(fileobj=buff, mode='w') + for b in bytes: + archive.write(b) + archive.close() + buff.seek(0) + return FileString(buff) + + buff = BytesIO() + archive = gzip.GzipFile(fileobj=buff, mode='w') + archive.write(bytes) + archive.close() + return buff.getvalue() diff --git a/vendor/pyLibrary/env/http.py b/vendor/mo_http/http.py similarity index 69% rename from vendor/pyLibrary/env/http.py rename to vendor/mo_http/http.py index f7fec8d..b88e1d2 100644 --- a/vendor/pyLibrary/env/http.py +++ b/vendor/mo_http/http.py @@ -4,7 +4,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # MIMICS THE requests API (http://docs.python-requests.org/en/latest/) @@ -12,35 +12,33 @@ # WITH ADDED default_headers THAT CAN BE SET USING mo_logs.settings # EG # {"debug.constants":{ -# "pyLibrary.env.http.default_headers":{"From":"klahnakoski@mozilla.com"} +# "mo_http.http.default_headers":{"From":"klahnakoski@mozilla.com"} # }} -from __future__ import absolute_import -from __future__ import division +from __future__ import absolute_import, division +import zipfile from contextlib import closing from copy import copy from mmap import mmap from numbers import Number from tempfile import TemporaryFile -from requests import sessions, Response - -from jx_python import jx -from mo_dots import Data, coalesce, wrap, set_default, unwrap, Null +import mo_math +from mo_dots import Data, Null, coalesce, is_list, set_default, unwrap, wrap, is_sequence from mo_files.url import URL -from mo_future import text_type, PY2 -from mo_json import value2json, json2value +from mo_future import PY2, is_text, text +from mo_future import StringIO +from mo_json import json2value, value2json +from mo_kwargs import override from mo_logs import Log from mo_logs.exceptions import Except -from mo_logs.strings import utf82unicode, unicode2utf8 -from mo_math import Math -from mo_threads import Lock -from mo_threads import Till -from mo_times.durations import Duration -from pyLibrary import convert -from pyLibrary.env.big_data import safe_size, ibytes2ilines, icompressed2ibytes +from mo_threads import Lock, Till +from mo_times import Timer, Duration +from requests import Response, sessions + +from mo_http.big_data import ibytes2ilines, icompressed2ibytes, safe_size, ibytes2icompressed, bytes2zip, zip2bytes DEBUG = False FILE_SIZE_LIMIT = 100 * 1024 * 1024 @@ -61,7 +59,8 @@ _warning_sent = False request_count = 0 -def request(method, url, headers=None, zip=None, retry=None, **kwargs): +@override +def request(method, url, headers=None, data=None, json=None, zip=None, retry=None, timeout=None, session=None, kwargs=None): """ JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES DEMANDS data IS ONE OF: @@ -69,34 +68,37 @@ def request(method, url, headers=None, zip=None, retry=None, **kwargs): * LIST OF JSON-SERIALIZABLE STRUCTURES, OR * None - Parameters - * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH - * json - JSON-SERIALIZABLE STRUCTURE - * retry - {"times": x, "sleep": y} STRUCTURE - - THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT** - IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH - INCLUDES url AND headers + :param method: GET, POST, etc + :param url: URL + :param 
headers: dict OF HTTP REQUEST HEADERS + :param data: BYTES (OR GENERATOR OF BYTES) + :param json: JSON-SERIALIZABLE STRUCTURE + :param zip: ZIP THE REQUEST BODY, IF BIG ENOUGH + :param retry: {"times": x, "sleep": y} STRUCTURE + :param timeout: SECONDS TO WAIT FOR RESPONSE + :param session: Session OBJECT, IF YOU HAVE ONE + :param kwargs: ALL PARAMETERS (DO NOT USE) + :return: """ global _warning_sent global request_count if not _warning_sent and not default_headers: - Log.warning(text_type( - "The pyLibrary.env.http module was meant to add extra " + + Log.warning(text( + "The mo_http.http module was meant to add extra " + "default headers to all requests, specifically the 'Referer' " + - "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " + - "function to set `pyLibrary.env.http.default_headers`" + "header with a URL to the project. Use the `mo_logs.constants.set()` " + + "function to set `mo_http.http.default_headers`" )) _warning_sent = True - if isinstance(url, list): + if is_list(url): # TRY MANY URLS failures = [] - for remaining, u in jx.countdown(url): + for remaining, u in countdown(url): try: - response = request(method, u, retry=retry, **kwargs) - if Math.round(response.status_code, decimal=-2) not in [400, 500]: + response = request(url=u, kwargs=kwargs) + if mo_math.round(response.status_code, decimal=-2) not in [400, 500]: return response if not remaining: return response @@ -105,48 +107,49 @@ def request(method, url, headers=None, zip=None, retry=None, **kwargs): failures.append(e) Log.error(u"Tried {{num}} urls", num=len(url), cause=failures) - if 'session' in kwargs: - session = kwargs['session'] - del kwargs['session'] - sess = Null + if session: + close_after_response = Null else: - sess = session = sessions.Session() + close_after_response = session = sessions.Session() - with closing(sess): - if PY2 and isinstance(url, text_type): + with closing(close_after_response): + if PY2 and is_text(url): # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE url = url.encode('ascii') try: - set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS) - _to_ascii_dict(kwargs) + set_default(kwargs, DEFAULTS) # HEADERS - headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers)) + headers = unwrap(set_default(headers, session.headers, default_headers)) _to_ascii_dict(headers) - del kwargs['headers'] # RETRY - retry = wrap(kwargs['retry']) - if isinstance(retry, Number): - retry = set_default({"times":retry}, DEFAULTS['retry']) - if isinstance(retry.sleep, Duration): + retry = wrap(retry) + if retry == None: + retry = set_default({}, DEFAULTS['retry']) + elif isinstance(retry, Number): + retry = set_default({"times": retry}, DEFAULTS['retry']) + elif isinstance(retry.sleep, Duration): retry.sleep = retry.sleep.seconds - del kwargs['retry'] # JSON - if 'json' in kwargs: - kwargs['data'] = value2json(kwargs['json']).encode('utf8') - del kwargs['json'] + if json != None: + data = value2json(json).encode('utf8') # ZIP + zip = coalesce(zip, DEFAULTS['zip']) set_default(headers, {'Accept-Encoding': 'compress, gzip'}) - if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000: - compressed = convert.bytes2zip(kwargs['data']) - headers['content-encoding'] = 'gzip' - kwargs['data'] = compressed - del kwargs['zip'] + if zip: + if is_sequence(data): + compressed = ibytes2icompressed(data) + headers['content-encoding'] = 'gzip' + data = compressed + elif len(coalesce(data)) > 1000: + compressed = bytes2zip(data) + 
headers['content-encoding'] = 'gzip' + data = compressed except Exception as e: Log.error(u"Request setup failure on {{url}}", url=url, cause=e) @@ -156,9 +159,13 @@ def request(method, url, headers=None, zip=None, retry=None, **kwargs): Till(seconds=retry.sleep).wait() try: - DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url)) request_count += 1 - return session.request(method=method, headers=headers, url=str(url), **kwargs) + with Timer( + "http {{method|upper}} to {{url}}", + param={"method": method, "url": text(url)}, + verbose=DEBUG + ): + return _session_request(session, url=str(url), headers=headers, data=data, json=None, kwargs=kwargs) except Exception as e: e = Except.wrap(e) if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e: @@ -167,23 +174,25 @@ def request(method, url, headers=None, zip=None, retry=None, **kwargs): errors.append(e) if " Read timed out." in errors[0]: - Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0]) + Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0]) else: Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0]) +_session_request = override(sessions.Session.request) + if PY2: def _to_ascii_dict(headers): if headers is None: return for k, v in copy(headers).items(): - if isinstance(k, text_type): + if is_text(k): del headers[k] - if isinstance(v, text_type): + if is_text(v): headers[k.encode('ascii')] = v.encode('ascii') else: headers[k.encode('ascii')] = v - elif isinstance(v, text_type): + elif is_text(v): headers[k] = v.encode('ascii') else: def _to_ascii_dict(headers): @@ -201,9 +210,17 @@ def get_json(url, **kwargs): response = get(url, **kwargs) try: c = response.all_content - return json2value(utf82unicode(c)) + path = URL(url).path + if path.endswith(".zip"): + buff = StringIO(c) + archive = zipfile.ZipFile(buff, mode='r') + c = archive.read(archive.namelist()[0]) + elif path.endswith(".gz"): + c = zip2bytes(c) + + return json2value(c.decode('utf8')) except Exception as e: - if Math.round(response.status_code, decimal=-2) in [400, 500]: + if mo_math.round(response.status_code, decimal=-2) in [400, 500]: Log.error(u"Bad GET response: {{code}}", code=response.status_code) else: Log.error(u"Good GET requests, but bad JSON", cause=e) @@ -226,14 +243,14 @@ def post_json(url, **kwargs): ASSUME RESPONSE IN IN JSON """ if 'json' in kwargs: - kwargs['data'] = unicode2utf8(value2json(kwargs['json'])) + kwargs['data'] = value2json(kwargs['json']).encode('utf8') del kwargs['json'] elif 'data' in kwargs: - kwargs['data'] = unicode2utf8(value2json(kwargs['data'])) + kwargs['data'] = value2json(kwargs['data']).encode('utf8') else: Log.error(u"Expecting `json` parameter") response = post(url, **kwargs) - details = json2value(utf82unicode(response.content)) + details = json2value(response.content.decode('utf8')) if response.status_code not in [200, 201, 202]: if "template" in details: @@ -386,3 +403,8 @@ class Generator_usingStream(object): def __del__(self): self.close() + +def countdown(vals): + remaining = len(vals) - 1 + return [(remaining - i, v) for i, v in enumerate(vals)] + diff --git a/vendor/mo_json/__init__.py b/vendor/mo_json/__init__.py index a0d4af5..6f1bed3 100644 --- a/vendor/mo_json/__init__.py +++ b/vendor/mo_json/__init__.py @@ -5,29 +5,45 
@@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from datetime import date, datetime, timedelta +from decimal import Decimal import math import re -from collections import Mapping -from datetime import date, timedelta, datetime -from decimal import Decimal -from mo_dots import FlatList, NullType, Data, wrap_leaves, wrap, Null, SLOT +from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, wrap, wrap_leaves from mo_dots.objects import DataObject -from mo_future import text_type, none_type, long, binary_type, PY2 -from mo_logs import Except, strings, Log +from mo_future import PY2, integer_types, is_binary, is_text, items, long, none_type, text +from mo_logs import Except, Log, strings from mo_logs.strings import expand_template from mo_times import Date, Duration FIND_LOOPS = False -SNAP_TO_BASE_10 = True # Identify floats near a round base10 value (has 000 or 999) and shorten +SNAP_TO_BASE_10 = False # Identify floats near a round base10 value (has 000 or 999) and shorten CAN_NOT_DECODE_JSON = "Can not decode JSON" +IS_NULL = '0' +BOOLEAN = 'boolean' +INTEGER = 'integer' +NUMBER = 'number' +TIME = 'time' +INTERVAL = 'interval' +STRING = 'string' +OBJECT = 'object' +NESTED = "nested" +EXISTS = "exists" + +ALL_TYPES = {IS_NULL: IS_NULL, BOOLEAN: BOOLEAN, INTEGER: INTEGER, NUMBER: NUMBER, TIME:TIME, INTERVAL:INTERVAL, STRING: STRING, OBJECT: OBJECT, NESTED: NESTED, EXISTS: EXISTS} +JSON_TYPES = (BOOLEAN, INTEGER, NUMBER, STRING, OBJECT) +NUMBER_TYPES = (INTEGER, NUMBER) +PRIMITIVE = (EXISTS, BOOLEAN, INTEGER, NUMBER, TIME, INTERVAL, STRING) +STRUCT = (EXISTS, OBJECT, NESTED) + + +true, false, null = True, False, None _get = object.__getattribute__ @@ -67,20 +83,21 @@ def float2json(value): digits, more_digits = _snap_to_base_10(mantissa) int_exp = int(str_exp) + more_digits if int_exp > 15: - return sign + digits[0] + '.' + (digits[1:].rstrip('0') or '0') + u"e" + text_type(int_exp) + return sign + digits[0] + '.' + (digits[1:].rstrip('0') or '0') + u"e" + text(int_exp) elif int_exp >= 0: return sign + (digits[:1 + int_exp] + '.' + digits[1 + int_exp:].rstrip('0')).rstrip('.') elif -4 < int_exp: digits = ("0" * (-int_exp)) + digits return sign + (digits[:1] + '.' + digits[1:].rstrip('0')).rstrip('.') else: - return sign + digits[0] + '.' + (digits[1:].rstrip('0') or '0') + u"e" + text_type(int_exp) + return sign + digits[0] + '.' 
+ (digits[1:].rstrip('0') or '0') + u"e" + text(int_exp) except Exception as e: from mo_logs import Log Log.error("not expected", e) def _snap_to_base_10(mantissa): + # TODO: https://lists.nongnu.org/archive/html/gcl-devel/2012-10/pdfkieTlklRzN.pdf digits = mantissa.replace('.', '') if SNAP_TO_BASE_10: f9 = strings.find(digits, '999') @@ -88,7 +105,7 @@ def _snap_to_base_10(mantissa): if f9 == 0: return '1000000000000000', 1 elif f9 < f0: - digits = text_type(int(digits[:f9]) + 1) + ('0' * (16 - f9)) + digits = text(int(digits[:f9]) + 1) + ('0' * (16 - f9)) else: digits = digits[:f0]+('0'*(16-f0)) return digits, 0 @@ -110,7 +127,7 @@ def _keep_whitespace(value): return None -def _trim_whitespace(value): +def trim_whitespace(value): value_ = value.strip() if value_: return value_ @@ -135,7 +152,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): if type_ in (none_type, NullType): return None - elif type_ is text_type: + elif type_ is text: return scrub_text(value) elif type_ is float: if math.isnan(value) or math.isinf(value): @@ -143,7 +160,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): return scrub_number(value) elif type_ is bool: return value - elif type_ in (int, long): + elif type_ in integer_types: return scrub_number(value) elif type_ in (date, datetime): return scrub_number(datetime2unix(value)) @@ -154,12 +171,12 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): elif type_ is Duration: return scrub_number(value.seconds) elif type_ is str: - return utf82unicode(value) + return value.decode('utf8') elif type_ is Decimal: return scrub_number(value) elif type_ is Data: return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number) - elif isinstance(value, Mapping): + elif is_data(value): _id = id(value) if _id in is_done: Log.warning("possible loop in structure detected") @@ -168,16 +185,16 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): output = {} for k, v in value.items(): - if isinstance(k, text_type): + if is_text(k): pass - elif isinstance(k, binary_type): + elif is_binary(k): k = k.decode('utf8') # elif hasattr(k, "__unicode__"): - # k = text_type(k) + # k = text(k) else: Log.error("keys must be strings") v = _scrub(v, is_done, stack, scrub_text, scrub_number) - if v != None or isinstance(v, Mapping): + if v != None or is_data(v): output[k] = v is_done.discard(_id) @@ -187,7 +204,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): for v in value: v = _scrub(v, is_done, stack, scrub_text, scrub_number) output.append(v) - return output + return output # if output else None elif type_ is type: return value.__name__ elif type_.__name__ == "bool_": # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!) 
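# A miniature of the _scrub() dispatch shown in the surrounding hunks:
# normalize a value into JSON-safe primitives (dates to unix seconds,
# NaN/inf to null, bytes decoded as UTF-8, None-valued keys dropped).
# Only a few of the branches are reproduced, naive timestamps are
# assumed, and unknown objects fall back to repr() here rather than the
# DataObject recursion the original performs.
import math
from datetime import date, datetime
from decimal import Decimal

def mini_scrub(value):
    if value is None:
        return None
    if isinstance(value, bool):
        return value
    if isinstance(value, float):
        return None if (math.isnan(value) or math.isinf(value)) else value
    if isinstance(value, (int, Decimal)):
        return value
    if isinstance(value, (datetime, date)):
        # rough stand-in for datetime2unix
        return (value - type(value)(1970, 1, 1)).total_seconds()
    if isinstance(value, bytes):
        return value.decode('utf8')
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        output = {}
        for k, v in value.items():
            v = mini_scrub(v)
            if v is not None:
                output[k] = v
        return output
    if isinstance(value, (list, tuple, set)):
        return [mini_scrub(v) for v in value]
    return repr(value)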
@@ -211,7 +228,7 @@ def _scrub(value, is_done, stack, scrub_text, scrub_number): output.append(v) return output elif hasattr(value, '__call__'): - return text_type(repr(value)) + return text(repr(value)) else: return _scrub(DataObject(value), is_done, stack, scrub_text, scrub_number) @@ -225,7 +242,7 @@ def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True): :return: """ if FIND_LOOPS: - obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else _trim_whitespace()) + obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else trim_whitespace()) try: json = json_encoder(obj, pretty=pretty) if json == None: @@ -239,7 +256,7 @@ def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True): return json except Exception: pass - Log.error("Can not encode into JSON: {{value}}", value=text_type(repr(obj)), cause=e) + Log.error("Can not encode into JSON: {{value}}", value=text(repr(obj)), cause=e) def remove_line_comment(line): @@ -276,7 +293,7 @@ def json2value(json_string, params=Null, flexible=False, leaves=False): :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED :return: Python value """ - if not isinstance(json_string, text_type): + if not is_text(json_string) and json_string.__class__.__name__ != "FileString": Log.error("only unicode json accepted") try: @@ -295,7 +312,7 @@ def json2value(json_string, params=Null, flexible=False, leaves=False): json_string = expand_template(json_string, params) try: - value = wrap(json_decoder(text_type(json_string))) + value = wrap(json_decoder(text(json_string))) except Exception as e: Log.error("can not decode\n{{content}}", content=json_string, cause=e) @@ -346,10 +363,6 @@ else: return separator.join('{:02X}'.format(x) for x in value) -def utf82unicode(value): - return value.decode('utf8') - - def datetime2unix(d): try: if d == None: @@ -367,5 +380,49 @@ def datetime2unix(d): Log.error("Can not convert {{value}}", value= d, cause=e) +python_type_to_json_type = { + int: INTEGER, + text: STRING, + float: NUMBER, + Decimal: NUMBER, + bool: BOOLEAN, + NullType: OBJECT, + none_type: OBJECT, + Data: OBJECT, + dict: OBJECT, + object: OBJECT, + list: NESTED, + set: NESTED, + # tuple: NESTED, # DO NOT INCLUDE, WILL HIDE LOGIC ERRORS + FlatList: NESTED, + Date: TIME, + datetime: TIME, + date: TIME, +} + +if PY2: + python_type_to_json_type[str] = STRING + python_type_to_json_type[long] = INTEGER + + +for k, v in items(python_type_to_json_type): + python_type_to_json_type[k.__name__] = v + +_merge_order = { + BOOLEAN: 1, + INTEGER: 2, + NUMBER: 3, + STRING: 4, + OBJECT: 5, + NESTED: 6 +} + + +def _merge_json_type(A, B): + a = _merge_order[A] + b = _merge_order[B] + return A if a >= b else B + + from mo_json.decoder import json_decoder from mo_json.encoder import json_encoder, pypy_json_encode diff --git a/vendor/mo_json/decoder.py b/vendor/mo_json/decoder.py index 0c535f6..82dc994 100644 --- a/vendor/mo_json/decoder.py +++ b/vendor/mo_json/decoder.py @@ -5,11 +5,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
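# The _merge_order table above defines a small widening lattice over the
# inferred JSON types; merging two observed types keeps the wider one.
# This sketch replays that rule with plain strings so the behavior of
# _merge_json_type() is easy to check.
MERGE_ORDER = {"boolean": 1, "integer": 2, "number": 3, "string": 4, "object": 5, "nested": 6}

def merge_json_type(a, b):
    """RETURN THE WIDER OF TWO TYPE NAMES"""
    return a if MERGE_ORDER[a] >= MERGE_ORDER[b] else b

assert merge_json_type("integer", "number") == "number"
assert merge_json_type("string", "boolean") == "string"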
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import json diff --git a/vendor/mo_json/encoder.py b/vendor/mo_json/encoder.py index 7a8d079..5d9e63f 100644 --- a/vendor/mo_json/encoder.py +++ b/vendor/mo_json/encoder.py @@ -5,26 +5,23 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import json import math import time -from collections import Mapping -from datetime import datetime, date, timedelta +from datetime import date, datetime, timedelta from decimal import Decimal from json.encoder import encode_basestring from math import floor -from mo_dots import Data, FlatList, NullType, Null, SLOT -from mo_future import text_type, binary_type, long, utf8_json_encoder, sort_using_key, xrange, PYPY -from mo_json import ESCAPE_DCT, scrub, float2json +from mo_dots import Data, FlatList, Null, NullType, SLOT, is_data, is_list, unwrap +from mo_future import PYPY, binary_type, is_binary, is_text, long, sort_using_key, text, utf8_json_encoder, xrange +from mo_json import ESCAPE_DCT, float2json, scrub from mo_logs import Except -from mo_logs.strings import utf82unicode, quote +from mo_logs.strings import quote from mo_times import Timer from mo_times.dates import Date from mo_times.durations import Duration @@ -111,14 +108,17 @@ class cPythonJSONEncoder(object): try: with Timer("scrub", too_long=0.1): scrubbed = scrub(value) - with Timer("encode", too_long=0.1): - return text_type(self.encoder(scrubbed)) + param = {"size": 0} + with Timer("encode {{size}} characters", param=param, too_long=0.1): + output = text(self.encoder(scrubbed)) + param["size"] = len(output) + return output except Exception as e: from mo_logs.exceptions import Except from mo_logs import Log e = Except.wrap(e) - Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e) + Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=e) raise e @@ -134,7 +134,7 @@ def ujson_encode(value, pretty=False): from mo_logs import Log e = Except.wrap(e) - Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e) + Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=e) raise e @@ -155,14 +155,14 @@ def _value2json(value, _buffer): if type is binary_type: append(_buffer, QUOTE) try: - v = utf82unicode(value) + v = value.decode('utf8') except Exception as e: problem_serializing(value, e) for c in v: append(_buffer, ESCAPE_DCT.get(c, c)) append(_buffer, QUOTE) - elif type is text_type: + elif type is text: append(_buffer, QUOTE) for c in value: append(_buffer, ESCAPE_DCT.get(c, c)) @@ -178,7 +178,7 @@ def _value2json(value, _buffer): _value2json(d, _buffer) return elif type in (int, long, Decimal): - append(_buffer, text_type(value)) + append(_buffer, text(value)) elif type is float: if math.isnan(value) or math.isinf(value): append(_buffer, u'null') @@ -198,7 +198,7 @@ def _value2json(value, _buffer): append(_buffer, float2json(value.seconds)) elif type is NullType: 
append(_buffer, u"null") - elif isinstance(value, Mapping): + elif is_data(value): if not value: append(_buffer, u"{}") else: @@ -215,11 +215,11 @@ def _value2json(value, _buffer): else: from mo_logs import Log - Log.error(text_type(repr(value)) + " is not JSON serializable") + Log.error(text(repr(value)) + " is not JSON serializable") except Exception as e: from mo_logs import Log - Log.error(text_type(repr(value)) + " is not JSON serializable", cause=e) + Log.error(text(repr(value)) + " is not JSON serializable", cause=e) def _list2json(value, _buffer): @@ -250,8 +250,8 @@ def _dict2json(value, _buffer): for k, v in value.items(): append(_buffer, prefix) prefix = COMMA_QUOTE - if isinstance(k, binary_type): - k = utf82unicode(k) + if is_binary(k): + k = k.decode('utf8') for c in k: append(_buffer, ESCAPE_DCT.get(c, c)) append(_buffer, QUOTE_COLON) @@ -260,7 +260,7 @@ def _dict2json(value, _buffer): except Exception as e: from mo_logs import Log - Log.error(text_type(repr(value)) + " is not JSON serializable", cause=e) + Log.error(text(repr(value)) + " is not JSON serializable", cause=e) ARRAY_ROW_LENGTH = 80 @@ -275,21 +275,24 @@ def pretty_json(value): return "false" elif value is True: return "true" - elif isinstance(value, Mapping): + elif value == None: + return "null" + elif is_data(value): try: + value = unwrap(value) items = sort_using_key(value.items(), lambda r: r[0]) - values = [encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None] + values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None] if not values: return "{}" elif len(values) == 1: return "{" + values[0] + "}" else: - return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}" + return "{\n" + ",\n".join(indent(v) for v in values) + "\n}" except Exception as e: from mo_logs import Log from mo_math import OR - if OR(not isinstance(k, text_type) for k in value.keys()): + if OR(not is_text(k) for k in value.keys()): Log.error( "JSON must have string keys: {{keys}}:", keys=[k for k in value.keys()], @@ -301,13 +304,14 @@ def pretty_json(value): keys=[k for k in value.keys()], cause=e ) - elif value in (None, Null): - return "null" - elif isinstance(value, (text_type, binary_type)): - if isinstance(value, binary_type): - value = utf82unicode(value) + elif value.__class__ in (binary_type, text): + if is_binary(value): + value = value.decode('utf8') try: - return quote(value) + if "\n" in value and value.strip(): + return pretty_json({"$concat": value.split("\n"), "separator": "\n"}) + else: + return quote(value) except Exception as e: from mo_logs import Log @@ -320,7 +324,7 @@ def pretty_json(value): c2 = ESCAPE_DCT[c] except Exception: c2 = c - c3 = text_type(c2) + c3 = text(c2) acc.append(c3) except BaseException: pass @@ -330,9 +334,9 @@ def pretty_json(value): Log.note("return value of length {{length}}", length=len(output)) return output except BaseException as f: - Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f) + Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f) return "null" - elif isinstance(value, list): + elif is_list(value): if not value: return "[]" @@ -401,13 +405,13 @@ def pretty_json(value): else: try: if int(value) == value: - return text_type(int(value)) + return text(int(value)) except Exception: pass try: if float(value) == value: - return text_type(float(value)) + return text(float(value)) except Exception: pass @@ -429,7 +433,7 @@ def 
problem_serializing(value, e=None): typename = "" try: - rep = text_type(repr(value)) + rep = text(repr(value)) except Exception as _: rep = None @@ -490,10 +494,10 @@ def unicode_key(key): """ CONVERT PROPERTY VALUE TO QUOTED NAME OF SAME """ - if not isinstance(key, (text_type, binary_type)): + if not isinstance(key, (text, binary_type)): from mo_logs import Log Log.error("{{key|quote}} is not a valid key", key=key) - return quote(text_type(key)) + return quote(text(key)) # OH HUM, cPython with uJSON, OR pypy WITH BUILTIN JSON? diff --git a/vendor/mo_json/stream.py b/vendor/mo_json/stream.py index 348ebd3..5ff022b 100644 --- a/vendor/mo_json/stream.py +++ b/vendor/mo_json/stream.py @@ -5,32 +5,347 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import json -from collections import Mapping from types import GeneratorType -from mo_dots import split_field, startswith_field, relative_field, Data, join_field, Null, wrap +from mo_dots import ( + Data, + Null, + is_data, + join_field, + relative_field, + split_field, + startswith_field, + wrap, +) +from mo_future import NEXT from mo_logs import Log DEBUG = False -MIN_READ_SIZE = 8*1024 +MIN_READ_SIZE = 8 * 1024 WHITESPACE = b" \n\r\t" -CLOSE = { - b"{": b"}", - b"[": b"]" -} +CLOSE = {b"{": b"}", b"[": b"]"} NO_VARS = set() json_decoder = json.JSONDecoder().decode +class Parser(object): + def __init__(self, json, query_path, expected_vars=NO_VARS): + + if hasattr(json, "read"): + # ASSUME IT IS A STREAM + temp = json + + def get_more(): + return temp.read(MIN_READ_SIZE) + + self.json = List_usingStream(get_more) + elif hasattr(json, "__call__"): + self.json = List_usingStream(json) + elif isinstance(json, GeneratorType): + self.json = List_usingStream(NEXT(json)) + else: + Log.error( + "Expecting json to be a stream, or a function that will return more bytes" + ) + + if is_data(query_path) and query_path.get("items"): + self.path_list = split_field(query_path.get("items")) + [ + "$items" + ] # INSERT A MARKER SO THAT OBJECT IS STREAM DECODED + else: + self.path_list = split_field(query_path) + + self.expected_vars = expected_vars + self.destination = [None] * len(expected_vars) + self.done = [self.path_list + [None]] + + def parse(self, start=0): + """ + YIELD (object, end) PAIRS + :param start: OFFSET TO START PARSING + """ + c, index = self.skip_whitespace(start) + for end in self._decode_token(index, c, [], self.path_list, self.expected_vars): + output = Data() + for i, e in enumerate(self.expected_vars): + output[e] = self.destination[i] + yield output, end + + def _iterate_list(self, index, c, parent_path, path, expected_vars): + c, index = self.skip_whitespace(index) + if c == b"]": + yield index + return + + while True: + if not path: + index = self._assign_token(index, c, expected_vars) + c, index = self.skip_whitespace(index) + if c == b"]": + yield index + self._done(parent_path) + return + elif c == b",": + yield index + c, index = self.skip_whitespace(index) + else: + for index in self._decode_token( + index, c, parent_path, path, expected_vars + ): + c, index = self.skip_whitespace(index) + if c == b"]": + yield index + self._done(parent_path) + return + elif c == b",": 
+ yield index + c, index = self.skip_whitespace(index) + + def _done(self, parent_path): + if len(parent_path) < len(self.done[0]): + self.done[0] = parent_path + + def _decode_object(self, index, c, parent_path, query_path, expected_vars): + if "." in expected_vars: + if len(self.done[0]) <= len(parent_path) and all( + d == p for d, p in zip(self.done[0], parent_path) + ): + Log.error("Can not pick up more variables, iterator is done") + + if query_path: + Log.error( + "Can not extract objects that contain the iteration", + var=join_field(query_path), + ) + + index = self._assign_token(index, c, expected_vars) + # c, index = self.skip_whitespace(index) + yield index + return + + did_yield = False + while True: + c, index = self.skip_whitespace(index) + if c == b",": + continue + elif c == b'"': + name, index = self.simple_token(index, c) + + c, index = self.skip_whitespace(index) + if c != b":": + Log.error("Expecting colon") + c, index = self.skip_whitespace(index) + + child_expected = needed(name, expected_vars) + child_path = parent_path + [name] + if any(child_expected): + if not query_path: + index = self._assign_token(index, c, child_expected) + elif query_path[0] == name: + for index in self._decode_token( + index, c, child_path, query_path[1:], child_expected + ): + did_yield = True + yield index + else: + if len(self.done[0]) <= len(child_path): + Log.error( + "Can not pick up more variables, iterator over {{path}} is done", + path=join_field(self.done[0]), + ) + index = self._assign_token(index, c, child_expected) + elif query_path and query_path[0] == name: + for index in self._decode_token( + index, c, child_path, query_path[1:], child_expected + ): + yield index + else: + index = self.jump_to_end(index, c) + elif c == b"}": + if not did_yield: + yield index + break + + def set_destination(self, expected_vars, value): + for i, e in enumerate(expected_vars): + if e is None: + pass + elif e == ".": + self.destination[i] = value + elif is_data(value): + self.destination[i] = value[e] + else: + self.destination[i] = Null + + def _decode_object_items(self, index, c, parent_path, query_path, expected_vars): + """ + ITERATE THROUGH THE PROPERTIES OF AN OBJECT + """ + c, index = self.skip_whitespace(index) + num_items = 0 + while True: + if c == b",": + c, index = self.skip_whitespace(index) + elif c == b'"': + name, index = self.simple_token(index, c) + if "name" in expected_vars: + for i, e in enumerate(expected_vars): + if e == "name": + self.destination[i] = name + + c, index = self.skip_whitespace(index) + if c != b":": + Log.error("Expecting colon") + c, index = self.skip_whitespace(index) + + child_expected = needed("value", expected_vars) + index = self._assign_token(index, c, child_expected) + c, index = self.skip_whitespace(index) + DEBUG and not num_items % 1000 and Log.note( + "{{num}} items iterated", num=num_items + ) + yield index + num_items += 1 + elif c == b"}": + break + + def _decode_token(self, index, c, parent_path, query_path, expected_vars): + if c == b"{": + if query_path and query_path[0] == "$items": + if any(expected_vars): + for index in self._decode_object_items( + index, c, parent_path, query_path[1:], expected_vars + ): + yield index + else: + index = self.jump_to_end(index, c) + yield index + elif not any(expected_vars): + index = self.jump_to_end(index, c) + yield index + else: + for index in self._decode_object( + index, c, parent_path, query_path, expected_vars + ): + yield index + elif c == b"[": + for index in self._iterate_list( + index, c, 
parent_path, query_path, expected_vars + ): + yield index + else: + index = self._assign_token(index, c, expected_vars) + yield index + + def _assign_token(self, index, c, expected_vars): + if not any(expected_vars): + return self.jump_to_end(index, c) + + value, index = self.simple_token(index, c) + self.set_destination(expected_vars, value) + + return index + + def jump_to_end(self, index, c): + """ + DO NOT PROCESS THIS JSON OBJECT, JUST RETURN WHERE IT ENDS + """ + if c == b'"': + while True: + c = self.json[index] + index += 1 + if c == b"\\": + index += 1 + elif c == b'"': + break + return index + elif c not in b"[{": + while True: + c = self.json[index] + index += 1 + if c in b",]}": + break + return index - 1 + + # OBJECTS AND ARRAYS ARE MORE INVOLVED + stack = [b" "] * 1024 + stack[0] = CLOSE[c] + i = 0 # FOR INDEXING THE STACK + while True: + c = self.json[index] + index += 1 + + if c == b'"': + while True: + c = self.json[index] + index += 1 + if c == b"\\": + index += 1 + elif c == b'"': + break + elif c in b"[{": + i += 1 + stack[i] = CLOSE[c] + elif c == stack[i]: + i -= 1 + if i == -1: + return index # FOUND THE MATCH! RETURN + elif c in b"]}": + Log.error("expecting {{symbol}}", symbol=stack[i]) + + def simple_token(self, index, c): + if c == b'"': + self.json.mark(index - 1) + while True: + c = self.json[index] + index += 1 + if c == b"\\": + index += 1 + elif c == b'"': + break + return json_decoder(self.json.release(index).decode("utf8")), index + elif c in b"{[": + self.json.mark(index - 1) + index = self.jump_to_end(index, c) + temp = self.json.release(index).decode("utf8") + value = wrap(json_decoder(temp)) + return value, index + elif c == b"t" and self.json.slice(index, index + 3) == b"rue": + return True, index + 3 + elif c == b"n" and self.json.slice(index, index + 3) == b"ull": + return None, index + 3 + elif c == b"f" and self.json.slice(index, index + 4) == b"alse": + return False, index + 4 + else: + self.json.mark(index - 1) + while True: + c = self.json[index] + if c in b",]}": + break + index += 1 + text = self.json.release(index) + try: + return float(text), index + except Exception: + Log.error("Not a known JSON primitive: {{text|quote}}", text=text) + + def skip_whitespace(self, index): + """ + RETURN NEXT NON-WHITESPACE CHAR, AND ITS INDEX + """ + c = self.json[index] + while c in WHITESPACE: + index += 1 + c = self.json[index] + return c, index + 1 + def parse(json, query_path, expected_vars=NO_VARS): """ @@ -53,279 +368,26 @@ def parse(json, query_path, expected_vars=NO_VARS): MORE-THAN-ONE PASS IS REQUIRED :return: RETURNS AN ITERATOR OVER ALL OBJECTS FROM ARRAY LOCATED AT query_path """ - if hasattr(json, "read"): - # ASSUME IT IS A STREAM - temp = json - def get_more(): - return temp.read(MIN_READ_SIZE) - json = List_usingStream(get_more) - elif hasattr(json, "__call__"): - json = List_usingStream(json) - elif isinstance(json, GeneratorType): - json = List_usingStream(json.next) - else: - Log.error("Expecting json to be a stream, or a function that will return more bytes") + for v, i in Parser(json, query_path, expected_vars).parse(): + yield v - def _iterate_list(index, c, parent_path, path, expected_vars): - c, index = skip_whitespace(index) - if c == b']': - yield index +def parse_concatenated(json, query_path, expected_vars=NO_VARS): + """ + SAME AS PARSE, BUT WILL CONTINUE PARSING SUBEQUENT JSON OBJECTS TOO + THIS CAN BE LINE-DELIMITED JSON, OR SIMPLY CONCATENATED JSON: + """ + + parser = Parser(json, query_path, expected_vars) + end = 0 + 
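+    # ILLUSTRATIVE USAGE OF Parser (a sketch; the payload, path, and variable
+    # names are examples only, not part of this patch). expected_vars ARE FULL
+    # DOTTED PATHS FROM THE ROOT, AND EACH YIELD IS AN (object, end_offset) PAIR:
+    #
+    #     parser = Parser(io.BytesIO(b'{"rows": [{"id": 1}, {"id": 2}]}'), "rows", ["rows.id"])
+    #     [row["rows.id"] for row, end in parser.parse()]   # -> [1, 2]
+    #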
while True: + try: + for v, i in parser.parse(start=end): + end = i + yield v + except EOFError: return - while True: - if not path: - index = _assign_token(index, c, expected_vars) - c, index = skip_whitespace(index) - if c == b']': - yield index - _done(parent_path) - return - elif c == b',': - yield index - c, index = skip_whitespace(index) - else: - for index in _decode_token(index, c, parent_path, path, expected_vars): - c, index = skip_whitespace(index) - if c == b']': - yield index - _done(parent_path) - return - elif c == b',': - yield index - c, index = skip_whitespace(index) - - def _done(parent_path): - if len(parent_path) < len(done[0]): - done[0] = parent_path - - def _decode_object(index, c, parent_path, query_path, expected_vars): - if "." in expected_vars: - if len(done[0]) <= len(parent_path) and all(d == p for d, p in zip(done[0], parent_path)): - Log.error("Can not pick up more variables, iterator is done") - - if query_path: - Log.error("Can not extract objects that contain the iteration", var=join_field(query_path)) - - index = _assign_token(index, c, expected_vars) - # c, index = skip_whitespace(index) - yield index - return - - did_yield = False - while True: - c, index = skip_whitespace(index) - if c == b',': - continue - elif c == b'"': - name, index = simple_token(index, c) - - c, index = skip_whitespace(index) - if c != b':': - Log.error("Expecting colon") - c, index = skip_whitespace(index) - - child_expected = needed(name, expected_vars) - child_path = parent_path + [name] - if any(child_expected): - if not query_path: - index = _assign_token(index, c, child_expected) - elif query_path[0] == name: - for index in _decode_token(index, c, child_path, query_path[1:], child_expected): - did_yield = True - yield index - else: - if len(done[0]) <= len(child_path): - Log.error("Can not pick up more variables, iterator over {{path}} is done", path=join_field(done[0])) - index = _assign_token(index, c, child_expected) - elif query_path and query_path[0] == name: - for index in _decode_token(index, c, child_path, query_path[1:], child_expected): - yield index - else: - index = jump_to_end(index, c) - elif c == b"}": - if not did_yield: - yield index - break - - def set_destination(expected_vars, value): - for i, e in enumerate(expected_vars): - if e is None: - pass - elif e == ".": - destination[i] = value - elif isinstance(value, Mapping): - destination[i] = value[e] - else: - destination[i] = Null - - def _decode_object_items(index, c, parent_path, query_path, expected_vars): - """ - ITERATE THROUGH THE PROPERTIES OF AN OBJECT - """ - c, index = skip_whitespace(index) - num_items = 0 - while True: - if c == b',': - c, index = skip_whitespace(index) - elif c == b'"': - name, index = simple_token(index, c) - if "name" in expected_vars: - for i, e in enumerate(expected_vars): - if e == "name": - destination[i] = name - - c, index = skip_whitespace(index) - if c != b':': - Log.error("Expecting colon") - c, index = skip_whitespace(index) - - child_expected = needed("value", expected_vars) - index = _assign_token(index, c, child_expected) - c, index = skip_whitespace(index) - DEBUG and not num_items % 1000 and Log.note("{{num}} items iterated", num=num_items) - yield index - num_items += 1 - elif c == b"}": - break - - def _decode_token(index, c, parent_path, query_path, expected_vars): - if c == b'{': - if query_path and query_path[0] == "$items": - if any(expected_vars): - for index in _decode_object_items(index, c, parent_path, query_path[1:], expected_vars): - yield index 
- else: - index = jump_to_end(index, c) - yield index - elif not any(expected_vars): - index = jump_to_end(index, c) - yield index - else: - for index in _decode_object(index, c, parent_path, query_path, expected_vars): - yield index - elif c == b'[': - for index in _iterate_list(index, c, parent_path, query_path, expected_vars): - yield index - else: - index = _assign_token(index, c, expected_vars) - yield index - - def _assign_token(index, c, expected_vars): - if not any(expected_vars): - return jump_to_end(index, c) - - value, index = simple_token(index, c) - set_destination(expected_vars, value) - - return index - - def jump_to_end(index, c): - """ - DO NOT PROCESS THIS JSON OBJECT, JUST RETURN WHERE IT ENDS - """ - if c == b'"': - while True: - c = json[index] - index += 1 - if c == b'\\': - index += 1 - elif c == b'"': - break - return index - elif c not in b"[{": - while True: - c = json[index] - index += 1 - if c in b',]}': - break - return index - 1 - - # OBJECTS AND ARRAYS ARE MORE INVOLVED - stack = [None] * 1024 - stack[0] = CLOSE[c] - i = 0 # FOR INDEXING THE STACK - while True: - c = json[index] - index += 1 - - if c == b'"': - while True: - c = json[index] - index += 1 - if c == b'\\': - index += 1 - elif c == b'"': - break - elif c in b'[{': - i += 1 - stack[i] = CLOSE[c] - elif c == stack[i]: - i -= 1 - if i == -1: - return index # FOUND THE MATCH! RETURN - elif c in b']}': - Log.error("expecting {{symbol}}", symbol=stack[i]) - - def simple_token(index, c): - if c == b'"': - json.mark(index - 1) - while True: - c = json[index] - index += 1 - if c == b"\\": - index += 1 - elif c == b'"': - break - return json_decoder(json.release(index).decode("utf8")), index - elif c in b"{[": - json.mark(index-1) - index = jump_to_end(index, c) - value = wrap(json_decoder(json.release(index).decode("utf8"))) - return value, index - elif c == b"t" and json.slice(index, index + 3) == b"rue": - return True, index + 3 - elif c == b"n" and json.slice(index, index + 3) == b"ull": - return None, index + 3 - elif c == b"f" and json.slice(index, index + 4) == b"alse": - return False, index + 4 - else: - json.mark(index-1) - while True: - c = json[index] - if c in b',]}': - break - index += 1 - text = json.release(index) - try: - return float(text), index - except Exception: - Log.error("Not a known JSON primitive: {{text|quote}}", text=text) - - def skip_whitespace(index): - """ - RETURN NEXT NON-WHITESPACE CHAR, AND ITS INDEX - """ - c = json[index] - while c in WHITESPACE: - index += 1 - c = json[index] - return c, index + 1 - - if isinstance(query_path, Mapping) and query_path.get("items"): - path_list = split_field(query_path.get("items")) + ["$items"] # INSERT A MARKER SO THAT OBJECT IS STREAM DECODED - else: - path_list = split_field(query_path) - - destination = [None] * len(expected_vars) - c, index = skip_whitespace(0) - done = [path_list + [None]] - for _ in _decode_token(index, c, [], path_list, expected_vars): - output = Data() - for i, e in enumerate(expected_vars): - output[e] = destination[i] - yield output - def needed(name, required): """ @@ -336,10 +398,12 @@ def needed(name, required): for r in required ] + class List_usingStream(object): """ EXPECTING A FUNCTION """ + def __init__(self, get_more_bytes): """ get_more_bytes() SHOULD RETURN AN ARRAY OF BYTES OF ANY SIZE @@ -357,21 +421,27 @@ class List_usingStream(object): def __getitem__(self, index): offset = index - self.start if offset < len(self.buffer): - return self.buffer[offset:offset + 1] + return self.buffer[offset : 
offset + 1] if offset < 0: - Log.error("Can not go in reverse on stream index=={{index}} (offset={{offset}})", index=index, offset=offset) + Log.error( + "Can not go in reverse on stream index=={{index}} (offset={{offset}})", + index=index, + offset=offset, + ) if self._mark == -1: self.start += self.buffer_length offset = index - self.start self.buffer = self.get_more() self.buffer_length = len(self.buffer) + if self.buffer_length == 0: + raise EOFError() while self.buffer_length <= offset: more = self.get_more() self.buffer += more self.buffer_length = len(self.buffer) - return self.buffer[offset:offset+1] + return self.buffer[offset : offset + 1] needless_bytes = self._mark - self.start if needless_bytes: @@ -386,7 +456,7 @@ class List_usingStream(object): self.buffer_length = len(self.buffer) try: - return self.buffer[offset:offset+1] + return self.buffer[offset : offset + 1] except Exception as e: Log.error("error", cause=e) @@ -413,6 +483,6 @@ class List_usingStream(object): self.buffer += self.get_more() self.buffer_length = len(self.buffer) - output = self.buffer[self._mark - self.start:end_offset] + output = self.buffer[self._mark - self.start : end_offset] self._mark = -1 return output diff --git a/vendor/mo_json/typed_encoder.py b/vendor/mo_json/typed_encoder.py index 30d10b8..89154f1 100644 --- a/vendor/mo_json/typed_encoder.py +++ b/vendor/mo_json/typed_encoder.py @@ -5,24 +5,24 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import time -from collections import Mapping from datetime import date, datetime, timedelta from decimal import Decimal from json.encoder import encode_basestring -from mo_dots import Data, FlatList, NullType, join_field, split_field, _get, SLOT, DataObject -from mo_future import text_type, binary_type, sort_using_key, long, PY2, none_type, generator_types -from mo_json import ESCAPE_DCT, float2json -from mo_json.encoder import UnicodeBuilder, COLON, COMMA, problem_serializing, json_encoder +from mo_dots import CLASS, Data, DataObject, FlatList, NullType, SLOT, _get, is_data, join_field, split_field, \ + concat_field +from mo_dots.objects import OBJ +from mo_future import binary_type, generator_types, integer_types, is_binary, is_text, sort_using_key, text +from mo_json import BOOLEAN, ESCAPE_DCT, EXISTS, INTEGER, NESTED, NUMBER, STRING, float2json, python_type_to_json_type, \ + NUMBER_TYPES +from mo_json.encoder import COLON, COMMA, UnicodeBuilder, json_encoder, problem_serializing from mo_logs import Log -from mo_logs.strings import quote, utf82unicode +from mo_logs.strings import quote from mo_times import Date, Duration @@ -45,11 +45,31 @@ def untype_path(encoded): def unnest_path(encoded): if encoded.startswith(".."): - encoded = encoded.lstrip(".") - if not encoded: - encoded = "." + remainder = encoded.lstrip(".") + back = len(encoded) - len(remainder) + return ("." * back) + unnest_path(remainder) - return join_field(decode_property(c) for c in split_field(encoded) if c != NESTED_TYPE) + path = split_field(encoded) + if not path: + return "." + if path[-1] == NESTED_TYPE: + path = path[:-1] + if not path: + return "." 
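+    # ILLUSTRATIVE (assuming NESTED_TYPE == "~N~"): typed markers are dropped
+    # from intermediate steps and leading dots are preserved, e.g.
+    #     unnest_path("a.~N~.b")   -> "a.b"
+    #     unnest_path("..a.~N~.b") -> "..a.b"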
+ + return join_field([decode_property(c) for c in path[:-1] if not c.startswith(TYPE_PREFIX)] + [decode_property(path[-1])]) + + +def get_nested_path(typed_path): + # CONSTRUCT THE nested_path FROM THE typed_path + path = split_field(typed_path) + parent = "." + nested_path = (parent,) + for i, p in enumerate(path[:-1]): + if p == NESTED_TYPE: + step = concat_field(parent, join_field(path[0:i + 1])) + nested_path = (step,) + nested_path + return nested_path def untyped(value): @@ -57,7 +77,7 @@ def untyped(value): def _untype_list(value): - if any(isinstance(v, Mapping) for v in value): + if any(is_data(v) for v in value): # MAY BE MORE TYPED OBJECTS IN THIS LIST output = [_untype_value(v) for v in value] else: @@ -91,7 +111,7 @@ def _untype_dict(value): def _untype_value(value): - _type = _get(value, "__class__") + _type = _get(value, CLASS) if _type is Data: return _untype_dict(_get(value, SLOT)) elif _type is dict: @@ -103,7 +123,7 @@ def _untype_value(value): elif _type is NullType: return None elif _type is DataObject: - return _untype_value(_get(value, "_obj")) + return _untype_value(_get(value, OBJ)) elif _type in generator_types: return _untype_list(value) else: @@ -139,16 +159,18 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): if value_json_type == column_json_type: pass # ok + elif value_json_type in NUMBER_TYPES and column_json_type in NUMBER_TYPES: + pass # ok elif value_json_type == NESTED and all(python_type_to_json_type[v.__class__] == column_json_type for v in value if v != None): pass # empty arrays can be anything else: from mo_logs import Log - Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.names['.']) + Log.error("Can not store {{value}} in {{column|quote}}", value=value, column=sub_schema.name) sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema} - if value == None: + if value == None and path: from mo_logs import Log Log.error("can not encode null (missing) values") elif value is True: @@ -183,13 +205,18 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): _dict2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer) append(buffer, ']' + COMMA) append(buffer, QUOTED_EXISTS_TYPE) - append(buffer, text_type(len(value))) + append(buffer, text(len(value))) append(buffer, '}') else: - # SINGLETON LISTS OF null SHOULD NOT EXIST - from mo_logs import Log - - Log.error("should not happen") + # SINGLETON LIST + append(buffer, '{') + append(buffer, QUOTED_NESTED_TYPE) + append(buffer, '[{') + append(buffer, QUOTED_EXISTS_TYPE) + append(buffer, '1}]') + append(buffer, COMMA) + append(buffer, QUOTED_EXISTS_TYPE) + append(buffer, '1}') else: if EXISTS_TYPE not in sub_schema: sub_schema[EXISTS_TYPE] = {} @@ -200,7 +227,7 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): else: append(buffer, '{') append(buffer, QUOTED_EXISTS_TYPE) - append(buffer, '0}') + append(buffer, '1}') elif _type is binary_type: if STRING_TYPE not in sub_schema: sub_schema[STRING_TYPE] = True @@ -209,14 +236,14 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): append(buffer, QUOTED_STRING_TYPE) append(buffer, '"') try: - v = utf82unicode(value) + v = value.decode('utf8') except Exception as e: raise problem_serializing(value, e) for c in v: append(buffer, ESCAPE_DCT.get(c, c)) append(buffer, '"}') - elif _type is text_type: + elif _type is text: if STRING_TYPE not in sub_schema: sub_schema[STRING_TYPE] = True 
net_new_properties.append(path + [STRING_TYPE]) @@ -226,14 +253,14 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): for c in value: append(buffer, ESCAPE_DCT.get(c, c)) append(buffer, '"}') - elif _type in (int, long): + elif _type in integer_types: if NUMBER_TYPE not in sub_schema: sub_schema[NUMBER_TYPE] = True net_new_properties.append(path + [NUMBER_TYPE]) append(buffer, '{') append(buffer, QUOTED_NUMBER_TYPE) - append(buffer, text_type(value)) + append(buffer, text(value)) append(buffer, '}') elif _type in (float, Decimal): if NUMBER_TYPE not in sub_schema: @@ -246,16 +273,27 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): elif _type in (set, list, tuple, FlatList): if len(value) == 0: append(buffer, '{') - append(buffer, QUOTED_NESTED_TYPE) - append(buffer, '[]}') - elif any(isinstance(v, (Mapping, set, list, tuple, FlatList)) for v in value): - if NESTED_TYPE not in sub_schema: - sub_schema[NESTED_TYPE] = {} - net_new_properties.append(path + [NESTED_TYPE]) - append(buffer, '{') - append(buffer, QUOTED_NESTED_TYPE) - _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer) - append(buffer, '}') + append(buffer, QUOTED_EXISTS_TYPE) + append(buffer, '0}') + elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value): + # THIS IS NOT DONE BECAUSE + if len(value) == 1: + if NESTED_TYPE in sub_schema: + append(buffer, '{') + append(buffer, QUOTED_NESTED_TYPE) + _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer) + append(buffer, '}') + else: + # NO NEED TO NEST, SO DO NOT DO IT + typed_encode(value[0], sub_schema, path, net_new_properties, buffer) + else: + if NESTED_TYPE not in sub_schema: + sub_schema[NESTED_TYPE] = {} + net_new_properties.append(path + [NESTED_TYPE]) + append(buffer, '{') + append(buffer, QUOTED_NESTED_TYPE) + _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer) + append(buffer, '}') else: # ALLOW PRIMITIVE MULTIVALUES value = [v for v in value if v != None] @@ -332,11 +370,11 @@ def typed_encode(value, sub_schema, path, net_new_properties, buffer): else: from mo_logs import Log - Log.error(text_type(repr(value)) + " is not JSON serializable") + Log.error(text(repr(value)) + " is not JSON serializable") except Exception as e: from mo_logs import Log - Log.error(text_type(repr(value)) + " is not JSON serializable", cause=e) + Log.error(text(repr(value)) + " is not JSON serializable", cause=e) def _list2json(value, sub_schema, path, net_new_properties, buffer): @@ -351,7 +389,7 @@ def _list2json(value, sub_schema, path, net_new_properties, buffer): append(buffer, ']') append(buffer, COMMA) append(buffer, QUOTED_EXISTS_TYPE) - append(buffer, text_type(len(value))) + append(buffer, text(len(value))) def _multivalue2json(value, sub_schema, path, net_new_properties, buffer): @@ -380,7 +418,7 @@ def _iter2json(value, sub_schema, path, net_new_properties, buffer): append(buffer, ']') append(buffer, COMMA) append(buffer, QUOTED_EXISTS_TYPE) - append(buffer, text_type(count)) + append(buffer, text(count)) def _dict2json(value, sub_schema, path, net_new_properties, buffer): @@ -390,9 +428,9 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer): continue append(buffer, prefix) prefix = COMMA - if isinstance(k, binary_type): - k = utf82unicode(k) - if not isinstance(k, text_type): + if is_binary(k): + k = k.decode('utf8') + if not is_text(k): Log.error("Expecting property name 
to be a string") if k not in sub_schema: sub_schema[k] = {} @@ -410,42 +448,6 @@ def _dict2json(value, sub_schema, path, net_new_properties, buffer): append(buffer, '1}') -IS_NULL = '0' -BOOLEAN = 'boolean' -INTEGER = 'integer' -NUMBER = 'number' -STRING = 'string' -OBJECT = 'object' -NESTED = "nested" -EXISTS = "exists" - -JSON_TYPES = [BOOLEAN, INTEGER, NUMBER, STRING, OBJECT] -PRIMITIVE = [EXISTS, BOOLEAN, INTEGER, NUMBER, STRING] -STRUCT = [EXISTS, OBJECT, NESTED] - - -python_type_to_json_type = { - int: NUMBER, - text_type: STRING, - float: NUMBER, - None: OBJECT, - bool: BOOLEAN, - NullType: OBJECT, - none_type: OBJECT, - Data: OBJECT, - dict: OBJECT, - object: OBJECT, - Mapping: OBJECT, - list: NESTED, - FlatList: NESTED, - Date: NUMBER -} - -if PY2: - python_type_to_json_type[str] = STRING - python_type_to_json_type[long] = NUMBER - - TYPE_PREFIX = "~" # u'\u0442\u0443\u0440\u0435-' # "туре" BOOLEAN_TYPE = TYPE_PREFIX + "b~" NUMBER_TYPE = TYPE_PREFIX + "n~" @@ -461,6 +463,12 @@ QUOTED_STRING_TYPE = quote(STRING_TYPE) + COLON QUOTED_NESTED_TYPE = quote(NESTED_TYPE) + COLON QUOTED_EXISTS_TYPE = quote(EXISTS_TYPE) + COLON +inserter_type_to_json_type = { + BOOLEAN_TYPE: BOOLEAN, + NUMBER_TYPE: NUMBER, + STRING_TYPE: STRING +} + json_type_to_inserter_type = { BOOLEAN: BOOLEAN_TYPE, INTEGER: NUMBER_TYPE, diff --git a/vendor/mo_json_config/__init__.py b/vendor/mo_json_config/__init__.py index be6da8f..3c0b829 100644 --- a/vendor/mo_json_config/__init__.py +++ b/vendor/mo_json_config/__init__.py @@ -5,31 +5,28 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import os -from collections import Mapping -import mo_dots -from mo_dots import set_default, wrap, unwrap +from mo_dots import is_data, is_list, set_default, unwrap, wrap, is_sequence, coalesce, get_attr from mo_files import File from mo_files.url import URL -from mo_future import text_type +from mo_future import is_text +from mo_future import text from mo_json import json2value from mo_json_config.convert import ini2value -from mo_logs import Log, Except +from mo_logs import Except, Log DEBUG = False def get_file(file): file = File(file) - if os.sep=="\\": + if os.sep == "\\": return get("file:///" + file.abspath) else: return get("file://" + file.abspath) @@ -39,7 +36,7 @@ def get(url): """ USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON """ - url = text_type(url) + url = text(url) if url.find("://") == -1: Log.error("{{url}} must have a prototcol (eg http://) declared", url=url) @@ -86,7 +83,7 @@ def _replace_ref(node, url): if url.path.endswith("/"): url.path = url.path[:-1] - if isinstance(node, Mapping): + if is_data(node): ref = None output = {} for k, v in node.items(): @@ -117,13 +114,13 @@ def _replace_ref(node, url): raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme) if ref.fragment: - new_value = mo_dots.get_attr(new_value, ref.fragment) + new_value = get_attr(new_value, ref.fragment) DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value) if not output: output = new_value - elif isinstance(output, text_type): + elif is_text(output): Log.error("Can not handle set_default({{output}},{{new_value}})", 
output=output, new_value=new_value) else: output = unwrap(set_default(output, new_value)) @@ -131,7 +128,7 @@ def _replace_ref(node, url): DEBUG and Log.note("Return {{output}}", output=output) return output - elif isinstance(node, list): + elif is_list(node): output = [_replace_ref(n, url) for n in node] # if all(p[0] is p[1] for p in zip(output, node)): # return node @@ -141,13 +138,17 @@ def _replace_ref(node, url): def _replace_locals(node, doc_path): - if isinstance(node, Mapping): + if is_data(node): # RECURS, DEEP COPY ref = None output = {} for k, v in node.items(): if k == "$ref": ref = v + elif k == "$concat": + if not is_sequence(v): + Log.error("$concat expects an array of strings") + return coalesce(node.get("separator"), "").join(v) elif v == None: continue else: @@ -164,13 +165,13 @@ def _replace_locals(node, doc_path): if p != ".": if i>len(doc_path): Log.error("{{frag|quote}} reaches up past the root document", frag=frag) - new_value = mo_dots.get_attr(doc_path[i-1], frag[i::]) + new_value = get_attr(doc_path[i-1], frag[i::]) break else: new_value = doc_path[len(frag) - 1] else: # ABSOLUTE - new_value = mo_dots.get_attr(doc_path[-1], frag) + new_value = get_attr(doc_path[-1], frag) new_value = _replace_locals(new_value, [new_value] + doc_path) @@ -179,7 +180,7 @@ def _replace_locals(node, doc_path): else: return unwrap(set_default(output, new_value)) - elif isinstance(node, list): + elif is_list(node): candidate = [_replace_locals(n, [n] + doc_path) for n in node] # if all(p[0] is p[1] for p in zip(candidate, node)): # return node @@ -247,10 +248,14 @@ def get_http(ref, url): def _get_env(ref, url): # GET ENVIRONMENT VARIABLES ref = ref.host + raw_value = os.environ.get(ref) + if not raw_value: + Log.error("expecting environment variable with name {{env_var}}", env_var=ref) + try: - new_value = json2value(os.environ[ref]) + new_value = json2value(raw_value) except Exception as e: - new_value = os.environ.get(ref) + new_value = raw_value return new_value diff --git a/vendor/mo_json_config/convert.py b/vendor/mo_json_config/convert.py index 25bcda2..60ebd03 100644 --- a/vendor/mo_json_config/convert.py +++ b/vendor/mo_json_config/convert.py @@ -5,15 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from mo_future import StringIO, ConfigParser from mo_dots import wrap +from mo_future import ConfigParser, StringIO def ini2value(ini_content): diff --git a/vendor/mo_kwargs/__init__.py b/vendor/mo_kwargs/__init__.py index 011684e..5d5e363 100644 --- a/vendor/mo_kwargs/__init__.py +++ b/vendor/mo_kwargs/__init__.py @@ -5,18 +5,23 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
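As a point of reference for the `$concat` clause added to `_replace_locals` above, here is a hedged sketch of how a configuration file would use it; the file name and values are illustrative, not taken from this patch:

from mo_json_config import get_file

# config.json (illustrative) contains:
#     {"es": {"host": {"$concat": ["active-data", "example", "com"], "separator": "."}}}
config = get_file("config.json")
assert config.es.host == "active-data.example.com"

Note that `separator` is read from the sibling property and defaults to the empty string when absent.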
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping +from functools import update_wrapper -from mo_dots import zip as dict_zip, get_logger, wrap -from mo_future import text_type, get_function_arguments, get_function_defaults, get_function_name +from mo_dots import get_logger, is_data, wrap, zip as dict_zip, set_default +from mo_future import ( + get_function_arguments, + get_function_defaults, + get_function_name, + text, +) from mo_logs import Except +KWARGS = str("kwargs") + def override(func): """ @@ -38,12 +43,20 @@ def override(func): if not get_function_defaults(func): defaults = {} else: - defaults = {k: v for k, v in zip(reversed(params), reversed(get_function_defaults(func)))} + defaults = { + k: v + for k, v in zip(reversed(params), reversed(get_function_defaults(func))) + } - def raise_error(e, packed): - err = text_type(e) + def raise_error(e, a, k): + packed = set_default(dict(zip(params, a)), k) + err = text(e) e = Except.wrap(e) - if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err): + if err.startswith(func_name) and ( + "takes at least" in err + or "takes exactly " in err + or "required positional argument" in err + ): missing = [p for p in params if str(p) not in packed] given = [p for p in params if str(p) in packed] if not missing: @@ -55,87 +68,93 @@ def override(func): missing=missing, given=given, stack_depth=2, - cause=e + cause=e, ) raise e - if "kwargs" not in params: - # WE ASSUME WE ARE ONLY ADDING A kwargs PARAMETER TO SOME REGULAR METHOD + if KWARGS not in params: + # ADDING A kwargs PARAMETER TO SOME REGULAR METHOD def wo_kwargs(*args, **kwargs): - settings = kwargs.get("kwargs") + settings = kwargs.get(KWARGS, {}) ordered_params = dict(zip(params, args)) - packed = params_pack(params, ordered_params, kwargs, settings, defaults) + a, k = params_pack(params, defaults, settings, kwargs, ordered_params) try: - return func(**packed) + return func(*a, **k) except TypeError as e: - raise_error(e, packed) - return wo_kwargs + raise_error(e, a, k) - elif func_name in ("__init__", "__new__"): - def w_constructor(*args, **kwargs): - if "kwargs" in kwargs: - packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults) - elif len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping): - # ASSUME SECOND UNNAMED PARAM IS kwargs - packed = params_pack(params, {"self": args[0]}, args[1], defaults) - else: - # DO NOT INCLUDE self IN kwargs - packed = params_pack(params, kwargs, dict_zip(params, args), defaults) - try: - return func(**packed) - except TypeError as e: - raise_error(e, packed) - return w_constructor + return update_wrapper(wo_kwargs, func) + + elif func_name in ("__init__", "__new__") or params[0] in ("self", "cls"): - elif params[0] == "self": def w_bound_method(*args, **kwargs): - if len(args) == 2 and len(kwargs) == 0 and isinstance(args[1], Mapping): + if len(args) == 2 and len(kwargs) == 0 and is_data(args[1]): # ASSUME SECOND UNNAMED PARAM IS kwargs - packed = params_pack(params, args[1], defaults) - elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping): + a, k = params_pack( + params, defaults, args[1], {params[0]: args[0]}, kwargs + ) + elif KWARGS in kwargs and is_data(kwargs[KWARGS]): # PUT args 
INTO kwargs - packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), kwargs["kwargs"], defaults) + a, k = params_pack( + params, defaults, kwargs[KWARGS], dict_zip(params, args), kwargs + ) else: - packed = params_pack(params, kwargs, dict_zip(params[1:], args[1:]), defaults) + a, k = params_pack(params, defaults, dict_zip(params, args), kwargs) try: - return func(args[0], **packed) + return func(*a, **k) except TypeError as e: - raise_error(e, packed) - return w_bound_method + raise_error(e, a, k) + + return update_wrapper(w_bound_method, func) else: + def w_kwargs(*args, **kwargs): - if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping): + if len(args) == 1 and len(kwargs) == 0 and is_data(args[0]): # ASSUME SINGLE PARAMETER IS kwargs - packed = params_pack(params, args[0], defaults) - elif "kwargs" in kwargs and isinstance(kwargs["kwargs"], Mapping): + a, k = params_pack(params, defaults, args[0]) + elif KWARGS in kwargs and is_data(kwargs[KWARGS]): # PUT args INTO kwargs - packed = params_pack(params, kwargs, dict_zip(params, args), kwargs["kwargs"], defaults) + a, k = params_pack( + params, defaults, kwargs[KWARGS], dict_zip(params, args), kwargs + ) else: # PULL kwargs OUT INTO PARAMS - packed = params_pack(params, kwargs, dict_zip(params, args), defaults) + a, k = params_pack(params, defaults, dict_zip(params, args), kwargs) try: - return func(**packed) + return func(*a, **k) except TypeError as e: - raise_error(e, packed) - return w_kwargs + raise_error(e, a, k) + + return update_wrapper(w_kwargs, func) def params_pack(params, *args): + """ + :param params: + :param args: + :return: (args, kwargs) pair + """ settings = {} for a in args: - if a == None: - continue for k, v in a.items(): - k = text_type(k) - if k in settings: - continue - settings[k] = v if v != None else None - settings["kwargs"] = settings + settings[str(k)] = v + settings[KWARGS] = wrap(settings) - output = { - str(k): settings[k] if k != "kwargs" else wrap(settings) - for k in params - if k in settings - } - return output + if params and params[0] in ("self", "cls"): + s = settings.get(params[0]) + if s is None: + return ( + [], + {k: settings[k] for k in params[1:] if k in settings}, + ) + else: + return ( + [s], + {k: settings[k] for k in params[1:] if k in settings}, + ) + else: + return ( + [], + {k: settings[k] for k in params if k in settings} + ) diff --git a/vendor/mo_logs/__init__.py b/vendor/mo_logs/__init__.py index 1d657f0..cb8071c 100644 --- a/vendor/mo_logs/__init__.py +++ b/vendor/mo_logs/__init__.py @@ -5,23 +5,20 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
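For orientation, a minimal sketch of the calling conventions the reworked `override` decorator supports; the function and values are illustrative:

from mo_kwargs import override

@override
def connect(host, port=9200, kwargs=None):
    # kwargs RECEIVES THE FULL MERGED PARAMETER SET
    return host, port

connect("localhost")                              # ordinary positional call
connect({"host": "localhost", "port": 9300})      # single dict is treated as kwargs
connect(host="localhost", kwargs={"port": 9300})  # explicit arguments override kwargs

Since `params_pack` applies its sources in order (defaults, then `kwargs`, then positional, then explicit keywords), later sources win.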
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import os import platform import sys -from collections import Mapping from datetime import datetime -from mo_dots import coalesce, listwrap, wrap, unwrap, unwraplist, set_default, FlatList -from mo_future import text_type, PY3 -from mo_logs import constants -from mo_logs.exceptions import Except, suppress_exception -from mo_logs.strings import indent +from mo_dots import Data, FlatList, coalesce, is_data, is_list, listwrap, unwraplist, wrap +from mo_future import PY3, is_text, text +from mo_logs import constants, exceptions, strings +from mo_logs.exceptions import Except, LogItem, suppress_exception +from mo_logs.strings import CR, indent _Thread = None if PY3: @@ -30,7 +27,6 @@ else: STDOUT = sys.stdout - class Log(object): """ FOR STRUCTURED LOGGING AND EXCEPTION CHAINING @@ -78,7 +74,7 @@ class Log(object): from mo_threads import profiles profiles.enable_profilers(settings.cprofile.filename) - if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled): + if settings.profile is True or (is_data(settings.profile) and settings.profile.enabled): Log.error("REMOVED 2018-09-02, Activedata revision 3f30ff46f5971776f8ba18") # from mo_logs import profiles # @@ -92,10 +88,11 @@ class Log(object): if settings.constants: constants.set(settings.constants) - if settings.log: + logs = coalesce(settings.log, settings.logs) + if logs: cls.logging_multi = StructuredLogger_usingMulti() - for log in listwrap(settings.log): - Log.add_log(Log.new_instance(log)) + for log in listwrap(logs): + Log._add_log(Log.new_instance(log)) from mo_logs.log_usingThread import StructuredLogger_usingThread cls.main_log = StructuredLogger_usingThread(cls.logging_multi) @@ -116,16 +113,19 @@ class Log(object): if settings["class"]: if settings["class"].startswith("logging.handlers."): - from mo_logs.log_usingLogger import StructuredLogger_usingLogger + from mo_logs.log_usingHandler import StructuredLogger_usingHandler - return StructuredLogger_usingLogger(settings) + return StructuredLogger_usingHandler(settings) else: with suppress_exception: from mo_logs.log_usingLogger import make_log_from_settings return make_log_from_settings(settings) - # OH WELL :( + # OH WELL :( + if settings.log_type == "logger": + from mo_logs.log_usingLogger import StructuredLogger_usingLogger + return StructuredLogger_usingLogger(settings) if settings.log_type == "file" or settings.file: return StructuredLogger_usingFile(settings.file) if settings.log_type == "file" or settings.filename: @@ -152,12 +152,20 @@ class Log(object): from mo_logs.log_usingNothing import StructuredLogger return StructuredLogger() - Log.error("Log type of {{log_type|quote}} is not recognized", log_type=settings.log_type) + Log.error("Log type of {{config|json}} is not recognized", config=settings) @classmethod - def add_log(cls, log): + def _add_log(cls, log): cls.logging_multi.add_log(log) + @classmethod + def set_logger(cls, logger): + if cls.logging_multi: + cls.logging_multi.add_log(logger) + else: + from mo_logs.log_usingThread import StructuredLogger_usingThread + cls.main_log = StructuredLogger_usingThread(logger) + @classmethod def note( cls, @@ -175,38 +183,20 @@ class Log(object): :param more_params: *any more parameters (which will overwrite 
default_params) :return: """ - if not isinstance(template, text_type): + timestamp = datetime.utcnow() + if not is_text(template): Log.error("Log.note was expecting a unicode template") - if len(template) > 10000: - template = template[:10000] - - params = dict(unwrap(default_params), **more_params) - - log_params = set_default({ - "template": template, - "params": params, - "timestamp": datetime.utcnow(), - "machine": machine_metadata - }, log_context, {"context": exceptions.NOTE}) - - if not template.startswith("\n") and template.find("\n") > -1: - template = "\n" + template - - if cls.trace: - log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.") - f = sys._getframe(stack_depth + 1) - log_params.location = { - "line": f.f_lineno, - "file": text_type(f.f_code.co_filename.split(os.sep)[-1]), - "method": text_type(f.f_code.co_name) - } - thread = _Thread.current() - log_params.thread = {"name": thread.name, "id": thread.id} - else: - log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.") - - cls.main_log.write(log_template, log_params) + Log._annotate( + LogItem( + context=exceptions.NOTE, + format=template, + template=template, + params=dict(default_params, **more_params) + ), + timestamp, + stack_depth+1 + ) @classmethod def unexpected( @@ -227,22 +217,26 @@ class Log(object): :param more_params: *any more parameters (which will overwrite default_params) :return: """ + timestamp = datetime.utcnow() + if not is_text(template): + Log.error("Log.warning was expecting a unicode template") + if isinstance(default_params, BaseException): cause = default_params default_params = {} - params = dict(unwrap(default_params), **more_params) + if "values" in more_params.keys(): + Log.error("Can not handle a logging parameter by name `values`") - if cause and not isinstance(cause, Except): - cause = Except(exceptions.UNEXPECTED, text_type(cause), trace=exceptions._extract_traceback(0)) + params = Data(dict(default_params, **more_params)) + cause = unwraplist([Except.wrap(c) for c in listwrap(cause)]) + trace = exceptions.get_stacktrace(stack_depth + 1) - trace = exceptions.extract_stack(1) - e = Except(type=exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace) - Log.note( - "{{error}}", - error=e, - log_context=set_default({"context": exceptions.WARNING}, log_context), - stack_depth=stack_depth + 1 + e = Except(exceptions.UNEXPECTED, template=template, params=params, cause=cause, trace=trace) + Log._annotate( + e, + timestamp, + stack_depth+1 ) @classmethod @@ -259,44 +253,23 @@ class Log(object): :param default_params: *dict* parameters to fill in template :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller :param log_context: *dict* extra key:value pairs for your convenience - :param more_params: *any more parameters (which will overwrite default_params) + :param more_params: more parameters (which will overwrite default_params) :return: """ - # USE replace() AS POOR MAN'S CHILD TEMPLATE - - template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80) - Log.note( - template, - default_params=default_params, - stack_depth=stack_depth + 1, - log_context=set_default({"context": exceptions.ALARM}, log_context), - **more_params + timestamp = datetime.utcnow() + format = ("*" * 80) + CR + indent(template, prefix="** ").strip() + 
CR + ("*" * 80) + Log._annotate( + LogItem( + context=exceptions.ALARM, + format=format, + template=template, + params=dict(default_params, **more_params) + ), + timestamp, + stack_depth + 1 ) - @classmethod - def alert( - cls, - template, - default_params={}, - stack_depth=0, - log_context=None, - **more_params - ): - """ - :param template: *string* human readable string with placeholders for parameters - :param default_params: *dict* parameters to fill in template - :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller - :param log_context: *dict* extra key:value pairs for your convenience - :param more_params: *any more parameters (which will overwrite default_params) - :return: - """ - return Log.alarm( - template, - default_params=default_params, - stack_depth=stack_depth + 1, - log_context=set_default({"context": exceptions.ALARM}, log_context), - **more_params - ) + alert = alarm @classmethod def warning( @@ -317,7 +290,8 @@ class Log(object): :param more_params: *any more parameters (which will overwrite default_params) :return: """ - if not isinstance(template, text_type): + timestamp = datetime.utcnow() + if not is_text(template): Log.error("Log.warning was expecting a unicode template") if isinstance(default_params, BaseException): @@ -326,19 +300,18 @@ class Log(object): if "values" in more_params.keys(): Log.error("Can not handle a logging parameter by name `values`") - params = dict(unwrap(default_params), **more_params) + + params = Data(dict(default_params, **more_params)) cause = unwraplist([Except.wrap(c) for c in listwrap(cause)]) - trace = exceptions.extract_stack(stack_depth + 1) + trace = exceptions.get_stacktrace(stack_depth + 1) - e = Except(type=exceptions.WARNING, template=template, params=params, cause=cause, trace=trace) - Log.note( - "{{error|unicode}}", - error=e, - log_context=set_default({"context": exceptions.WARNING}, log_context), - stack_depth=stack_depth + 1 + e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace) + Log._annotate( + e, + timestamp, + stack_depth+1 ) - @classmethod def error( cls, @@ -359,7 +332,7 @@ class Log(object): :param more_params: *any more parameters (which will overwrite default_params) :return: """ - if not isinstance(template, text_type): + if not is_text(template): sys.stderr.write(str("Log.error was expecting a unicode template")) Log.error("Log.error was expecting a unicode template") @@ -367,12 +340,12 @@ class Log(object): cause = default_params default_params = {} - params = dict(unwrap(default_params), **more_params) + params = Data(dict(default_params, **more_params)) add_to_trace = False if cause == None: causes = None - elif isinstance(cause, list): + elif is_list(cause): causes = [] for c in listwrap(cause): # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR) causes.append(Except.wrap(c, stack_depth=1)) @@ -383,60 +356,52 @@ class Log(object): causes = None Log.error("can only accept Exception, or list of exceptions") - trace = exceptions.extract_stack(stack_depth + 1) + trace = exceptions.get_stacktrace(stack_depth + 1) if add_to_trace: cause[0].trace.extend(trace[1:]) - e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace) + e = Except(context=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace) raise_from_none(e) @classmethod - def fatal( + def _annotate( cls, - template, # human readable template - default_params={}, # 
parameters for template - cause=None, # pausible cause - stack_depth=0, - log_context=None, - **more_params + item, + timestamp, + stack_depth ): """ - SEND TO STDERR - - :param template: *string* human readable string with placeholders for parameters - :param default_params: *dict* parameters to fill in template - :param cause: *Exception* for chaining - :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller - :param log_context: *dict* extra key:value pairs for your convenience - :param more_params: *any more parameters (which will overwrite default_params) + :param itemt: A LogItemTHE TYPE OF MESSAGE + :param stack_depth: FOR TRACKING WHAT LINE THIS CAME FROM :return: """ - if default_params and isinstance(listwrap(default_params)[0], BaseException): - cause = default_params - default_params = {} + item.timestamp = timestamp + item.machine = machine_metadata + item.template = strings.limit(item.template, 10000) - params = dict(unwrap(default_params), **more_params) + item.format = strings.limit(item.format, 10000) + if item.format == None: + format = text(item) + else: + format = item.format.replace("{{", "{{params.") + if not format.startswith(CR) and format.find(CR) > -1: + format = CR + format - cause = unwraplist([Except.wrap(c) for c in listwrap(cause)]) - trace = exceptions.extract_stack(stack_depth + 1) - - e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace) - - error_mode = cls.error_mode - with suppress_exception: - if not error_mode: - cls.error_mode = True - Log.note( - "{{error|unicode}}", - error=e, - log_context=set_default({"context": exceptions.FATAL}, log_context), - stack_depth=stack_depth + 1 - ) - cls.error_mode = error_mode - - sys.stderr.write(str(e)) + if cls.trace: + log_format = item.format = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" - ({{location.method}}) - " + format + f = sys._getframe(stack_depth + 1) + item.location = { + "line": f.f_lineno, + "file": text(f.f_code.co_filename), + "method": text(f.f_code.co_name) + } + thread = _Thread.current() + item.thread = {"name": thread.name, "id": thread.id} + else: + log_format = item.format = "{{timestamp|datetime}} - " + format + cls.main_log.write(log_format, item.__data__()) def write(self): raise NotImplementedError @@ -449,9 +414,9 @@ def _same_frame(frameA, frameB): # GET THE MACHINE METADATA machine_metadata = wrap({ "pid": os.getpid(), - "python": text_type(platform.python_implementation()), - "os": text_type(platform.system() + platform.release()).strip(), - "name": text_type(platform.node()) + "python": text(platform.python_implementation()), + "os": text(platform.system() + platform.release()).strip(), + "name": text(platform.node()) }) diff --git a/vendor/mo_logs/constants.py b/vendor/mo_logs/constants.py index 219f35d..aea9970 100644 --- a/vendor/mo_logs/constants.py +++ b/vendor/mo_logs/constants.py @@ -4,16 +4,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
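A short sketch of the structured-logging call style that `_annotate` above now backs; the values are illustrative:

from mo_logs import Log

Log.note("processed {{num}} records", num=42)
try:
    1 / 0
except Exception as cause:
    # warning() CHAINS THE cause INTO THE LOGGED Except
    Log.warning("calculation failed on {{name}}", name="rows.json", cause=cause)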
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import sys -from mo_dots import set_attr as mo_dots_set_attr -from mo_dots import wrap, join_field, split_field +from mo_dots import _set_attr as mo_dots_set_attr, split_field, wrap DEBUG = True @@ -28,10 +25,15 @@ def set(constants): return constants = wrap(constants) - for k, new_value in constants.leaves(): + for full_path, new_value in constants.leaves(): errors = [] + k_path = split_field(full_path) + if len(k_path) < 2: + from mo_logs import Log + Log.error("expecting . format, not {{path|quote}}", path=k_path) + name = k_path[-1] try: - old_value = mo_dots_set_attr(sys.modules, k, new_value) + mo_dots_set_attr(sys.modules, k_path, new_value) continue except Exception as e: errors.append(e) @@ -42,31 +44,29 @@ def set(constants): caller_file = caller_globals["__file__"] if not caller_file.endswith(".py"): raise Exception("do not know how to handle non-python caller") - caller_module = caller_file[:-3].replace("/", ".") + caller_module = caller_file[:-3].replace("\\", "/") + module_path = caller_module.split("/") - path = split_field(k) - for i, p in enumerate(path): - if i == 0: - continue - prefix = join_field(path[:1]) - name = join_field(path[i:]) - if caller_module.endswith(prefix): - old_value = mo_dots_set_attr(caller_globals, name, new_value) - if DEBUG: - from mo_logs import Log + # ENSURE THERE IS SOME EVIDENCE THE MODULE MATCHES THE PATH + if k_path[-2] != module_path[-1]: + continue - Log.note( - "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}", - module=prefix, - attribute=name, - old_value=old_value, - new_value=new_value - ) - break + old_value = mo_dots_set_attr(caller_globals, [name], new_value) + if DEBUG: + from mo_logs import Log + + Log.note( + "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}", + module=caller_module, + attribute=name, + old_value=old_value, + new_value=new_value + ) + break except Exception as e: errors.append(e) if errors: from mo_logs import Log - Log.error("Can not set constant {{path}}", path=k, cause=errors) + Log.error("Can not set constant {{path}}", path=full_path, cause=errors) diff --git a/vendor/mo_logs/convert.py b/vendor/mo_logs/convert.py index 55b4470..adaab15 100644 --- a/vendor/mo_logs/convert.py +++ b/vendor/mo_logs/convert.py @@ -5,17 +5,27 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
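The reworked `constants.set` above now insists on dotted paths at least two deep, of the form `<module path>.<CONSTANT>`, and patches the named module directly; a hedged sketch (the flag name is illustrative):

from mo_logs import constants

# FLIP A MODULE-LEVEL FLAG AT STARTUP; FAILS LOUDLY IF THE PATH HAS NO DOT
constants.set({"mo_json.stream.DEBUG": True})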
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import json as _json -from datetime import datetime, date +from datetime import date, datetime -from mo_future import text_type, PY3 +from mo_future import PY3 + +if PY3: + from datetime import timezone + def utcfromtimestamp(u): + d = datetime.utcfromtimestamp(u) + d = d.replace(tzinfo=timezone.utc) + return d + MAX_TIME = datetime(2286, 11, 20, 17, 46, 39, 0, timezone.utc) +else: + def utcfromtimestamp(u): + return datetime.utcfromtimestamp(u) + MAX_TIME = datetime(2286, 11, 20, 17, 46, 39) def unix2datetime(u): @@ -23,8 +33,8 @@ def unix2datetime(u): if u == None: return None if u == 9999999999: # PYPY BUG https://bugs.pypy.org/issue1697 - return datetime(2286, 11, 20, 17, 46, 39) - return datetime.utcfromtimestamp(u) + return MAX_TIME + return utcfromtimestamp(u) except Exception as e: from mo_logs import Log Log.error("Can not convert {{value}} to datetime", value= u, cause=e) diff --git a/vendor/mo_logs/exceptions.py b/vendor/mo_logs/exceptions.py index 10c8544..9dc50a3 100644 --- a/vendor/mo_logs/exceptions.py +++ b/vendor/mo_logs/exceptions.py @@ -5,21 +5,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary import sys -from collections import Mapping - -from mo_dots import Data, listwrap, unwraplist, set_default, Null, coalesce -from mo_future import text_type, PY3 -from mo_logs.strings import indent, expand_template +from mo_dots import Data, Null, is_data, listwrap, unwraplist +from mo_future import PY3, text +from mo_logs.strings import CR, expand_template, indent FATAL = "FATAL" ERROR = "ERROR" @@ -29,27 +26,47 @@ UNEXPECTED = "UNEXPECTED" NOTE = "NOTE" -class Except(Exception): +class LogItem(object): + + def __init__(self, context, format, template, params): + self.context = context + self.format = format + self.template = template + self.params = params + + def __data__(self): + return Data(self.__dict__) + + +class Except(Exception, LogItem): @staticmethod def new_instance(desc): return Except( - type=desc.type, + context=desc.context, template=desc.template, params=desc.params, cause=[Except.new_instance(c) for c in listwrap(desc.cause)], trace=desc.trace ) - def __init__(self, type=ERROR, template=Null, params=Null, cause=Null, trace=Null, **kwargs): - Exception.__init__(self) - self.type = type - self.template = template - self.params = set_default(kwargs, params) + def __init__(self, context=ERROR, template=Null, params=Null, cause=Null, trace=Null, **_): + if context == None: + raise ValueError("expecting context to not be None") + self.cause = Except.wrap(cause) + Exception.__init__(self) + LogItem.__init__( + self, + context=context, + format=None, + template=template, + params=params + ) + if not trace: - self.trace=extract_stack(2) + self.trace = get_stacktrace(2) else: self.trace = trace @@ -66,7 +83,7 @@ class Except(Exception): return Null elif isinstance(e, (list, Except)): return 
e - elif isinstance(e, Mapping): + elif is_data(e): e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)]) return Except(**e) else: @@ -74,15 +91,15 @@ if tb is not None: trace = _parse_traceback(tb) else: - trace = _extract_traceback(0) + trace = get_traceback(0) cause = Except.wrap(getattr(e, '__cause__', None)) if hasattr(e, "message") and e.message: - output = Except(type=ERROR, template=text_type(e.message), trace=trace, cause=cause) + output = Except(context=ERROR, template=text(e.message), trace=trace, cause=cause) else: - output = Except(type=ERROR, template=text_type(e), trace=trace, cause=cause) + output = Except(context=ERROR, template=text(e), trace=trace, cause=cause) - trace = extract_stack(stack_depth + 2) # +2 = to remove the caller, and it's call to this' Except.wrap() + trace = get_stacktrace(stack_depth + 2) # +2 = to remove the caller and its call to Except.wrap() output.trace.extend(trace) return output @@ -91,11 +108,11 @@ return expand_template(self.template, self.params) def __contains__(self, value): - if isinstance(value, text_type): - if self.template.find(value) >= 0 or self.message.find(value) >= 0: + if is_text(value): + if value in self.template or value in self.message: return True - if self.type == value: + if self.context == value: return True for c in listwrap(self.cause): if value in c: @@ -103,7 +120,7 @@ return False def __unicode__(self): - output = self.type + ": " + self.template + "\n" + output = self.context + ": " + self.template + CR if self.params: output = expand_template(output, self.params) @@ -113,8 +130,10 @@ if self.cause: cause_strings = [] for c in listwrap(self.cause): - with suppress_exception: - cause_strings.append(text_type(c)) + try: + cause_strings.append(text(c)) + except Exception as e: + sys.stderr.write("Problem serializing cause: " + text(c)) output += "caused by\n\t" + "and caused by\n\t".join(cause_strings) @@ -128,16 +147,12 @@ return self.__unicode__().encode('latin1', 'replace') def __data__(self): - return Data( - type=self.type, - template=self.template, - params=self.params, - cause=self.cause, - trace=self.trace - ) + output = Data({k: getattr(self, k) for k in vars(self)}) + output.cause = unwraplist([c.__data__() for c in listwrap(output.cause)]) + return output -def extract_stack(start=0): +def get_stacktrace(start=0): """ SNAGGED FROM traceback.py Altered to return Data @@ -168,7 +183,7 @@ return stack -def _extract_traceback(start): +def get_traceback(start): """ SNAGGED FROM traceback.py @@ -195,11 +210,10 @@ def _parse_traceback(tb): def format_trace(tbs, start=0): - trace = [] - for d in tbs[start::]: - item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d) - trace.append(item) - return "".join(trace) + return "".join( + expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d) + for d in tbs[start::] + ) class Suppress(object): @@ -208,13 +222,13 @@ """ def __init__(self, exception_type): - self.type = exception_type + self.context = exception_type def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - if not exc_val or isinstance(exc_val, self.type): + if not exc_val or isinstance(exc_val, self.context): return True suppress_exception = Suppress(Exception) diff --git a/vendor/mo_logs/log_usingElasticSearch.py b/vendor/mo_logs/log_usingElasticSearch.py index
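The rename of `Except.type` to `Except.context` (and `extract_stack` to `get_stacktrace`) changes the public surface of `mo_logs.exceptions`. A minimal sketch of the new surface, assuming the API exactly as diffed above; the message strings are invented:

from mo_logs.exceptions import ERROR, Except

try:
    try:
        1 / 0
    except Exception as cause:
        # wrap() normalizes any exception, capturing trace and causal chain
        raise Except(context=ERROR, template="can not compute", cause=Except.wrap(cause))
except Except as e:
    assert e.context == ERROR       # formerly e.type
    assert "can not compute" in e   # __contains__ searches template, message, context, and causes
    print(e.__data__())             # Data copy of all attributes; causes serialized recursively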
74086d1..8760138 100644 --- a/vendor/mo_logs/log_usingElasticSearch.py +++ b/vendor/mo_logs/log_usingElasticSearch.py @@ -5,61 +5,86 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from collections import Mapping +from datetime import date, datetime +import sys -import mo_json from jx_python import jx -from mo_dots import wrap, coalesce, FlatList -from mo_future import text_type, binary_type, number_types -from mo_json import value2json +from mo_dots import coalesce, listwrap, set_default, wrap, is_data, is_sequence +from mo_future import number_types, text, is_text, is_binary +from mo_json import datetime2unix, json2value, value2json from mo_kwargs import override from mo_logs import Log, strings -from mo_logs.exceptions import suppress_exception +from mo_logs.exceptions import Except, suppress_exception from mo_logs.log_usingNothing import StructuredLogger -from mo_threads import Thread, Queue, Till, THREAD_STOP -from mo_times import MINUTE, Duration +from mo_math.randoms import Random +from mo_threads import Queue, THREAD_STOP, Thread, Till +from mo_times import Duration, MINUTE +from mo_times.dates import datetime2unix from pyLibrary.convert import bytes2base64 -from pyLibrary.env.elasticsearch import Cluster +from jx_elasticsearch.rollover_index import RolloverIndex MAX_BAD_COUNT = 5 LOG_STRING_LENGTH = 2000 +PAUSE_AFTER_GOOD_INSERT = 60 +PAUSE_AFTER_BAD_INSERT = 600 class StructuredLogger_usingElasticSearch(StructuredLogger): @override - def __init__(self, host, index, port=9200, type="log", queue_size=1000, batch_size=100, kwargs=None): + def __init__( + self, + host, + index, + port=9200, + type="log", + queue_size=1000, + batch_size=100, + refresh_interval="1second", + kwargs=None, + ): """ settings ARE FOR THE ELASTICSEARCH INDEX """ kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds kwargs.retry.times = coalesce(kwargs.retry.times, 3) kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds + kwargs.host = Random.sample(listwrap(host), 1)[0] - self.es = Cluster(kwargs).get_or_create_index( - schema=mo_json.json2value(value2json(SCHEMA), leaves=True), + rollover_interval = coalesce(kwargs.rollover.interval, kwargs.rollover.max, "year") + rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval, "year") + + schema = set_default( + kwargs.schema, + {"mappings": {kwargs.type: {"properties": {"~N~": {"type": "nested"}}}}}, + json2value(value2json(SCHEMA), leaves=True) + ) + + self.es = RolloverIndex( + rollover_field={"get": [{"first": "."}, {"literal": "timestamp"}]}, + rollover_interval=rollover_interval, + rollover_max=rollover_max, + schema=schema, limit_replicas=True, typed=True, - kwargs=kwargs + read_only=False, + kwargs=kwargs, ) self.batch_size = batch_size - self.es.add_alias(coalesce(kwargs.alias, kwargs.index)) self.queue = Queue("debug logs to es", max=queue_size, silent=True) - Thread.run("add debug logs to es", self._insert_loop) + self.worker = Thread.run("add debug logs to es", self._insert_loop) def write(self, template, params): - if params.get("template"): - # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE - 
self.queue.add({"value": params}) - else: - template = strings.limit(template, 2000) - self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE) + try: + params.template = strings.limit(params.template, 2000) + params.format = None + self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60) + except Exception as e: + sys.stdout.write(text(Except.wrap(e))) return self def _insert_loop(self, please_stop=None): @@ -68,17 +93,25 @@ class StructuredLogger_usingElasticSearch(StructuredLogger): try: messages = wrap(self.queue.pop_all()) if not messages: - Till(seconds=1).wait() + Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait() continue - for g, mm in jx.groupby(messages, size=self.batch_size): + for g, mm in jx.chunk(messages, size=self.batch_size): scrubbed = [] for i, message in enumerate(mm): if message is THREAD_STOP: please_stop.go() - return + continue try: - scrubbed.append(_deep_json_to_string(message, depth=3)) + chain = flatten_causal_chain(message.value) + scrubbed.append( + { + "value": [ + _deep_json_to_string(link, depth=3) + for link in chain + ] + } + ) except Exception as e: Log.warning("Problem adding to scrubbed list", cause=e) @@ -88,13 +121,17 @@ class StructuredLogger_usingElasticSearch(StructuredLogger): Log.warning("Problem inserting logs into ES", cause=f) bad_count += 1 if bad_count > MAX_BAD_COUNT: - Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index) - Till(seconds=30).wait() + Log.warning( + "Given up trying to write debug logs to ES index {{index}}", + index=self.es.settings.index, + ) + break + Till(seconds=PAUSE_AFTER_BAD_INSERT).wait() # CONTINUE TO DRAIN THIS QUEUE while not please_stop: try: - Till(seconds=1).wait() + Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait() self.queue.pop_all() except Exception as e: Log.warning("Should not happen", cause=e) @@ -105,6 +142,21 @@ with suppress_exception: self.queue.close() + self.worker.join() + + +def flatten_causal_chain(log_item, output=None): + output = output or [] + + if is_text(log_item): + output.append({"template": log_item}) + return output + + output.append(log_item) + for c in listwrap(log_item.cause): + flatten_causal_chain(c, output) + log_item.cause = None + return output def _deep_json_to_string(value, depth): @@ -113,31 +165,32 @@ def _deep_json_to_string(value, depth): :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED :return: FLATTER STRUCTURE """ - if isinstance(value, Mapping): + if is_data(value): if depth == 0: return strings.limit(value2json(value), LOG_STRING_LENGTH) return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()} - elif isinstance(value, (list, FlatList)): + elif is_sequence(value): return strings.limit(value2json(value), LOG_STRING_LENGTH) elif isinstance(value, number_types): return value - elif isinstance(value, text_type): + elif is_text(value): return strings.limit(value, LOG_STRING_LENGTH) - elif isinstance(value, binary_type): + elif is_binary(value): return strings.limit(bytes2base64(value), LOG_STRING_LENGTH) + elif isinstance(value, (date, datetime)): + return datetime2unix(value) else: return strings.limit(value2json(value), LOG_STRING_LENGTH) SCHEMA = { - "settings": {"index.number_of_shards": 2, "index.number_of_replicas": 2}, - "mappings": {"_default_": { - "dynamic_templates": [ - {"everything_else": { - "match": "*", - "mapping": {"index": False} - }} - ] - }} + "settings":
{"index.number_of_shards": 6, "index.number_of_replicas": 2}, + "mappings": { + "_default_": { + "dynamic_templates": [ + {"everything_else": {"match": "*", "mapping": {"index": False}}} + ] + }, + }, } diff --git a/vendor/mo_logs/log_usingEmail.py b/vendor/mo_logs/log_usingEmail.py index 22fe483..d6bc4c1 100644 --- a/vendor/mo_logs/log_usingEmail.py +++ b/vendor/mo_logs/log_usingEmail.py @@ -5,15 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from mo_dots import listwrap, literal_field, Data +from mo_dots import Data, listwrap, literal_field from mo_kwargs import override from mo_logs import Log from mo_logs.exceptions import ALARM, NOTE diff --git a/vendor/mo_logs/log_usingFile.py b/vendor/mo_logs/log_usingFile.py index 8f25052..6bf0305 100644 --- a/vendor/mo_logs/log_usingFile.py +++ b/vendor/mo_logs/log_usingFile.py @@ -5,14 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary import time from mo_future import allocate_lock diff --git a/vendor/mo_logs/log_usingHandler.py b/vendor/mo_logs/log_usingHandler.py new file mode 100644 index 0000000..667c424 --- /dev/null +++ b/vendor/mo_logs/log_usingHandler.py @@ -0,0 +1,107 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + + +from __future__ import absolute_import, division, unicode_literals + +import logging + +from mo_dots import unwrap +from mo_logs import Log +from mo_logs.exceptions import suppress_exception +from mo_logs.log_usingNothing import StructuredLogger +from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream, time_delta_pusher + +_THREAD_STOP = None +_Queue = None +_Thread = None + + +def _late_import(): + global _THREAD_STOP + global _Queue + global _Thread + + from mo_threads import THREAD_STOP as _THREAD_STOP + from mo_threads import Queue as _Queue + from mo_threads import Thread as _Thread + + _ = _THREAD_STOP + _ = _Queue + _ = _Thread + + +# WRAP PYTHON CLASSIC logger OBJECTS +class StructuredLogger_usingHandler(StructuredLogger): + def __init__(self, settings): + if not _Thread: + _late_import() + + self.logger = logging.Logger("unique name", level=logging.INFO) + self.logger.addHandler(make_log_from_settings(settings)) + + # TURNS OUT LOGGERS ARE REALLY SLOW TOO + self.queue = _Queue("queue for classic logger", max=10000, silent=True) + self.thread = _Thread( + "pushing to classic logger", + time_delta_pusher, + appender=self.logger.info, + queue=self.queue, + interval=0.3 + ) + self.thread.parent.remove_child(self.thread) # LOGGING WILL BE RESPONSIBLE FOR THREAD stop() + self.thread.start() + + def write(self, template, params): + # http://docs.python.org/2/library/logging.html#logging.LogRecord + self.queue.add({"template": template, "params": params}) + + def stop(self): + with suppress_exception: + self.queue.add(_THREAD_STOP) # BE PATIENT, LET REST OF MESSAGE BE SENT + self.thread.join() + + with suppress_exception: + self.queue.close() + + +def make_log_from_settings(settings): + assert settings["class"] + + settings = settings.copy() + + # IMPORT MODULE FOR HANDLER + path = settings["class"].split(".") + class_name = path[-1] + path = ".".join(path[:-1]) + constructor = None + try: + temp = __import__(path, globals(), locals(), [class_name], 0) + constructor = object.__getattribute__(temp, class_name) + except Exception as e: + if settings.stream and not constructor: + # PROVIDE A DEFAULT STREAM HANDLER + constructor = StructuredLogger_usingThreadedStream + else: + Log.error("Can not find class {{class}}", {"class": path}, cause=e) + + # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS + if settings.filename != None: + from mo_files import File + + f = File(settings.filename) + if not f.parent.exists: + f.parent.create() + + settings['class'] = None + params = unwrap(settings) + log_instance = constructor(**params) + return log_instance + diff --git a/vendor/mo_logs/log_usingLogger.py b/vendor/mo_logs/log_usingLogger.py index a0b4beb..7e06499 100644 --- a/vendor/mo_logs/log_usingLogger.py +++ b/vendor/mo_logs/log_usingLogger.py @@ -5,104 +5,22 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/.
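The new `log_usingHandler.py` builds a standard-library `Handler` from a settings object whose `class` key is a dotted import path. A minimal usage sketch, assuming `mo_dots.wrap` for the settings; the log filename is hypothetical:

from mo_dots import wrap
from mo_logs.log_usingHandler import StructuredLogger_usingHandler

settings = wrap({
    "class": "logging.FileHandler",  # dotted path, resolved by make_log_from_settings
    "filename": "logs/app.log",      # parent directory is created if missing
})
sink = StructuredLogger_usingHandler(settings)
sink.write("hello {{name}}", {"name": "world"})  # queued, pushed to the handler every 0.3s
sink.stop()  # drains the queue, then joins the pusher thread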
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import logging -from mo_logs import Log -from mo_logs.exceptions import suppress_exception from mo_logs.log_usingNothing import StructuredLogger -from mo_logs.log_usingThreadedStream import StructuredLogger_usingThreadedStream, time_delta_pusher -from mo_dots import unwrap - - -_THREAD_STOP = None -_Queue = None -_Thread = None - - -def _late_import(): - global _THREAD_STOP - global _Queue - global _Thread - - from mo_threads import THREAD_STOP as _THREAD_STOP - from mo_threads import Queue as _Queue - from mo_threads import Thread as _Thread - - _ = _THREAD_STOP - _ = _Queue - _ = _Thread +from mo_logs.strings import expand_template # WRAP PYTHON CLASSIC logger OBJECTS class StructuredLogger_usingLogger(StructuredLogger): def __init__(self, settings): - if not _Thread: - _late_import() - - self.logger = logging.Logger("unique name", level=logging.INFO) - self.logger.addHandler(make_log_from_settings(settings)) - - # TURNS OUT LOGGERS ARE REALLY SLOW TOO - self.queue = _Queue("queue for classic logger", max=10000, silent=True) - self.thread = _Thread( - "pushing to classic logger", - time_delta_pusher, - appender=self.logger.info, - queue=self.queue, - interval=0.3 - ) - self.thread.parent.remove_child(self.thread) # LOGGING WILL BE RESPONSIBLE FOR THREAD stop() - self.thread.start() + self.logger = logging.getLogger(settings.name) + self.logger.setLevel(logging.INFO) def write(self, template, params): - # http://docs.python.org/2/library/logging.html# logging.LogRecord - self.queue.add({"template": template, "params": params}) - - def stop(self): - with suppress_exception: - self.queue.add(_THREAD_STOP) # BE PATIENT, LET REST OF MESSAGE BE SENT - self.thread.join() - - with suppress_exception: - self.queue.close() - - -def make_log_from_settings(settings): - assert settings["class"] - - # IMPORT MODULE FOR HANDLER - path = settings["class"].split(".") - class_name = path[-1] - path = ".".join(path[:-1]) - constructor = None - try: - temp = __import__(path, globals(), locals(), [class_name], 0) - constructor = object.__getattribute__(temp, class_name) - except Exception as e: - if settings.stream and not constructor: - # PROVIDE A DEFAULT STREAM HANLDER - constructor = StructuredLogger_usingThreadedStream - else: - Log.error("Can not find class {{class}}", {"class": path}, cause=e) - - # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS - if settings.filename != None: - from mo_files import File - - f = File(settings.filename) - if not f.parent.exists: - f.parent.create() - - settings['class'] = None - params = unwrap(settings) - log_instance = constructor(**params) - return log_instance - + log_line = expand_template(template, params) + self.logger.info(log_line) diff --git a/vendor/mo_logs/log_usingMozLog.py b/vendor/mo_logs/log_usingMozLog.py index f4b85e4..7cc5e08 100644 --- a/vendor/mo_logs/log_usingMozLog.py +++ b/vendor/mo_logs/log_usingMozLog.py @@ -5,19 +5,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
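With that, `StructuredLogger_usingLogger` becomes a thin adapter: it expands the template and delegates to a named stdlib logger, leaving queuing and threading to the `logging` module itself. A sketch, with an illustrative logger name:

import logging
from mo_dots import wrap
from mo_logs.log_usingLogger import StructuredLogger_usingLogger

logging.basicConfig()  # any ordinary logging configuration works
sink = StructuredLogger_usingLogger(wrap({"name": "spot.manager"}))
sink.write("processed {{count}} requests", {"count": 42})
# -> INFO:spot.manager:processed 42 requests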
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary from decimal import Decimal from mo_dots import wrap -from mo_json import value2json, datetime2unix +from mo_json import datetime2unix, value2json from mo_kwargs import override from mo_logs import Log -from mo_logs.exceptions import ERROR, NOTE, WARNING, ALARM +from mo_logs.exceptions import ALARM, ERROR, NOTE, WARNING from mo_logs.log_usingElasticSearch import _deep_json_to_string from mo_logs.log_usingNothing import StructuredLogger diff --git a/vendor/mo_logs/log_usingMulti.py b/vendor/mo_logs/log_usingMulti.py index 84e76e8..263afcd 100644 --- a/vendor/mo_logs/log_usingMulti.py +++ b/vendor/mo_logs/log_usingMulti.py @@ -5,16 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from mo_logs import Log -from mo_logs.exceptions import suppress_exception +from mo_logs.exceptions import suppress_exception, Except from mo_logs.log_usingNothing import StructuredLogger @@ -28,6 +26,7 @@ class StructuredLogger_usingMulti(StructuredLogger): try: m.write(template, params) except Exception as e: + e = Except.wrap(e) bad.append(m) Log.warning("Logger {{type|quote}} failed! It will be removed.", type=m.__class__.__name__, cause=e) with suppress_exception: diff --git a/vendor/mo_logs/log_usingNothing.py b/vendor/mo_logs/log_usingNothing.py index 39973d6..9b5f576 100644 --- a/vendor/mo_logs/log_usingNothing.py +++ b/vendor/mo_logs/log_usingNothing.py @@ -5,15 +5,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary class StructuredLogger(object): """ ABSTRACT BASE CLASS FOR JSON LOGGING diff --git a/vendor/mo_logs/log_usingQueue.py b/vendor/mo_logs/log_usingQueue.py deleted file mode 100644 index 869c218..0000000 --- a/vendor/mo_logs/log_usingQueue.py +++ /dev/null @@ -1,44 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
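`log_usingMozLog.py` now imports the same `_deep_json_to_string` scrubber the ES writer defines. A hedged sketch of what that scrubber does to a record, assuming the (heavy) imports of `log_usingElasticSearch` are available; the record values are invented:

from datetime import datetime
from mo_logs.log_usingElasticSearch import _deep_json_to_string

record = {
    "template": "saw {{num}} failures",
    "params": {"num": 13, "when": datetime(2018, 10, 1)},
    "blob": b"\x00\x01",
}
flat = _deep_json_to_string(record, depth=3)
# numbers pass through, datetimes become unix floats, bytes become base64,
# and anything nested deeper than 3 levels is JSON-encoded and length-limited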
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from mo_logs.log_usingNothing import StructuredLogger -from mo_logs.strings import expand_template -from mo_threads import Queue - - -class StructuredLogger_usingQueue(StructuredLogger): - - def __init__(self, name=None): - queue_name = "log messages to queue" - if name: - queue_name += " "+name - self.queue = Queue(queue_name) - - def write(self, template, params): - self.queue.add(expand_template(template, params)) - - def stop(self): - self.queue.close() - - def pop(self): - lines = self.queue.pop() - output = [] - for l in lines.split("\n"): - if l[19:22] == " - ": - l = l[22:] - if l.strip().startswith("File"): - continue - output.append(l) - return "\n".join(output).strip() diff --git a/vendor/mo_logs/log_usingSES.py b/vendor/mo_logs/log_usingSES.py index 1a0b84b..e55d819 100644 --- a/vendor/mo_logs/log_usingSES.py +++ b/vendor/mo_logs/log_usingSES.py @@ -5,17 +5,16 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary from boto.ses import connect_to_region -from mo_dots import listwrap, unwrap, literal_field, Data +from mo_dots import Data, listwrap, literal_field, unwrap from mo_kwargs import override from mo_logs import Log, suppress_exception from mo_logs.exceptions import ALARM, NOTE diff --git a/vendor/mo_logs/log_usingStream.py b/vendor/mo_logs/log_usingStream.py index e6a282e..40c4ac8 100644 --- a/vendor/mo_logs/log_usingStream.py +++ b/vendor/mo_logs/log_usingStream.py @@ -5,19 +5,17 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import sys from mo_future import PY3, allocate_lock from mo_logs.log_usingNothing import StructuredLogger -from mo_logs.strings import expand_template +from mo_logs.strings import CR, expand_template class StructuredLogger_usingStream(StructuredLogger): @@ -36,7 +34,7 @@ class StructuredLogger_usingStream(StructuredLogger): value = expand_template(template, params) self.locker.acquire() try: - self.writer(value + "\n") + self.writer(value + CR) finally: self.locker.release() @@ -52,5 +50,6 @@ class _UTF8Encoder(object): def write(self, v): try: self.stream.write(v.encode('utf8')) + self.stream.flush() except Exception: sys.stderr.write("can not handle") diff --git a/vendor/mo_logs/log_usingThread.py b/vendor/mo_logs/log_usingThread.py index 7a0d7a1..f5dc58f 100644 --- a/vendor/mo_logs/log_usingThread.py +++ b/vendor/mo_logs/log_usingThread.py @@ -5,17 +5,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -from mo_logs import Log, Except, suppress_exception +from mo_logs import Except, Log, suppress_exception from mo_logs.log_usingNothing import StructuredLogger -from mo_threads import Thread, Queue, Till, THREAD_STOP +from mo_threads import Queue, THREAD_STOP, Thread, Till DEBUG = False diff --git a/vendor/mo_logs/log_usingThreadedStream.py b/vendor/mo_logs/log_usingThreadedStream.py index a644196..993f6ac 100644 --- a/vendor/mo_logs/log_usingThreadedStream.py +++ b/vendor/mo_logs/log_usingThreadedStream.py @@ -5,23 +5,22 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary import sys from time import time from mo_dots import Data -from mo_future import text_type, PY3 +from mo_future import PY3, text from mo_logs import Log from mo_logs.log_usingNothing import StructuredLogger -from mo_logs.strings import expand_template -from mo_threads import Thread, THREAD_STOP, Till +from mo_logs.strings import CR, expand_template +from mo_threads import THREAD_STOP, Thread, Till DEBUG_LOGGING = False @@ -32,7 +31,7 @@ class StructuredLogger_usingThreadedStream(StructuredLogger): def __init__(self, stream): assert stream - if isinstance(stream, text_type): + if is_text(stream): name = stream stream = self.stream = eval(stream) if name.startswith("sys.") and PY3: @@ -45,7 +44,7 @@ class StructuredLogger_usingThreadedStream(StructuredLogger): from mo_threads import Queue def utf8_appender(value): - if isinstance(value, text_type): + if is_text(value): value = value.encode('utf8') self.stream.write(value) @@ -113,10 +112,10 @@ def time_delta_pusher(please_stop, appender, queue, interval): Log.warning("Trouble formatting log from {{location}}", location=location, cause=e) # SWALLOW ERROR, GOT TO KEEP RUNNING try: - appender(u"\n".join(lines) + u"\n") + appender(CR.join(lines) + CR) except Exception as e: - sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n")) + sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str(CR)) # SWALLOW ERROR, MUST KEEP RUNNING diff --git a/vendor/mo_logs/startup.py b/vendor/mo_logs/startup.py index 4ef49dd..84df9b5 100644 --- a/vendor/mo_logs/startup.py +++ b/vendor/mo_logs/startup.py @@ -5,12 +5,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import argparse as _argparse import os @@ -18,10 +16,9 @@ import sys import tempfile import mo_json_config +from mo_dots import coalesce, listwrap, unwrap, wrap from mo_files import File from mo_logs import Log -from mo_dots import listwrap, wrap, unwrap, coalesce - # PARAMETERS MATCH argparse.ArgumentParser.add_argument() # https://docs.python.org/dev/library/argparse.html#the-add-argument-method @@ -44,7 +41,7 @@ class _ArgParser(_argparse.ArgumentParser): Log.error("argparse error: {{error}}", error=message) -def argparse(defs): +def argparse(defs, complain=True): parser = _ArgParser() for d in listwrap(defs): args = d.copy() @@ -52,18 +49,18 @@ args.name = None parser.add_argument(*unwrap(listwrap(name)), **args) namespace, unknown = parser.parse_known_args() - if unknown: + if unknown and complain: Log.warning("Ignoring arguments: {{unknown|json}}", unknown=unknown) output = {k: getattr(namespace, k) for k in vars(namespace)} return wrap(output) -def read_settings(filename=None, defs=None): +def read_settings(defs=None, filename=None, default_filename=None, complain=True): """ :param filename: Force load a file - :param defs: arguments you want to accept + :param defs: more arguments you want to accept (see https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser.add_argument) :param default_filename: A config file from an environment variable (a fallback config file, if no other provided) - :return: + :param complain: Complain about unknown arguments """ # READ SETTINGS defs = listwrap(defs) @@ -75,14 +72,20 @@ "default": None, "required": False }) - args = argparse(defs) + args = argparse(defs, complain) - args.filename = coalesce(filename, args.filename, "./config.json") + args.filename = coalesce( + filename, + args.filename if args.filename.endswith(".json") else None, + default_filename, + "./config.json" + ) settings_file = File(args.filename) - if not settings_file.exists: - Log.error("Can not read configuration file {{filename}}", { - "filename": settings_file.abspath - }) + if settings_file.exists: + Log.note("Using {{filename}} for configuration", filename=settings_file.abspath) + else: + Log.error("Can not read configuration file {{filename}}", filename=settings_file.abspath) + settings = mo_json_config.get_file(settings_file) settings.args = args return settings @@ -103,7 +106,7 @@ class SingleInstance: with SingleInstance(settings.args.filename): - This option is very useful if you have scripts executed by crontab at small amounts of time. + This option is very useful if you have scripts executed by crontab at short intervals, which can cause multiple instances to run at once. Remember that this works by creating a lock file with a filename based on the full path to the script file. """ diff --git a/vendor/mo_logs/strings.py b/vendor/mo_logs/strings.py index e724510..765423c 100644 --- a/vendor/mo_logs/strings.py +++ b/vendor/mo_logs/strings.py @@ -5,30 +5,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/.
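`read_settings` grew a fallback chain and a `complain` switch. A usage sketch; the file paths and the `--quick` flag are invented:

from mo_logs import startup

settings = startup.read_settings(
    defs=[{"name": ["--quick", "-q"], "action": "store_true", "help": "skip slow checks"}],
    default_filename="etc/default.json",  # used when --settings does not name a .json file
    complain=False,                       # stay quiet about unrecognized argv entries
)
# resolution order: explicit filename argument, then a --settings value ending
# in .json, then default_filename, then ./config.json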
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import cgi import json as _json import math import re import string -from collections import Mapping -from datetime import datetime as builtin_datetime -from datetime import timedelta, date +from datetime import date, datetime as builtin_datetime, timedelta from json.encoder import encode_basestring -from mo_dots import coalesce, wrap, get_module, Data -from mo_future import text_type, xrange, binary_type, round as _round, get_function_name, zip_longest, transpose, PY3 -from mo_logs.convert import datetime2unix, datetime2string, value2json, milli2datetime, unix2datetime - -# from mo_files.url import value2url_param +from mo_dots import Data, coalesce, get_module, is_data, is_list, wrap, is_sequence, NullType +from mo_future import PY3, get_function_name, is_text, round as _round, text, transpose, xrange, zip_longest, \ binary_type, Mapping +from mo_logs.convert import datetime2string, datetime2unix, milli2datetime, unix2datetime, value2json FORMATTERS = {} +CR = text("\n") _json_encoder = None _Log = None @@ -48,7 +44,11 @@ def _late_import(): _json_encoder = lambda value, pretty: _json.dumps(value) from mo_logs import Log as _Log from mo_logs.exceptions import Except as _Except - from mo_times.durations import Duration as _Duration + try: + from mo_times.durations import Duration as _Duration + except Exception as e: + _Duration = NullType + _Log.warning("It would be nice to pip install mo-times", cause=e) _ = _json_encoder _ = _Log @@ -60,7 +60,7 @@ def formatter(func): """ register formatters """ - FORMATTERS[get_function_name(func)]=func + FORMATTERS[get_function_name(func)] = func return func @@ -78,8 +78,13 @@ def datetime(value): else: value = milli2datetime(value) - return datetime2string(value, "%Y-%m-%d %H:%M:%S") - + output = datetime2string(value, "%Y-%m-%d %H:%M:%S.%f") + if output.endswith(".000000"): + return output[:-7] + elif output.endswith("000"): + return output[:-3] + else: + return output @formatter def unicode(value): @@ -90,7 +95,7 @@ """ if value == None: return "" - return text_type(value) + return text(value) @formatter @@ -157,7 +162,7 @@ def newline(value): """ ADD NEWLINE, IF SOMETHING """ - return "\n" + toString(value).lstrip("\n") + return CR + toString(value).lstrip(CR) @formatter @@ -179,7 +184,7 @@ def json(value, pretty=True): :param pretty: :return: """ - if not _Duration: + if _Duration is None: _late_import() return _json_encoder(value, pretty=pretty) @@ -191,15 +196,15 @@ def tab(value): :param value: :return: """ - if isinstance(value, Mapping): + if is_data(value): h, d = transpose(*wrap(value).leaves()) return ( "\t".join(map(value2json, h)) + - "\n" + + CR + "\t".join(map(value2json, d)) ) else: - text_type(value) + return text(value) @formatter @@ -219,9 +224,9 @@ def indent(value, prefix=u"\t", indent=None): content = value.rstrip() suffix = value[len(content):] lines = content.splitlines() - return prefix + (u"\n" + prefix).join(lines) + suffix + return prefix + (CR + prefix).join(lines) + suffix except Exception as e: - raise Exception(u"Problem with indent of value (" + e.message + u")\n" + text_type(toString(value))) + raise Exception(u"Problem with indent of value (" + text(e) + u")\n" + text(toString(value))) @formatter @@ -238,7 +243,7 @@
def outdent(value): trim = len(l.lstrip()) if trim > 0: num = min(num, len(l) - len(l.lstrip())) - return u"\n".join([l[num:] for l in lines]) + return CR.join([l[num:] for l in lines]) except Exception as e: if not _Log: _late_import() @@ -265,7 +270,7 @@ def round(value, decimal=None, digits=None, places=None): decimal = digits - left_of_decimal right_of_decimal = max(decimal, 0) - format = "{:." + text_type(right_of_decimal) + "f}" + format = "{:." + text(right_of_decimal) + "f}" return format.format(_round(value, decimal)) @@ -279,6 +284,9 @@ def percent(value, decimal=None, digits=None, places=None): :param places: :return: """ + if value == None: + return "" + value = float(value) if value == 0.0: return "0%" @@ -290,7 +298,7 @@ def percent(value, decimal=None, digits=None, places=None): decimal = coalesce(decimal, 0) right_of_decimal = max(decimal, 0) - format = "{:." + text_type(right_of_decimal) + "%}" + format = "{:." + text(right_of_decimal) + "%}" return format.format(_round(value, decimal + 2)) @@ -304,7 +312,7 @@ def find(value, find, start=0): :return: If NOT found, return the length of `value` string """ l = len(value) - if isinstance(find, list): + if is_list(find): m = l for f in find: i = value.find(f, start) @@ -354,7 +362,7 @@ def trim(value): @formatter -def between(value, prefix, suffix, start=0): +def between(value, prefix=None, suffix=None, start=0): """ Return first substring between `prefix` and `suffix` :param value: @@ -376,9 +384,12 @@ def between(value, prefix, suffix, start=0): return None s += len(prefix) - e = value.find(suffix, s) - if e == -1: - return None + if suffix is None: + e = len(value) + else: + e = value.find(suffix, s) + if e == -1: + return None s = value.rfind(prefix, start, e) + len(prefix) # WE KNOW THIS EXISTS, BUT THERE MAY BE A RIGHT-MORE ONE @@ -408,7 +419,7 @@ def right_align(value, length): if length <= 0: return u"" - value = text_type(value) + value = text(value) if len(value) < length: return (" " * (length - len(value))) + value @@ -421,7 +432,7 @@ def left_align(value, length): if length <= 0: return u"" - value = text_type(value) + value = text(value) if len(value) < length: return value + (" " * (length - len(value))) @@ -453,7 +464,7 @@ def comma(value): else: output = "{:,}".format(float(value)) except Exception: - output = text_type(value) + output = text(value) return output @@ -467,7 +478,7 @@ def quote(value): """ if value == None: output = "" - elif isinstance(value, text_type): + elif is_text(value): output = encode_basestring(value) else: output = _json.dumps(value) @@ -489,8 +500,12 @@ _SNIP = "......" 
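Quick checks for the formatter changes above (`between` now treats a missing `suffix` as read-to-end); the sample strings are invented:

from mo_logs.strings import between, comma, quote

assert between("error in [main] thread", "[", "]") == "main"
assert between("key=value", "=") == "value"   # suffix omitted: take the rest of the string
assert comma(1234567) == "1,234,567"
assert quote('say "hi"') == '"say \\"hi\\""'  # JSON-style quoting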
@formatter def limit(value, length): + """ + LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED + """ + if value == None: + return None try: - # LIMIT THE STRING value TO GIVEN LENGTH, CHOPPING OUT THE MIDDLE IF REQUIRED if len(value) <= length: return value elif length < len(_SNIP) * 2: @@ -500,12 +515,12 @@ def limit(value, length): rhs = length - len(_SNIP) - lhs return value[:lhs] + _SNIP + value[-rhs:] except Exception as e: - if not _Duration: + if _Duration is None: _late_import() _Log.error("Not expected", cause=e) @formatter -def split(value, sep="\n"): +def split(value, sep=CR): # GENERATOR VERSION OF split() # SOMETHING TERRIBLE HAPPENS, SOMETIMES, IN PYPY s = 0 @@ -526,14 +541,17 @@ THE REST OF THIS FILE IS TEMPLATE EXPANSION CODE USED BY mo-logs def expand_template(template, value): """ :param template: A UNICODE STRING WITH VARIABLE NAMES IN MOUSTACHES `{{.}}` - :param value: Data HOLDING THE PARAMTER VALUES + :param value: Data HOLDING THE PARAMETER VALUES :return: UNICODE STRING WITH VARIABLES EXPANDED """ - value = wrap(value) - if isinstance(template, text_type): - return _simple_expand(template, (value,)) + try: + value = wrap(value) + if is_text(template): + return _simple_expand(template, (value,)) - return _expand(template, (value,)) + return _expand(template, (value,)) + except Exception as e: + return "FAIL TO EXPAND: " + template def common_prefix(*args): @@ -572,7 +590,7 @@ def deformat(value): FOR SOME REASON translate CAN NOT BE CALLED: ERROR: translate() takes exactly one argument (2 given) - File "C:\Python27\lib\string.py", line 493, in translate + File "C:\\Python27\\lib\\string.py", line 493, in translate """ output = [] for c in value: @@ -589,9 +607,11 @@ def _expand(template, seq): """ seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE """ - if isinstance(template, text_type): + if is_text(template): return _simple_expand(template, seq) - elif isinstance(template, Mapping): + elif is_data(template): + # EXPAND LISTS OF ITEMS USING THIS FORM + # {"from":from, "template":template, "separator":separator} template = wrap(template) assert template["from"], "Expecting template to have 'from' attribute" assert template.template, "Expecting template to have 'template' attribute" @@ -602,7 +622,7 @@ def _expand(template, seq): s = seq + (d,) output.append(_expand(template.template, s)) return coalesce(template.separator, "").join(output) - elif isinstance(template, list): + elif is_list(template): return "".join(_expand(t, seq) for t in template) else: if not _Log: @@ -626,7 +646,7 @@ def _simple_expand(template, seq): try: val = seq[-depth] if var: - if isinstance(val, (list, tuple)) and float(var) == _round(float(var), 0): + if is_sequence(val) and float(var) == _round(float(var), 0): val = val[int(var)] else: val = val[var] @@ -662,7 +682,7 @@ def _simple_expand(template, seq): def toString(val): - if not _Duration: + if _Duration is None: _late_import() if val == None: @@ -672,13 +692,13 @@ def toString(val): elif hasattr(val, "__json__"): return val.__json__() elif isinstance(val, _Duration): - return text_type(round(val.seconds, places=4)) + " seconds" + return text(round(val.seconds, places=4)) + " seconds" elif isinstance(val, timedelta): duration = val.total_seconds() - return text_type(round(duration, 3)) + " seconds" - elif isinstance(val, text_type): + return text(round(duration, 3)) + " seconds" + elif is_text(val): return val - elif isinstance(val, str): + elif isinstance(val, binary_type): try: return 
val.decode('utf8') except Exception as _: @@ -690,15 +710,15 @@ def toString(val): if not _Log: _late_import() - _Log.error(text_type(type(val)) + " type can not be converted to unicode", cause=e) + _Log.error(text(type(val)) + " type can not be converted to unicode", cause=e) else: try: - return text_type(val) + return text(val) except Exception as e: if not _Log: _late_import() - _Log.error(text_type(type(val)) + " type can not be converted to unicode", cause=e) + _Log.error(text(type(val)) + " type can not be converted to unicode", cause=e) def edit_distance(s1, s2): @@ -851,51 +871,10 @@ def apply_diff(text, diff, reverse=False, verify=True): return output -def unicode2utf8(value): - return value.encode('utf8') - - -def utf82unicode(value): - """ - WITH EXPLANATION FOR FAILURE - """ - try: - return value.decode("utf8") - except Exception as e: - if not _Log: - _late_import() - - if not isinstance(value, binary_type): - _Log.error("Can not convert {{type}} to unicode because it's not bytes", type= type(value).__name__) - - e = _Except.wrap(e) - for i, c in enumerate(value): - try: - c.decode("utf8") - except Exception as f: - _Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)]) - - try: - latin1 = text_type(value.decode("latin1")) - _Log.error("Can not explain conversion failure, but seems to be latin1", e) - except Exception: - pass - - try: - a = text_type(value.decode("latin1")) - _Log.error("Can not explain conversion failure, but seems to be latin1", e) - except Exception: - pass - - _Log.error("Can not explain conversion failure of " + type(value).__name__ + "!", e) - - def wordify(value): return [w for w in re.split(r"[\W_]", value) if strip(w)] - - def pairwise(values): """ WITH values = [a, b, c, d, ...] diff --git a/vendor/mo_math/__init__.py b/vendor/mo_math/__init__.py index 300a94d..26a1e04 100644 --- a/vendor/mo_math/__init__.py +++ b/vendor/mo_math/__init__.py @@ -5,269 +5,255 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import math +import base64 +from math import ( + pow as math_pow, + exp as math_exp, + log as math_log, + isnan as math_isnan, + ceil as math_ceil, + log10 as math_log10, + floor as math_floor, +) -from mo_dots import Null, coalesce -from mo_future import round as _round +from mo_dots import Null, coalesce, is_container +from mo_future import round as _round, text, __builtin__, binary_type + +""" +MATH FUNCTIONS THAT ASSUME None IMPLY *NOT APPLICABLE* RATHER THAN *MISSING* +LET "." BE SOME OPERATOR (+, -, *, etc) +a.None == None +None.a == None +.None == None +func(None, *kwargs)) == None +""" + +math_abs = __builtin__.abs -class Math(object): - """ - MATH FUNCTIONS THAT ASSUME None IMPLY *NOT APPLICABLE* RATHER THAN *MISSING* - LET "." 
BE SOME OPERATOR (+, -, *, etc) - a.None == None - None.a == None - .None == None - func(None, *kwargs)) == None - """ +def bayesian_add(*args): + a = args[0] + if a >= 1 or a <= 0: + from mo_logs import Log + Log.error("Only allowed values *between* zero and one") - @staticmethod - def bayesian_add(*args): - a = args[0] - if a >= 1 or a <= 0: + for b in args[1:]: + if b == None: + continue + if b >= 1 or b <= 0: from mo_logs import Log + Log.error("Only allowed values *between* zero and one") + a = a * b / (a * b + (1 - a) * (1 - b)) - for b in args[1:]: - if b == None: - continue - if b >= 1 or b <= 0: - from mo_logs import Log - Log.error("Only allowed values *between* zero and one") - a = a * b / (a * b + (1 - a) * (1 - b)) - - return a - - @staticmethod - def bayesian_subtract(a, b): - return Math.bayesian_add(a, 1 - b) + return a - @staticmethod - def abs(v): +def bayesian_subtract(a, b): + return bayesian_add(a, 1 - b) + + +def abs(v): + if v == None: + return Null + return math_abs(v) + + +def pow(n, p): + if n == None or p == None: + return None + return math_pow(n, p) + + +def exp(v): + if v == None: + return Null + return math_exp(v) + + +def log(v, base=None): + try: if v == None: return Null - return abs(v) + if v == 0.0: + return -float("inf") + if base == None: + return math_log(v) + return math_log(v, base) + except Exception as e: + from mo_logs import Log - @staticmethod - def pow(n, p): - if n == None or p == None: - return None - return math.pow(n, p) + Log.error("error in log", cause=e) - @staticmethod - def exp(v): - if v == None: - return Null - return math.exp(v) - @staticmethod - def log(v, base=None): +def log10(v): + try: + return math_log(v, 10) + except Exception as e: + return Null + + +# FOR GOODNESS SAKE - IF YOU PROVIDE A METHOD abs(), PLEASE PROVIDE ITS COMPLEMENT +# x = abs(x)*sign(x) +# FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION + + +def sign(v): + if v == None: + return Null + if v < 0: + return -1 + if v > 0: + return +1 + return 0 + + +def is_nan(s): + return s == None or math_isnan(s) + + +def is_finite(s): + try: + f = float(s) + if math_abs(f) == float("+inf"): + return False + return True + except Exception: + return False + + +def is_hex(value): + try: + int(value, 16) + return True + except Exception: + return False + + +def is_integer(s): + if s is True or s is False: + return False + + try: + if float(s) == round(float(s), 0): + return True + return False + except Exception: + return False + + +def round(value, decimal=0, digits=None): + """ + ROUND TO GIVEN NUMBER OF DIGITS, OR GIVEN NUMBER OF DECIMAL PLACES + decimal - NUMBER OF DIGITS AFTER DECIMAL POINT (NEGATIVE IS VALID) + digits - NUMBER OF SIGNIFICANT DIGITS (LESS THAN 1 IS INVALID) + """ + if value == None: + return None + elif value == 0: + return 0 + else: + value = float(value) + + if digits != None: try: - if v == None: - return Null - if v == 0.0: - return -float("inf") - if base == None: - return math.log(v) - return math.log(v, base) + if digits <= 0: + return sign(value) * pow(10, round(math_log10(abs(value)), 0)) + m = pow(10, math_ceil(math_log10(abs(value)))) + return _round(value / m, 0) * m except Exception as e: from mo_logs import Log - Log.error("error in log") + + Log.error("not expected", e) + elif decimal <= 0: + return int(_round(value, decimal)) + else: + return _round(value, decimal) - @staticmethod - def log10(v): - try: - return math.log(v, 10) - except Exception as e: - return Null - - # FOR GOODNESS SAKE - IF YOU 
PROVIDE A METHOD abs(), PLEASE PROVIDE ITS COMPLEMENT - # x = abs(x)*sign(x) - # FOUND IN numpy, BUT WE USUALLY DO NOT NEED TO BRING IN A BIG LIB FOR A SIMPLE DECISION - @staticmethod - def sign(v): - if v == None: - return Null - if v < 0: - return -1 - if v > 0: - return +1 - return 0 +def floor(value, mod=1): + """ + x == floor(x, a) + mod(x, a) FOR ALL a, x + RETURN None WHEN GIVEN INVALID ARGUMENTS + """ + if value == None: + return None + elif mod <= 0: + return None + elif mod == 1: + return int(math_floor(value)) + elif is_integer(mod): + return int(math_floor(value / mod)) * mod + else: + return math_floor(value / mod) * mod - @staticmethod - def is_number(s): - if s is True or s is False or s == None: - return False - - try: - s = float(s) - return not math.isnan(s) - except Exception: - return False - - @staticmethod - def is_nan(s): - return s==None or math.isnan(s) - - @staticmethod - def is_finite(s): - try: - f = float(s) - if abs(f) == float("+inf"): - return False - return True - except Exception: - return False - - @staticmethod - def is_hex(value): - try: - int(value, 16) - return True - except Exception: - return False - - @staticmethod - def is_integer(s): - if s is True or s is False: - return False - - try: - if float(s) == round(float(s), 0): - return True - return False - except Exception: - return False - - @staticmethod - def round(value, decimal=7, digits=None): - """ - ROUND TO GIVEN NUMBER OF DIGITS, OR GIVEN NUMBER OF DECIMAL PLACES - decimal - NUMBER OF DIGITS AFTER DECIMAL POINT (NEGATIVE IS VALID) - digits - NUMBER OF SIGNIFICANT DIGITS (LESS THAN 1 IS INVALID) - """ - if value == None: - return None - else: - value = float(value) - - if digits != None: - if digits <= 0: - if value == 0: - return int(_round(value, digits)) - try: - m = pow(10, math.ceil(math.log10(abs(value)))) - return int(_round(value / m, digits) * m) - except Exception as e: - from mo_logs import Log - - Log.error("not expected", e) - else: - if value == 0: - return _round(value, digits) - try: - m = pow(10, math.ceil(math.log10(abs(value)))) - return _round(value / m, digits) * m - except Exception as e: - from mo_logs import Log - Log.error("not expected", e) - elif decimal <= 0: - return int(_round(value, decimal)) - else: - return _round(value, decimal) +def mod(value, mod=1): + """ + RETURN NON-NEGATIVE MODULO + RETURN None WHEN GIVEN INVALID ARGUMENTS + """ + if value == None: + return None + elif mod <= 0: + return None + elif value < 0: + return (value % mod + mod) % mod + else: + return value % mod - @staticmethod - def floor(value, mod=1): - """ - x == Math.floor(x, a) + Math.mod(x, a) FOR ALL a, x - RETURN None WHEN GIVEN INVALID ARGUMENTS - """ - if value == None: - return None - elif mod <= 0: - return None - elif mod == 1: - return int(math.floor(value)) - elif Math.is_integer(mod): - return int(math.floor(value/mod))*mod - else: - return math.floor(value/mod)*mod +# RETURN A VALUE CLOSE TO value, BUT WITH SHORTER len(text(value)) 0: from mo_logs import Log + Log.error("no longer accepting args, use a single list") output = Null for v in values: if v == None: continue - if isinstance(v, float) and math.isnan(v): + if isinstance(v, float) and math_isnan(v): continue if output == None: output = v @@ -355,6 +343,7 @@ def PRODUCT(values, *others): def AND(values, *others): if len(others) > 0: from mo_logs import Log + Log.error("no longer accepting args, use a single list") for v in values: @@ -368,6 +357,7 @@ def AND(values, *others): def OR(values, *others): if len(others) 
> 0: from mo_logs import Log + Log.error("no longer accepting args, use a single list") for v in values: @@ -381,13 +371,14 @@ def OR(values, *others): def UNION(values, *others): if len(others) > 0: from mo_logs import Log + Log.error("no longer accepting args, use a single list") output = set() for v in values: if values == None: continue - if isinstance(v, (list, set)): + if is_container(v): output.update(v) continue else: @@ -395,9 +386,21 @@ def UNION(values, *others): return output +def is_number(s): + if s is True or s is False or s == None: + return False + + try: + s = float(s) + return not math_isnan(s) + except Exception: + return False + + def INTERSECT(values, *others): if len(others) > 0: from mo_logs import Log + Log.error("no longer accepting args, use a single list") output = set(values[0]) @@ -418,14 +421,47 @@ def almost_equal(first, second, digits=None, places=None, delta=None): return True else: places = coalesce(places, digits, 18) - diff = math.log10(abs(first - second)) - if diff < Math.ceiling(math.log10(first)) - places: + diff = math_log10(abs(first - second)) + if diff < ceiling(math_log10(first)) - places: return True return False except Exception as e: from mo_logs import Log + Log.error("problem comparing", cause=e) + +def bytes2base64(value): + if isinstance(value, bytearray): + value = binary_type(value) + return base64.b64encode(value).decode("latin1") + + +def bytes2base64URL(value): + """ + RETURN URL-FRIENDLY VERSION OF BASE64 + """ + if isinstance(value, bytearray): + value = binary_type(value) + return base64.b64encode(value, b"-_").rstrip(b"=").decode("latin1") + + +def base642bytes(value): + if value == None: + return b"" + else: + return base64.b64decode(value) + + +def int2base64(value): + return bytes2base64(value.to_bytes((value.bit_length() + 7) // 8, byteorder="big")) + + +def base642int(value): + return int.from_bytes(base642bytes(value), byteorder="big") + + from mo_math import stats + _ = stats diff --git a/vendor/mo_math/aes_crypto.py b/vendor/mo_math/aes_crypto.py new file mode 100644 index 0000000..bd5a95c --- /dev/null +++ b/vendor/mo_math/aes_crypto.py @@ -0,0 +1,107 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
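The base64 helpers now live in `mo_math/__init__.py` alongside new integer codecs. A round-trip sketch:

from mo_math import base642bytes, base642int, bytes2base64, bytes2base64URL, int2base64

blob = b"\xfa\xce\xff"
assert base642bytes(bytes2base64(blob)) == blob
assert "=" not in bytes2base64URL(b"\xfa")         # URL-safe alphabet, padding stripped
assert base642int(int2base64(2 ** 40)) == 2 ** 40  # int codec is PY3-only (int.to_bytes)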
+# + +from __future__ import absolute_import, division, unicode_literals + +from mo_dots import Data, get_module +from mo_future import PY2, binary_type +from mo_future import is_text, is_binary +from mo_logs import Log +from mo_math import base642bytes, crypto, bytes2base64 +from mo_math.vendor.aespython import aes_cipher, cbc_mode, key_expander + +DEBUG = False + + +def encrypt(text, _key, salt=None): + """ + RETURN {"salt":s, "length":l, "data":d} -> JSON -> UTF8 + """ + + if is_text(text): + encoding = "utf8" + data = bytearray(text.encode("utf8")) + elif is_binary(text): + encoding = None + if PY2: + data = bytearray(text) + else: + data = text + + if _key is None: + Log.error("Expecting a key") + if is_binary(_key): + _key = bytearray(_key) + if salt is None: + salt = crypto.bytes(16) + + # Initialize encryption using key and iv + key_expander_256 = key_expander.KeyExpander(256) + expanded_key = key_expander_256.expand(_key) + aes_cipher_256 = aes_cipher.AESCipher(expanded_key) + aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16) + aes_cbc_256.set_iv(salt) + + output = Data() + output.type = "AES256" + output.salt = bytes2base64(salt) + output.length = len(data) + output.encoding = encoding + + encrypted = bytearray() + for _, d in _groupby16(data): + encrypted.extend(aes_cbc_256.encrypt_block(d)) + output.data = bytes2base64(encrypted) + json = get_module("mo_json").value2json(output, pretty=True).encode("utf8") + + if DEBUG: + test = decrypt(json, _key) + if test != text: + Log.error("problem with encryption") + + return json + + +def decrypt(data, _key): + """ + ACCEPT BYTES -> UTF8 -> JSON -> {"salt":s, "length":l, "data":d} + """ + # Key and iv have not been generated or provided, bail out + if _key is None: + Log.error("Expecting a key") + + _input = get_module("mo_json").json2value( + data.decode("utf8"), leaves=False, flexible=False + ) + + # Initialize encryption using key and iv + key_expander_256 = key_expander.KeyExpander(256) + expanded_key = key_expander_256.expand(_key) + aes_cipher_256 = aes_cipher.AESCipher(expanded_key) + aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16) + aes_cbc_256.set_iv(base642bytes(_input.salt)) + + raw = base642bytes(_input.data) + out_data = bytearray() + for _, e in _groupby16(raw): + out_data.extend(aes_cbc_256.decrypt_block(e)) + + if _input.encoding: + return binary_type(out_data[: _input.length :]).decode(_input.encoding) + else: + return binary_type(out_data[: _input.length :]) + + +def _groupby16(bytes): + count = 0 + index = 0 + length = len(bytes) + while index < length: + yield count, bytes[index : index + 16] + count += 1 + index += 16 diff --git a/vendor/mo_math/crypto.py b/vendor/mo_math/crypto.py index 8371096..4c267d2 100644 --- a/vendor/mo_math/crypto.py +++ b/vendor/mo_math/crypto.py @@ -5,120 +5,11 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
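A hedged round-trip for the relocated AES-256-CBC helpers; the plaintext is invented, and the key must be 32 bytes:

from mo_math import crypto
from mo_math.aes_crypto import decrypt, encrypt

key = bytearray(crypto.bytes(32))        # 256-bit key from the CSPRNG
packet = encrypt("secret message", key)  # UTF8 JSON bytes: {"type": "AES256", "salt": ..., "data": ...}
assert decrypt(packet, key) == "secret message"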
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import base64 - -from mo_dots import Data, get_module -from mo_future import text_type, binary_type, PY2 -from mo_logs import Log -from mo_math.randoms import Random -from mo_math.vendor.aespython import key_expander, aes_cipher, cbc_mode - -DEBUG = False +import secrets -def encrypt(text, _key, salt=None): - """ - RETURN {"salt":s, "length":l, "data":d} -> JSON -> UTF8 - """ - - if isinstance(text, text_type): - encoding = 'utf8' - data = bytearray(text.encode("utf8")) - elif isinstance(text, binary_type): - encoding = None - if PY2: - data = bytearray(text) - else: - data = text - - if _key is None: - Log.error("Expecting a key") - if isinstance(_key, binary_type): - _key = bytearray(_key) - if salt is None: - salt = Random.bytes(16) - - # Initialize encryption using key and iv - key_expander_256 = key_expander.KeyExpander(256) - expanded_key = key_expander_256.expand(_key) - aes_cipher_256 = aes_cipher.AESCipher(expanded_key) - aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16) - aes_cbc_256.set_iv(salt) - - output = Data() - output.type = "AES256" - output.salt = bytes2base64(salt) - output.length = len(data) - output.encoding = encoding - - encrypted = bytearray() - for _, d in _groupby16(data): - encrypted.extend(aes_cbc_256.encrypt_block(d)) - output.data = bytes2base64(encrypted) - json = get_module("mo_json").value2json(output, pretty=True).encode('utf8') - - if DEBUG: - test = decrypt(json, _key) - if test != text: - Log.error("problem with encryption") - - return json - - -def decrypt(data, _key): - """ - ACCEPT BYTES -> UTF8 -> JSON -> {"salt":s, "length":l, "data":d} - """ - # Key and iv have not been generated or provided, bail out - if _key is None: - Log.error("Expecting a key") - - _input = get_module("mo_json").json2value(data.decode('utf8'), leaves=False, flexible=False) - - # Initialize encryption using key and iv - key_expander_256 = key_expander.KeyExpander(256) - expanded_key = key_expander_256.expand(_key) - aes_cipher_256 = aes_cipher.AESCipher(expanded_key) - aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16) - aes_cbc_256.set_iv(base642bytearray(_input.salt)) - - raw = base642bytearray(_input.data) - out_data = bytearray() - for _, e in _groupby16(raw): - out_data.extend(aes_cbc_256.decrypt_block(e)) - - if _input.encoding: - return binary_type(out_data[:_input.length:]).decode(_input.encoding) - else: - return binary_type(out_data[:_input.length:]) - - - -def bytes2base64(value): - if isinstance(value, bytearray): - value = binary_type(value) - return base64.b64encode(value).decode("utf8") - - -def base642bytearray(value): - if value == None: - return bytearray(b"") - else: - return bytearray(base64.b64decode(value)) - - -def _groupby16(bytes): - count = 0 - index = 0 - length = len(bytes) - while index < length: - yield count, bytes[index: index + 16] - count += 1 - index += 16 +def bytes(count): + return secrets.token_bytes(count) diff --git a/vendor/mo_math/hashes.py b/vendor/mo_math/hashes.py new file mode 100644 index 0000000..a2625fb --- /dev/null +++ b/vendor/mo_math/hashes.py @@ -0,0 +1,20 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
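# The rewritten crypto.bytes() above simply delegates to the standard
# library: secrets.token_bytes returns cryptographically strong random
# bytes (the secrets module is Python 3.6+). Equivalent call:
import secrets

salt = secrets.token_bytes(16)  # e.g. a fresh 16-byte CBC salt/IV
assert len(salt) == 16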
+# +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) +# + +from __future__ import absolute_import, division, unicode_literals + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.hashes import SHA256, Hash + + +def sha256(bytes): + digest = Hash(SHA256(), backend=default_backend()) + digest.update(bytes) + return digest.finalize() diff --git a/vendor/mo_math/randoms.py b/vendor/mo_math/randoms.py index 4fe95aa..7ea689d 100644 --- a/vendor/mo_math/randoms.py +++ b/vendor/mo_math/randoms.py @@ -1,9 +1,7 @@ # encoding: utf-8 # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import random import string @@ -19,18 +17,18 @@ class Random(object): @staticmethod def string(length, alphabet=SIMPLE_ALPHABET): - result = '' + result = "" for i in range(length): result += SEED.choice(alphabet) return result @staticmethod def hex(length): - return Random.string(length, string.digits + 'ABCDEF') + return Random.string(length, string.digits + "ABCDEF") @staticmethod - def base64(length): - return Random.string(length, SIMPLE_ALPHABET + '+/') + def base64(length, extra="+/"): + return Random.string(length, SIMPLE_ALPHABET + extra) @staticmethod def int(*args): @@ -43,7 +41,7 @@ class Random(object): @staticmethod def float(*args): if args: - return SEED.random()*args[0] + return SEED.random() * args[0] else: return SEED.random() @@ -58,7 +56,7 @@ class Random(object): data = list(data) num = len(data) for i in range(num): - n = Random.int(num-i) + n = Random.int(num - i) output.append(data[n]) del data[n] return output diff --git a/vendor/mo_math/rsa_crypto.py b/vendor/mo_math/rsa_crypto.py new file mode 100644 index 0000000..67b1e3a --- /dev/null +++ b/vendor/mo_math/rsa_crypto.py @@ -0,0 +1,77 @@ +# encoding: utf-8 +# +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
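# The sha256() above should agree with the stdlib digest, since hashlib and
# the cryptography package implement the same SHA-256; a quick cross-check:
import hashlib

from mo_math.hashes import sha256

payload = b"some bytes"
assert sha256(payload) == hashlib.sha256(payload).digest()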
+# + +from __future__ import absolute_import, division, unicode_literals + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.asymmetric import padding +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicNumbers + +from mo_dots import Data, wrap +from mo_json import value2json, json2value +from mo_math import bytes2base64, base642bytes, int2base64, base642int + + +SHA256 = hashes.SHA256() +PSS = padding.PSS( + mgf=padding.MGF1(SHA256), salt_length=padding.PSS.MAX_LENGTH +) +PADDING = { + "PSS": PSS +} +ALGORITHM = { + "SHA256": SHA256 +} + +BACKEND = default_backend() + + +def generate_key(bits=512): + private_key = rsa.generate_private_key( + public_exponent=65537, + key_size=bits, + backend=BACKEND + ) + nums = private_key.public_key().public_numbers() + public_key = Data(e=nums.e, n=int2base64(nums.n)) + return public_key, private_key + + +def sign(message, private_key): + data = value2json(message).encode("utf8") + + # SIGN DATA/STRING + signature = private_key.sign(data=data, padding=PSS, algorithm=SHA256) + + return wrap({ + "data": bytes2base64(data), + "signature": bytes2base64(signature), + "padding": "PSS", + "algorithm": "SHA256" + }) + + +def verify(signed, public_key): + data = base642bytes(signed.data) + signature = base642bytes(signed.signature) + + key = RSAPublicNumbers( + public_key.e, + base642int(public_key.n) + ).public_key(BACKEND) + + key.verify( + signature=signature, + data=data, + padding=PADDING.get(signed.padding, PSS), + algorithm=ALGORITHM.get(signed.algorithm, SHA256), + ) + + return json2value(data.decode("utf8")) diff --git a/vendor/mo_math/stats.py b/vendor/mo_math/stats.py index b601d50..b58a09c 100644 --- a/vendor/mo_math/stats.py +++ b/vendor/mo_math/stats.py @@ -5,26 +5,23 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
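# A round-trip sketch of the rsa_crypto API above: sign() wraps the
# JSON-encoded message with a base64 PSS signature, and verify() rebuilds
# the public key from (e, n) and raises InvalidSignature on tampering.
# Note the default key_size of 512 bits is demo-sized; real keys should be
# 2048 bits or more:
from mo_math.rsa_crypto import generate_key, sign, verify

public_key, private_key = generate_key(bits=2048)
signed = sign({"amount": 10}, private_key)
assert verify(signed, public_key).amount == 10  # returns the mo_dots value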
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import math import sys from math import sqrt -from mo_future import text_type -from mo_dots import coalesce, Data, Null +from mo_dots import Data, Null, coalesce +from mo_future import text, zip_longest from mo_logs import Log -from mo_math import OR -from mo_math import almost_equal +from mo_math import OR, almost_equal from mo_math.vendor import strangman DEBUG = True -DEBUG_STRANGMAN = True +DEBUG_STRANGMAN = False EPSILON = 0.000000001 ABS_EPSILON = sys.float_info.min * 2 # *2 FOR SAFETY @@ -39,21 +36,17 @@ if DEBUG_STRANGMAN: def chisquare(f_obs, f_exp): try: - py_result = strangman.stats.chisquare( - f_obs, - f_exp - ) + py_result = strangman.stats.chisquare(f_obs, f_exp) except Exception as e: Log.error("problem with call", e) if DEBUG_STRANGMAN: from mo_testing.fuzzytestcase import assertAlmostEqualValue - sp_result = scipy.stats.chisquare( - np.array(f_obs), - f_exp=np.array(f_exp) - ) - if not assertAlmostEqualValue(sp_result[0], py_result[0], digits=9) and assertAlmostEqualValue(sp_result[1], py_result[1], delta=1e-8): + sp_result = scipy.stats.chisquare(np.array(f_obs), f_exp=np.array(f_exp)) + if not assertAlmostEqualValue( + sp_result[0], py_result[0], digits=9 + ) and assertAlmostEqualValue(sp_result[1], py_result[1], delta=1e-8): Log.error("problem with stats lib") return py_result @@ -62,14 +55,20 @@ def chisquare(f_obs, f_exp): def Stats2ZeroMoment(stats): # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html # ADDED count - mc0, mc1, mc2, skew, kurt = stats.count, coalesce(stats.mean, 0), coalesce(stats.variance, 0), coalesce(stats.skew, 0), coalesce(stats.kurtosis, 0) + mc0, mc1, mc2, skew, kurt = ( + stats.count, + coalesce(stats.mean, 0), + coalesce(stats.variance, 0), + coalesce(stats.skew, 0), + coalesce(stats.kurtosis, 0), + ) mz0 = mc0 mz1 = mc1 * mc0 mz2 = (mc2 + mc1 * mc1) * mc0 - mc3 = coalesce(skew, 0) * (mc2 ** 1.5) # 3rd central moment + mc3 = coalesce(skew, 0) * (mc2 ** 1.5) # 3rd central moment mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0 # 3rd non-central moment - mc4 = (coalesce(kurt, 0) + 3.0) * (mc2 ** 2.0) # 4th central moment + mc4 = (coalesce(kurt, 0) + 3.0) * (mc2 ** 2.0) # 4th central moment mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0 m = ZeroMoment(mz0, mz1, mz2, mz3, mz4) @@ -93,6 +92,8 @@ def Stats2ZeroMoment(stats): def ZeroMoment2Stats(z_moment): Z = z_moment.S + if not Z: + return Stats() N = Z[0] if N == 0: return Stats() @@ -112,19 +113,13 @@ def ZeroMoment2Stats(z_moment): skew = None kurtosis = None else: - variance = (Z2 - mean * mean) - mc3 = (Z3 - (3 * mean * variance + mean ** 3)) # 3rd central moment - mc4 = (Z4 - (4 * mean * mc3 + 6 * mean * mean * variance + mean ** 4)) + variance = Z2 - mean * mean + mc3 = Z3 - (3 * mean * variance + mean ** 3) # 3rd central moment + mc4 = Z4 - (4 * mean * mc3 + 6 * mean * mean * variance + mean ** 4) skew = mc3 / (variance ** 1.5) kurtosis = (mc4 / (variance ** 2.0)) - 3.0 - stats = Stats( - count=N, - mean=mean, - variance=variance, - skew=skew, - kurtosis=kurtosis - ) + stats = Stats(count=N, mean=mean, variance=variance, skew=skew, kurtosis=kurtosis) if DEBUG: from mo_testing.fuzzytestcase import assertAlmostEqualValue @@ -136,11 +131,12 @@ 
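# The moment arithmetic above, worked on a concrete sample: with zero
# moments Z = (n, Σx, Σx², Σx³, Σx⁴), the stats fall out as mean = Z1/Z0
# and variance = Z2/Z0 - mean² (population form), exactly as in
# ZeroMoment2Stats:
values = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
Z = (
    len(values),
    sum(values),
    sum(v ** 2 for v in values),
    sum(v ** 3 for v in values),
    sum(v ** 4 for v in values),
)
mean = Z[1] / Z[0]                    # 5.0
variance = Z[2] / Z[0] - mean * mean  # 4.0
assert (mean, variance) == (5.0, 4.0)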
def ZeroMoment2Stats(z_moment): for i in range(5): assertAlmostEqualValue(v.S[i], Z[i], places=7) except Exception as e: - Log.error("Conversion failed. Programmer error:\nfrom={{from|indent}},\nresult stats={{stats|indent}},\nexpected param={{expected|indent}}", + Log.error( + "Conversion failed. Programmer error:\nfrom={{from|indent}},\nresult stats={{stats|indent}},\nexpected param={{expected|indent}}", {"from": Z}, stats=stats, expected=v.S, - cause=e + cause=e, ) globals()["DEBUG"] = True @@ -187,19 +183,25 @@ class Stats(Data): elif "skew" not in kwargs: self.count = kwargs["count"] self.mean = kwargs["mean"] - self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + self.variance = ( + kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + ) self.skew = None self.kurtosis = None elif "kurtosis" not in kwargs: self.count = kwargs["count"] self.mean = kwargs["mean"] - self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + self.variance = ( + kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + ) self.skew = kwargs["skew"] self.kurtosis = None else: self.count = kwargs["count"] self.mean = kwargs["mean"] - self.variance = kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + self.variance = ( + kwargs["variance"] if "variance" in kwargs else kwargs["std"] ** 2 + ) self.skew = kwargs["skew"] self.kurtosis = kwargs["kurtosis"] @@ -214,75 +216,66 @@ class ZeroMoment(object): """ def __init__(self, *args): - self.S = tuple(args) + self.S = args def __add__(self, other): + output = ZeroMoment(*self.S) + output += other + return output + + def __iadd__(self, other): if isinstance(other, ZeroMoment): - return ZeroMoment(*map(add, self.S, other.S)) + return ZeroMoment(*array_add(self.S, other.S)) elif hasattr(other, "__iter__"): - return ZeroMoment(*map(add, self.S, ZeroMoment.new_instance(other))) + return ZeroMoment(*array_add(self.S, ZeroMoment.new_instance(other))) elif other == None: return self else: - return ZeroMoment(*map(add, self.S, ( - 1, - other, - pow(other, 2), - pow(other, 3), - pow(other, 4), - pow(other, 2) - ))) - - - def __sub__(self, other): - if isinstance(other, ZeroMoment): - return ZeroMoment(*map(sub, self.S, other.S)) - elif hasattr(other, "__iter__"): - return ZeroMoment(*map(sub, self.S, ZeroMoment.new_instance(other))) - elif other == None: - return self - else: - return ZeroMoment(*map(sub, self.S, ( - 1, - other, - pow(other, 2), - pow(other, 3), - pow(other, 4) - ))) + return ZeroMoment( + *array_add( + self.S, + ( + 1, + other, + pow(other, 2), + pow(other, 3), + pow(other, 4), + ), + ) + ) @property def tuple(self): - # RETURN AS ORDERED TUPLE + # RETURN AS ORDERED TUPLE return self.S - @property - def dict(self): - # RETURN HASH OF SUMS - return {u"s" + text_type(i): m for i, m in enumerate(self.S)} - + def __data__(self): + # RETURN HASH OF SUMS + return {"s" + text(i): m for i, m in enumerate(self.S)} @staticmethod def new_instance(values=None): if values == None: return ZeroMoment() - vals = [v for v in values if v != None] + vals = tuple(values) return ZeroMoment( len(vals), sum(vals), sum([pow(n, 2) for n in vals]), sum([pow(n, 3) for n in vals]), - sum([pow(n, 4) for n in vals]) + sum([pow(n, 4) for n in vals]), ) @property - def stats(self, *args, **kwargs): - return ZeroMoment2Stats(self, *args, **kwargs) + def stats(self): + return ZeroMoment2Stats(self) -def add(a, b): - return coalesce(a, 0) + coalesce(b, 0) +def array_add(A, B): + 
return tuple(coalesce(a, 0) + coalesce(b, 0) for a, b in zip_longest(A, B)) def sub(a, b): @@ -291,7 +284,7 @@ def sub(a, b): def ZeroMoment2dict(z): # RETURN HASH OF SUMS - return {u"s" + text_type(i): m for i, m in enumerate(z.S)} + return {"s" + text(i): m for i, m in enumerate(z.S)} def median(values, simple=True, mean_weight=0.0): @@ -345,11 +338,13 @@ def median(values, simple=True, mean_weight=0.0): return (_median - 0.5) + (middle - start_index) / num_middle else: if num_middle == 1: - return (1 - mean_weight) * _median + mean_weight * (_sorted[middle - 1] + _sorted[middle + 1]) / 2 + return (1 - mean_weight) * _median + mean_weight * ( + _sorted[middle - 1] + _sorted[middle + 1] + ) / 2 else: return (_median - 0.5) + (middle + 0.5 - start_index) / num_middle except Exception as e: - Log.error("problem with median of {{values}}", values= values, cause=e) + Log.error("problem with median of {{values}}", values=values, cause=e) def percentile(values, percent): diff --git a/vendor/mo_math/vendor/aespython/mode_test.py b/vendor/mo_math/vendor/aespython/mode_test.py index e874d17..8785673 100644 --- a/vendor/mo_math/vendor/aespython/mode_test.py +++ b/vendor/mo_math/vendor/aespython/mode_test.py @@ -3,10 +3,12 @@ Cipher Mode of operation Abstract encryption mode test harness. """ -from .key_expander import KeyExpander -from .aes_cipher import AESCipher - import unittest + +from .aes_cipher import AESCipher +from .key_expander import KeyExpander + + class GeneralTestEncryptionMode(unittest.TestCase): def get_keyed_cipher(self, key): diff --git a/vendor/mo_math/vendor/strangman/pstat.py b/vendor/mo_math/vendor/strangman/pstat.py index b22235f..3ce3d43 100644 --- a/vendor/mo_math/vendor/strangman/pstat.py +++ b/vendor/mo_math/vendor/strangman/pstat.py @@ -103,8 +103,6 @@ functions/methods. Their inclusion here is for function name consistency. ## ## 11/08/98 ... fixed aput to output large arrays correctly -import string -import copy from types import * __version__ = 0.4 diff --git a/vendor/mo_math/vendor/strangman/stats.py b/vendor/mo_math/vendor/strangman/stats.py index a7f68a9..acd7b60 100644 --- a/vendor/mo_math/vendor/strangman/stats.py +++ b/vendor/mo_math/vendor/strangman/stats.py @@ -224,12 +224,13 @@ SUPPORT FUNCTIONS: writecc ## changed name of skewness and askewness to skew and askew ## fixed (a)histogram (which sometimes counted points ") +SQL_GE = SQL(" >= ") +SQL_EQ = SQL(" = ") +SQL_LT = SQL(" < ") +SQL_LE = SQL(" <= ") +SQL_DOT = SQL(".") +SQL_CR = SQL("\n") + + +class DB(object): + def quote_column(self, *path): + raise NotImplementedError() + + def db_type_to_json_type(self, type): + raise NotImplementedError() + + +def sql_list(list_): + return ConcatSQL(SQL_SPACE, JoinSQL(SQL_COMMA, list_), SQL_SPACE) + + +def sql_iso(*sql): + return ConcatSQL(*((SQL_OP,) + sql + (SQL_CP,))) + + +def sql_count(sql): + return "COUNT(" + sql + ")" + + +def sql_concat_text(list_): + """ + TEXT CONCATENATION WITH "||" + """ + return JoinSQL(SQL_CONCAT, [sql_iso(l) for l in list_]) + + +def sql_coalesce(list_): + return ConcatSQL(SQL("COALESCE("), JoinSQL(SQL_COMMA, list_), SQL_CP) diff --git a/vendor/mo_testing/fuzzytestcase.py b/vendor/mo_testing/fuzzytestcase.py index b7c8d41..f1b9b43 100644 --- a/vendor/mo_testing/fuzzytestcase.py +++ b/vendor/mo_testing/fuzzytestcase.py @@ -5,22 +5,23 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
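# A sketch of what the SQL composition helpers above produce, assuming the
# SQL/ConcatSQL/JoinSQL primitives render by concatenating their parts and
# that SQL_OP/SQL_CP are the parentheses (per their use in sql_iso):
#
#   sql_iso(SQL("a"), SQL_EQ, SQL("b"))          ->  (a = b)
#   sql_concat_text([SQL("first"), SQL("last")]) ->  (first)||(last)
#   sql_coalesce([SQL("a.x"), SQL("0")])         ->  COALESCE(a.x, 0)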
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # from __future__ import unicode_literals import types import unittest -from collections import Mapping +from datetime import datetime -import mo_dots from mo_collections.unique_index import UniqueIndex -from mo_dots import coalesce, literal_field, unwrap, wrap -from mo_future import text_type -from mo_future import zip_longest -from mo_logs import Log, Except, suppress_exception -from mo_logs.strings import expand_template -from mo_math import Math +import mo_dots +from mo_dots import coalesce, is_container, is_list, literal_field, unwrap, wrap, is_data +from mo_future import is_text, zip_longest +from mo_logs import Except, Log, suppress_exception +from mo_logs.strings import expand_template, quote +import mo_math +from mo_math import is_number, log10 +from mo_times import dates class FuzzyTestCase(unittest.TestCase): @@ -61,7 +62,7 @@ class FuzzyTestCase(unittest.TestCase): function(*args, **kwargs) except Exception as e: f = Except.wrap(e) - if isinstance(problem, text_type): + if is_text(problem): if problem in f: return Log.error( @@ -82,27 +83,27 @@ def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta= test = unwrap(test) expected = unwrap(expected) try: - if test is None and expected is None: + if test is None and (is_null(expected) or expected is None): return elif test is expected: return - elif isinstance(expected, text_type): + elif is_text(expected): assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta) elif isinstance(test, UniqueIndex): if test ^ expected: Log.error("Sets do not match") - elif isinstance(expected, Mapping) and isinstance(test, Mapping): + elif is_data(expected) and is_data(test): for k, v2 in unwrap(expected).items(): v1 = test.get(k) - assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta) - elif isinstance(expected, Mapping): + assertAlmostEqual(v1, v2, msg=coalesce(msg, "")+"key "+quote(k)+": ", digits=digits, places=places, delta=delta) + elif is_data(expected): for k, v2 in expected.items(): - if isinstance(k, text_type): + if is_text(k): v1 = mo_dots.get_attr(test, literal_field(k)) else: v1 = test[k] assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta) - elif isinstance(test, (set, list)) and isinstance(expected, set): + elif is_container(test) and isinstance(expected, set): test = set(wrap(t) for t in test) if len(test) != len(expected): Log.error( @@ -124,7 +125,14 @@ def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta= elif isinstance(expected, types.FunctionType): return expected(test) elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"): - if test == None and not expected: + if test.__class__.__name__ == "ndarray": # numpy + test = test.tolist() + elif test.__class__.__name__ == "DataFrame": # pandas + test = test[test.columns[0]].values.tolist() + elif test.__class__.__name__ == "Series": # pandas + test = test.values.tolist() + + if not expected and test == None: return if expected == None: expected = [] # REPRESENT NOTHING @@ -145,27 +153,35 @@ def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, d """ Snagged from unittest/case.py, then modified (Aug2014) """ - if expected.__class__.__name__ == "NullOp": - if test == None: + if is_null(expected): + if test == None: # pandas dataframes reject any comparison with an exception! 
return else: - raise AssertionError(expand_template("{{test}} != {{expected}}", locals())) + raise AssertionError(expand_template("{{test|json}} != NULL", locals())) if expected == None: # None has no expectations return if test == expected: # shortcut return + if isinstance(expected, dates.Date): + return assertAlmostEqualValue(dates.Date(test).unix, expected.unix) - if not Math.is_number(expected): + if not is_number(expected): # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL - if isinstance(expected, list) and len(expected) == 0 and test == None: + if is_list(expected) and len(expected) == 0 and test == None: return - if isinstance(expected, Mapping) and not expected.keys() and test == None: + if is_data(expected) and not expected.keys() and test == None: return if test != expected: - raise AssertionError(expand_template("{{test}} != {{expected}}", locals())) + raise AssertionError(expand_template("{{test|json}} != {{expected|json}}", locals())) return + elif not is_number(test): + try: + # ASSUME IT IS A UTC DATE + test = dates.parse(test).unix + except Exception as e: + raise AssertionError(expand_template("{{test|json}} != {{expected}}", locals())) num_param = 0 if digits != None: @@ -174,30 +190,36 @@ def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, d num_param += 1 if delta != None: num_param += 1 - if num_param>1: + if num_param > 1: raise TypeError("specify only one of digits, places or delta") if digits is not None: with suppress_exception: - diff = Math.log10(abs(test-expected)) + diff = log10(abs(test-expected)) if diff < digits: return - standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals()) + standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{digits}} decimal places", locals()) elif delta is not None: if abs(test - expected) <= delta: return - standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals()) + standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{delta}} delta", locals()) else: if places is None: places = 15 with suppress_exception: - diff = Math.log10(abs(test-expected)) - if diff < Math.ceiling(Math.log10(abs(test)))-places: + diff = mo_math.log10(abs(test-expected)) + if diff == None: + return # Exactly the same + if diff < mo_math.ceiling(mo_math.log10(abs(test)))-places: return standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals()) raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")") + + +def is_null(v): + return v.__class__.__name__ == "NullOp" diff --git a/vendor/mo_threads/__init__.py b/vendor/mo_threads/__init__.py index cc047a6..89f5cc8 100644 --- a/vendor/mo_threads/__init__.py +++ b/vendor/mo_threads/__init__.py @@ -5,80 +5,42 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. 
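# The places test above, in plain math: it passes when the difference is at
# least `places` orders of magnitude below the value, i.e. the first
# `places` significant digits agree (a sketch, assuming math.ceil/log10
# behave like mo_math.ceiling/log10 here):
import math

def agrees_to_places(test, expected, places=15):
    if test == expected:
        return True
    diff = math.log10(abs(test - expected))
    return diff < math.ceil(math.log10(abs(test))) - places

assert agrees_to_places(3.141592, 3.141593, places=6)
assert not agrees_to_places(3.14, 3.15, places=6)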
# THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals +from mo_threads import till from mo_threads.lock import Lock from mo_threads.multiprocess import Process from mo_threads.queues import Queue, ThreadedQueue -from mo_threads.signal import Signal -from mo_threads.threads import Thread, THREAD_STOP, THREAD_TIMEOUT, MainThread, stop_main_thread, MAIN_THREAD +from mo_threads.signals import Signal, DONE +from mo_threads.threads import ( + MAIN_THREAD, + MainThread, + THREAD_STOP, + THREAD_TIMEOUT, + Thread, + stop_main_thread, +) from mo_threads.till import Till MAIN_THREAD.timers = Thread.run("timers daemon", till.daemon) -MAIN_THREAD.children.remove(threads.MAIN_THREAD.timers) - - - - - -# from threading import Thread as _threading_Thread -# _temp = _threading_Thread.setDaemon -# -# fixes = [] -# # WE NOW ADD A FIX FOR EACH KNOWN BAD ACTOR -# try: -# from paramiko import Transport -# -# def fix(self): -# if isinstance(self, Transport): -# self.stop = self.close # WE KNOW Transport DOES NOT HAVE A stop() METHOD, SO ADDING SHOULD BE FINE -# parent = Thread.current() -# parent.add_child(self) -# return True -# -# fixes.append(fix) -# except Exception: -# pass -# -# -# _known_daemons = [ -# ('thread_handling', 17), # fabric/thread_handling.py -# ('pydevd_comm.py', 285), # plugins/python/helpers/pydev/_pydevd_bundle/pydevd_comm.py", -# ] -# -# -# # WE WRAP THE setDaemon METHOD TO APPLY THE FIX WHEN CALLED -# def _setDaemon(self, daemonic): -# for fix in fixes: -# if fix(self): -# break -# else: -# from mo_logs import Log -# from mo_logs.exceptions import extract_stack -# from mo_files import File -# -# get_function_name(self.__target) -# -# stack = extract_stack(1)[0] -# uid = (File(stack['file']).name, stack['line']) -# if uid in _known_daemons: -# pass -# else: -# _known_daemons.append(uid) -# Log.warning("daemons in threading.Thread do not shutdown clean. {{type}} not handled.", type=repr(self)) -# -# _temp(self, daemonic) -# -# -# _threading_Thread.setDaemon = _setDaemon -# -# - +MAIN_THREAD.children.remove(MAIN_THREAD.timers) +till.enabled.wait() +keep_import = ( + Till, + Lock, + Process, + Queue, + ThreadedQueue, + Signal, + DONE, + MainThread, + THREAD_STOP, + THREAD_TIMEOUT, + stop_main_thread, +) diff --git a/vendor/mo_threads/busy_lock.py b/vendor/mo_threads/busy_lock.py index f565a0a..7b01a9e 100644 --- a/vendor/mo_threads/busy_lock.py +++ b/vendor/mo_threads/busy_lock.py @@ -6,15 +6,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. # THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from time import sleep diff --git a/vendor/mo_threads/lock.py b/vendor/mo_threads/lock.py index 89b1768..595c502 100644 --- a/vendor/mo_threads/lock.py +++ b/vendor/mo_threads/lock.py @@ -5,18 +5,17 @@ # License, v. 2.0. 
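# The please_stop convention described in the header above, as a worker
# sketch: every thread receives a Signal, polls it, and uses Till for
# interruptible sleeps, so shutdown stays fast and predictable:
from mo_threads import Thread, Till

def poller(please_stop):
    while not please_stop:
        # ... one unit of work ...
        (please_stop | Till(seconds=1)).wait()  # sleep, but wake on stop

worker = Thread.run("poller", poller)
Till(seconds=3).wait()
worker.stop()
worker.join()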
If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. # THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from mo_future import allocate_lock as _allocate_lock -from mo_threads.signal import Signal +from mo_math.randoms import Random +from mo_threads.signals import Signal _Log = None _Except = None @@ -37,7 +36,7 @@ def _late_import(): return from mo_logs.exceptions import Except as _Except - from mo_logs.exceptions import extract_stack as _extract_stack + from mo_logs.exceptions import get_stacktrace as _extract_stack from mo_threads.threads import Thread as _Thread from mo_logs import Log as _Log @@ -51,27 +50,34 @@ class Lock(object): """ A NON-RE-ENTRANT LOCK WITH wait() """ - __slots__ = ["name", "lock", "waiting"] + __slots__ = ["name", "debug", "sample", "lock", "waiting"] - def __init__(self, name=""): - if DEBUG and not _Log: + def __init__(self, name="", debug=DEBUG, sample=False): + if (debug or sample) and not _Log: _late_import() + self.debug = debug + self.sample = sample self.name = name self.lock = _allocate_lock() self.waiting = None def __enter__(self): - # with mo_times.timer.Timer("get lock"): + if self.sample and Random.int(100) == 0: + _Log.warning("acquire lock {{name|quote}}", name=self.name) + + self.debug and _Log.note("acquire lock {{name|quote}}", name=self.name) self.lock.acquire() + self.debug and _Log.note("acquired lock {{name|quote}}", name=self.name) return self def __exit__(self, a, b, c): if self.waiting: - if DEBUG: - _Log.note("signaling {{num}} waiters", num=len(self.waiting)) - waiter = self.waiting.pop() - waiter.go() + self.debug and _Log.note("signaling {{num}} waiters on {{name|quote}}", name=self.name, num=len(self.waiting)) + # TELL ANOTHER THAT THE LOCK IS READY SOON + other = self.waiting.pop() + other.go() self.lock.release() + self.debug and _Log.note("released lock {{name|quote}}", name=self.name) def wait(self, till=None): """ @@ -81,34 +87,31 @@ class Lock(object): """ waiter = Signal() if self.waiting: - if DEBUG: - _Log.note("waiting with {{num}} others on {{name|quote}}", num=len(self.waiting), name=self.name) + # TELL ANOTHER THAT THE LOCK IS READY SOON + other = self.waiting.pop() + other.go() + self.debug and _Log.note("waiting with {{num}} others on {{name|quote}}", num=len(self.waiting), name=self.name, stack_depth=1) self.waiting.insert(0, waiter) else: - if DEBUG: - _Log.note("waiting by self on {{name|quote}}", name=self.name) + self.debug and _Log.note("waiting by self on {{name|quote}}", name=self.name) self.waiting = [waiter] try: self.lock.release() - if DEBUG: - _Log.note("out of lock {{name|quote}}", name=self.name) + self.debug and _Log.note("out of lock {{name|quote}}", name=self.name) (waiter | till).wait() - if DEBUG: - _Log.note("done minimum wait (for signal {{till|quote}})", till=till.name if till else "", name=self.name) + self.debug and _Log.note("done minimum wait (for signal {{till|quote}})", till=till.name if till else "", name=self.name) except Exception as e: if not _Log: _late_import() _Log.warning("problem", cause=e) finally: 
self.lock.acquire() - if DEBUG: - _Log.note("re-acquired lock {{name|quote}}", name=self.name) + self.debug and _Log.note("re-acquired lock {{name|quote}}", name=self.name) try: self.waiting.remove(waiter) - if DEBUG: - _Log.note("removed own signal from {{name|quote}}", name=self.name) + self.debug and _Log.note("removed own signal from {{name|quote}}", name=self.name) except Exception: pass diff --git a/vendor/mo_threads/multiprocess.py b/vendor/mo_threads/multiprocess.py index d134974..c3eda3e 100644 --- a/vendor/mo_threads/multiprocess.py +++ b/vendor/mo_threads/multiprocess.py @@ -4,52 +4,78 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import os +import platform import subprocess -from mo_dots import set_default, NullType -from mo_future import none_type +from mo_dots import set_default, wrap, Null +from mo_future import text from mo_logs import Log, strings from mo_logs.exceptions import Except from mo_threads.lock import Lock from mo_threads.queues import Queue -from mo_threads.signal import Signal -from mo_threads.threads import Thread, THREAD_STOP +from mo_threads.signals import Signal +from mo_threads.threads import THREAD_STOP, Thread from mo_threads.till import Till +from mo_times import Timer -DEBUG = False +DEBUG = True class Process(object): + next_process_id = 0 + def __init__(self, name, params, cwd=None, env=None, debug=False, shell=False, bufsize=-1): - self.name = name + """ + Spawns multiple threads to manage the stdin/stdout/stderr of the child process; communication is done + via proper thread-safe queues of the same name. 
+ + Since the process is managed and monitored by threads, the main thread is not blocked when the child process + encounters problems + + :param name: name given to this process + :param params: list of strings for program name and parameters + :param cwd: current working directory + :param env: environment variables + :param debug: true to be verbose about stdin/stdout + :param shell: true to run as command line + :param bufsize: if you want to screw stuff up (passed to subprocess.Popen) + """ + self.debug = debug or DEBUG + self.process_id = Process.next_process_id + Process.next_process_id += 1 + self.name = name + " (" + text(self.process_id) + ")" self.service_stopped = Signal("stopped signal for " + strings.quote(name)) - self.stdin = Queue("stdin for process " + strings.quote(name), silent=True) - self.stdout = Queue("stdout for process " + strings.quote(name), silent=True) - self.stderr = Queue("stderr for process " + strings.quote(name), silent=True) + self.stdin = Queue("stdin for process " + strings.quote(name), silent=not self.debug) + self.stdout = Queue("stdout for process " + strings.quote(name), silent=not self.debug) + self.stderr = Queue("stderr for process " + strings.quote(name), silent=not self.debug) try: - self.debug = debug or DEBUG + if cwd == None: + cwd = os.getcwd() + else: + cwd = str(cwd) + + command = [str(p) for p in params] + self.debug and Log.note("command: {{command}}", command=command) self.service = service = subprocess.Popen( [str(p) for p in params], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=bufsize, - cwd=cwd if isinstance(cwd, (str, NullType, none_type)) else cwd.abspath, + cwd=cwd, env={str(k): str(v) for k, v in set_default(env, os.environ).items()}, shell=shell ) self.please_stop = Signal() - self.please_stop.on_go(self._kill) - self.thread_locker = Lock() + self.please_stop.then(self._kill) + self.child_locker = Lock() self.children = [ Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.service_stopped, parent_thread=self), Thread.run(self.name + " stdout", self._reader, "stdout", service.stdout, self.stdout, please_stop=self.service_stopped, parent_thread=self), @@ -73,7 +99,7 @@ class Process(object): def join(self, raise_on_error=False): self.service_stopped.wait() - with self.thread_locker: + with self.child_locker: child_threads, self.children = self.children, [] for c in child_threads: c.join() @@ -87,7 +113,7 @@ class Process(object): return self def remove_child(self, child): - with self.thread_locker: + with self.child_locker: try: self.children.remove(child) except Exception: @@ -102,48 +128,38 @@ class Process(object): return self.service.returncode def _monitor(self, please_stop): - self.service.wait() - self.debug and Log.note("{{process}} STOP: returncode={{returncode}}", process=self.name, returncode=self.service.returncode) - self.service_stopped.go() - please_stop.go() + with Timer(self.name, verbose=self.debug): + self.service.wait() + self.debug and Log.note("{{process}} STOP: returncode={{returncode}}", process=self.name, returncode=self.service.returncode) + self.service_stopped.go() + please_stop.go() def _reader(self, name, pipe, receive, please_stop): try: while not please_stop and self.service.returncode is None: - line = pipe.readline().rstrip() + line = to_text(pipe.readline().rstrip()) if line: receive.add(line) self.debug and Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line) - continue - - # GRAB A FEW MORE LINES for _ in 
range(100): - try: - line = pipe.readline().rstrip() - if line: - receive.add(line) - self.debug and Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line) - break - except Exception: - break else: - Till(seconds=5).wait() + (Till(seconds=1) | please_stop).wait() # GRAB A FEW MORE LINES max = 100 while max: try: - line = pipe.readline().rstrip() + line = to_text(pipe.readline().rstrip()) if line: max = 100 receive.add(line) - self.debug and Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line) + self.debug and Log.note("{{process}} RESIDUE: ({{name}}): {{line}}", name=name, process=self.name, line=line) else: max -= 1 except Exception: break finally: pipe.close() + receive.add(THREAD_STOP) self.debug and Log.note("{{process}} ({{name}} is closed)", name=name, process=self.name) receive.add(THREAD_STOP) @@ -151,14 +167,15 @@ class Process(object): def _writer(self, pipe, send, please_stop): while not please_stop: line = send.pop(till=please_stop) - if line == THREAD_STOP: + if line is THREAD_STOP: please_stop.go() break + elif line is None: + continue - if line: - self.debug and Log.note("{{process}} (stdin): {{line}}", process=self.name, line=line.rstrip()) - pipe.write(line.encode('utf8') + b"\n") - pipe.flush() + self.debug and Log.note("{{process}} (stdin): {{line}}", process=self.name, line=line.rstrip()) + pipe.write(line.encode('utf8') + b"\n") + pipe.flush() def _kill(self): try: @@ -174,3 +191,152 @@ class Process(object): Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee) +WINDOWS_ESCAPE_DCT = { + u"%": u"%%", + u"&": u"^&", + u"\\": u"^\\", + u"<": u"^<", + u">": u"^>", + u"^": u"^^", + u"|": u"^|", + u"\t": u"^\t", + u"\n": u"^\n", + u"\r": u"^\r", + u" ": u"^ ", +} + +PROMPT = "READY_FOR_MORE" + +if "windows" in platform.system().lower(): + # def cmd_escape(v): + # return "".join(WINDOWS_ESCAPE_DCT.get(c, c) for c in v) + cmd_escape = strings.quote + + def set_prompt(): + return "prompt "+PROMPT+"$g" + + def cmd(): + return "%windir%\\system32\\cmd.exe" + + def to_text(value): + return value.decode("latin1") + +else: + cmd_escape = strings.quote + + def set_prompt(): + return "set prompt="+cmd_escape(PROMPT+">") + + def cmd(): + return "bash" + + def to_text(value): + return value.decode("latin1") + + +class Command(object): + """ + FASTER Process CLASS - OPENS A COMMAND_LINE APP (CMD on windows) AND KEEPS IT OPEN FOR MULTIPLE COMMANDS + EACH WORKING DIRECTORY WILL HAVE ITS OWN PROCESS, MULTIPLE PROCESSES WILL OPEN FOR THE SAME DIR IF MULTIPLE + THREADS ARE REQUESTING Commands + """ + + available_locker = Lock("cmd lock") + available_process = {} + + def __init__(self, name, params, cwd=None, env=None, debug=False, shell=False, bufsize=-1): + shell = True + self.name = name + self.key = (cwd, wrap(env), debug, shell) + self.stdout = Queue("stdout for "+name) + self.stderr = Queue("stderr for "+name) + + with Command.available_locker: + avail = Command.available_process.setdefault(self.key, []) + if not avail: + self.process = Process("command shell", [cmd()], cwd, env, debug, shell, bufsize) + self.process.stdin.add(set_prompt()) + self.process.stdin.add("echo %errorlevel%") + _wait_for_start(self.process.stdout, Null) + else: + self.process = avail.pop() + + self.process.stdin.add(" ".join(cmd_escape(p) for p in params)) + self.process.stdin.add("echo %errorlevel%") + self.stdout_thread = Thread.run("", self._stream_relay, self.process.stdout, self.stdout) + self.stderr_thread = 
Thread.run("", self._stream_relay, self.process.stderr, self.stderr) + self.returncode = None + + def join(self, raise_on_error=False, till=None): + try: + try: + # WAIT FOR COMMAND LINE RESPONSE ON stdout + self.stdout_thread.join() + except Exception as e: + Log.error("unexpected problem processing stdout", cause=e) + + try: + self.stderr_thread.please_stop.go() + self.stderr_thread.join() + except Exception as e: + Log.error("unexpected problem processing stderr", cause=e) + + if raise_on_error and self.returncode != 0: + Log.error( + "{{process}} FAIL: returncode={{code}}\n{{stderr}}", + process=self.name, + code=self.returncode, + stderr=list(self.stderr) + ) + return self + finally: + with Command.available_locker: + Command.available_process[self.key].append(self.process) + + + def _stream_relay(self, source, destination, please_stop=None): + """ + :param source: + :param destination: + :param error: Throw error if line shows up + :param please_stop: + :return: + """ + prompt_count = 0 + prompt = PROMPT + ">" + line_count = 0 + + while not please_stop: + value = source.pop(till=please_stop) + if value is None: + destination.add(THREAD_STOP) + return + elif value is THREAD_STOP: + destination.add(THREAD_STOP) + return + elif line_count==0 and "is not recognized as an internal or external command" in value: + Log.error("Problem with command: {{desc}}", desc=value) + elif value.startswith(prompt): + if prompt_count: + # GET THE ERROR LEVEL + self.returncode = int(source.pop(till=please_stop)) + destination.add(THREAD_STOP) + return + else: + prompt_count += 1 + else: + line_count += 1 + destination.add(value) + + +def _wait_for_start(source, destination): + prompt = PROMPT + ">" + + while True: + value = source.pop() + if value.startswith(prompt): + # GET THE ERROR LEVEL + returncode = int(source.pop()) + destination.add(THREAD_STOP) + return + destination.add(value) diff --git a/vendor/mo_threads/profiles.py b/vendor/mo_threads/profiles.py index 6b531a7..aaab558 100644 --- a/vendor/mo_threads/profiles.py +++ b/vendor/mo_threads/profiles.py @@ -4,17 +4,15 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import cProfile import pstats -from datetime import datetime +from mo_dots import wrap from mo_future import iteritems from mo_logs import Log @@ -84,7 +82,6 @@ def write_profiles(main_thread_profile): if cprofiler_stats is None: return - from pyLibrary import convert from mo_files import File cprofiler_stats.add(pstats.Stats(main_thread_profile.cprofiler)) @@ -108,6 +105,23 @@ def write_profiles(main_thread_profile): } for f, d, in iteritems(acc.stats) ] - stats_file = File(FILENAME, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")) - stats_file.write(convert.list2tab(stats)) + from mo_times import Date + + stats_file = File(FILENAME, suffix=Date.now().format("_%Y%m%d_%H%M%S")) + stats_file.write(list2tab(stats)) Log.note("profile written to {{filename}}", filename=stats_file.abspath) + + +def list2tab(rows): + from mo_json import value2json + + columns = set() + for r in wrap(rows): + columns |= set(k for k, v in r.leaves()) + keys = list(columns) + + output = [] + for r in wrap(rows): + output.append("\t".join(value2json(r[k]) for k in keys)) + + return "\t".join(keys) + "\n" + "\n".join(output) \ No newline at end of file diff --git a/vendor/mo_threads/python.py b/vendor/mo_threads/python.py index 1877ff8..37345af 100644 --- a/vendor/mo_threads/python.py +++ b/vendor/mo_threads/python.py @@ -4,19 +4,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import os +import platform -from mo_dots import wrap, set_default -from mo_json import value2json, json2value -from mo_logs import Log, Except +from mo_dots import set_default, wrap +from mo_json import json2value, value2json +from mo_logs import Except, Log -from mo_threads import Process, Lock, Thread, Signal, THREAD_STOP +from mo_threads import Lock, Process, Signal, THREAD_STOP, Thread, DONE PYTHON = "python" DEBUG = True @@ -29,11 +28,22 @@ class Python(object): if config.debug.logs: Log.error("not allowed to configure logging on other process") - self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True) - self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config))) - + Log.note("begin process") + # WINDOWS REQUIRED shell, WHILE LINUX NOT + shell = "windows" in platform.system().lower() + self.process = Process( + name, + [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"], + debug=False, + cwd=os.getcwd(), + shell=shell + ) + self.process.stdin.add(value2json(set_default({}, config, {"debug": {"trace": True}}))) + status = self.process.stdout.pop() + if status != '{"out":"ok"}': + Log.error("could not start python\n{{error|indent}}", error=self.process.stderr.pop_all()+[status]+self.process.stdin.pop_all()) self.lock = Lock("wait for response from "+name) - self.current_task = None + self.current_task = DONE self.current_response = None self.current_error = None @@ -42,21 +52,23 @@ class Python(object): def 
_execute(self, command): with self.lock: - if self.current_task is not None: - self.current_task.wait() + self.current_task.wait() self.current_task = Signal() self.current_response = None self.current_error = None - self.process.stdin.add(value2json(command)) - self.current_task.wait() - with self.lock: + + if self.process.service_stopped: + Log.error("python is not running") + self.process.stdin.add(value2json(command)) + (self.current_task | self.process.service_stopped).wait() + try: if self.current_error: Log.error("problem with process call", cause=Except.new_instance(self.current_error)) else: return self.current_response finally: - self.current_task = None + self.current_task = DONE self.current_response = None self.current_error = None @@ -66,18 +78,16 @@ class Python(object): if line == THREAD_STOP: break try: - data = json2value(line.decode('utf8')) + data = json2value(line) if "log" in data: Log.main_log.write(*data.log) elif "out" in data: - with self.lock: - self.current_response = data.out - self.current_task.go() + self.current_response = data.out + self.current_task.go() elif "err" in data: - with self.lock: - self.current_error = data.err - self.current_task.go() - except Exception: + self.current_error = data.err + self.current_task.go() + except Exception as e: Log.note("non-json line: {{line}}", line=line) DEBUG and Log.note("stdout reader is done") @@ -110,7 +120,7 @@ class Python(object): def __getattr__(self, item): def output(*args, **kwargs): if len(args): - if len(kwargs.keys()): + if kwargs.keys(): Log.error("Not allowed to use both args and kwargs") return self._execute({item: args}) else: diff --git a/vendor/mo_threads/python_worker.py b/vendor/mo_threads/python_worker.py index adad9c8..0e5ccbd 100644 --- a/vendor/mo_threads/python_worker.py +++ b/vendor/mo_threads/python_worker.py @@ -4,77 +4,103 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
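# The wire protocol between the Python proxy above and python_worker below
# is one JSON object per line over stdin/stdout. A sketch of an exchange as
# it appears on the pipes (the worker answers {"out": "ok"} on startup,
# then one response per command):
#
#   -> {"debug": {"trace": true}}     # startup config
#   <- {"out": "ok"}
#   -> {"set": {"x": 3}}              # store x in the worker context
#   <- {"out": {}}
#   -> {"exec": "y = x * 2"}          # run code remotely
#   <- {"out": {}}
#   -> {"get": "y"}
#   <- {"out": 6}
#   -> {"stop": {}}
#   <- {"out": {}}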
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from copy import copy -context = copy(globals()) -del context['copy'] - - -import sys - -from mo_dots import set_default, listwrap, coalesce -from mo_future import text_type, PY3 +from mo_dots import is_list +from mo_dots import listwrap, coalesce +from mo_future import is_text, text from mo_json import json2value, value2json -from mo_logs import Log, constants +from mo_logs import Log, constants, Except +from mo_logs.log_usingNothing import StructuredLogger + from mo_threads import Signal +from mo_threads.threads import STDOUT, STDIN -if PY3: - STDOUT = sys.stdout.buffer -else: - STDOUT = sys.stdout +context = copy(globals()) +del context["copy"] -DEBUG = True -DONE = value2json({"out": {}}).encode('utf8') + b"\n" +DEBUG = False +DONE = value2json({"out": {}}).encode("utf8") + b"\n" please_stop = Signal() def command_loop(local): - DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config']) + STDOUT.write(b'{"out":"ok"}\n') + DEBUG and Log.note("python process running") + while not please_stop: - line = sys.stdin.readline() + line = STDIN.readline() try: - command = json2value(line.decode('utf8')) + command = json2value(line.decode("utf8")) DEBUG and Log.note("got {{command}}", command=command) if "import" in command: - dummy={} - if isinstance(command['import'], text_type): - exec ("from " + command['import'] + " import *", dummy, context) + dummy = {} + if is_text(command["import"]): + exec("from " + command["import"] + " import *", dummy, context) else: - exec ("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context) + exec( + "from " + + command["import"]["from"] + + " import " + + ",".join(listwrap(command["import"]["vars"])), + dummy, + context, + ) STDOUT.write(DONE) elif "set" in command: for k, v in command.set.items(): context[k] = v STDOUT.write(DONE) elif "get" in command: - STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))})) - STDOUT.write('\n') + STDOUT.write( + value2json( + { + "out": coalesce( + local.get(command["get"]), context.get(command["get"]) + ) + } + ).encode("utf8") + ) + STDOUT.write(b"\n") elif "stop" in command: STDOUT.write(DONE) please_stop.go() elif "exec" in command: - if not isinstance(command['exec'], text_type): + if not is_text(command["exec"]): Log.error("exec expects only text") - exec (command['exec'], context, local) + exec(command["exec"], context, local) STDOUT.write(DONE) else: for k, v in command.items(): - if isinstance(v, list): - exec ("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local) + if is_list(v): + exec( + "_return = " + k + "(" + ",".join(map(value2json, v)) + ")", + context, + local, + ) else: - exec ("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local) - STDOUT.write(value2json({"out": local['_return']})) - STDOUT.write('\n') + exec( + "_return = " + + k + + "(" + + ",".join( + kk + "=" + value2json(vv) for kk, vv in v.items() + ) + + ")", + context, + local, + ) + STDOUT.write(value2json({"out": local["_return"]}).encode("utf8")) + STDOUT.write(b"\n") except Exception as e: - STDOUT.write(value2json({"err": e})) - STDOUT.write('\n') + e 
= Except.wrap(e) + STDOUT.write(value2json({"err": e}).encode("utf8")) + STDOUT.write(b"\n") finally: STDOUT.flush() @@ -85,18 +111,29 @@ num_temps = 0 def temp_var(): global num_temps try: - return "temp_var" + text_type(num_temps) + return "temp_var" + text(num_temps) finally: num_temps += 1 -if __name__ == "__main__": +class RawLogger(StructuredLogger): + def write(self, template, params): + STDOUT.write(value2json({"log": {"template": template, "params": params}})) + + +def start(): try: - config = json2value(sys.stdin.readline().decode('utf8')) + line = STDIN.readline().decode("utf8") + config = json2value(line) constants.set(config.constants) - Log.start(set_default(config.debug, {"logs": [{"type": "raw"}]})) + Log.start(config.debug) + Log.set_logger(RawLogger()) command_loop({"config": config}) except Exception as e: Log.error("problem staring worker", cause=e) finally: Log.stop() + + +if __name__ == "__main__": + start() diff --git a/vendor/mo_threads/queues.py b/vendor/mo_threads/queues.py index b37f4e4..3ba40b0 100644 --- a/vendor/mo_threads/queues.py +++ b/vendor/mo_threads/queues.py @@ -6,26 +6,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. # THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import types from collections import deque +from copy import copy from datetime import datetime from time import time -from mo_dots import coalesce, Null +from mo_dots import Null, coalesce from mo_future import long -from mo_logs import Log, Except +from mo_logs import Except, Log + from mo_threads.lock import Lock -from mo_threads.signal import Signal +from mo_threads.signals import Signal from mo_threads.threads import THREAD_STOP, THREAD_TIMEOUT, Thread from mo_threads.till import Till @@ -54,15 +54,14 @@ class Queue(object): self.silent = silent self.allow_add_after_close=allow_add_after_close self.unique = unique - self.please_stop = Signal("stop signal for " + name) + self.closed = Signal("stop adding signal for " + name) # INDICATE THE PRODUCER IS DONE GENERATING ITEMS TO QUEUE self.lock = Lock("lock for queue " + name) self.queue = deque() - self.next_warning = time() # FOR DEBUGGING def __iter__(self): try: while True: - value = self.pop(self.please_stop) + value = self.pop() if value is THREAD_STOP: break if value is not None: @@ -70,38 +69,57 @@ class Queue(object): except Exception as e: Log.warning("Tell me about what happened here", e) - def add(self, value, timeout=None): + def add(self, value, timeout=None, force=False): + """ + :param value: ADDED TO THE QUEUE + :param timeout: HOW LONG TO WAIT FOR QUEUE TO NOT BE FULL + :param force: ADD TO QUEUE, EVEN IF FULL (USE ONLY WHEN CONSUMER IS RETURNING WORK TO THE QUEUE) + :return: self + """ with self.lock: if value is THREAD_STOP: # INSIDE THE lock SO THAT EXITING WILL RELEASE wait() self.queue.append(value) - self.please_stop.go() + self.closed.go() return - self._wait_for_queue_space(timeout=timeout) - if self.please_stop and not self.allow_add_after_close: + if not force: + self._wait_for_queue_space(timeout=timeout) + if 
self.closed and not self.allow_add_after_close: Log.error("Do not add to closed queue") - else: - if self.unique: - if value not in self.queue: - self.queue.append(value) - else: + if self.unique: + if value not in self.queue: self.queue.append(value) + else: + self.queue.append(value) return self def push(self, value): """ SNEAK value TO FRONT OF THE QUEUE """ - if self.please_stop and not self.allow_add_after_close: + if self.closed and not self.allow_add_after_close: Log.error("Do not push to closed queue") with self.lock: self._wait_for_queue_space() - if not self.please_stop: + if not self.closed: self.queue.appendleft(value) return self + def push_all(self, values): + """ + SNEAK values TO FRONT OF THE QUEUE + """ + if self.closed and not self.allow_add_after_close: + Log.error("Do not push to closed queue") + + with self.lock: + self._wait_for_queue_space() + if not self.closed: + self.queue.extendleft(values) + return self + def pop_message(self, till=None): """ RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE @@ -113,63 +131,57 @@ class Queue(object): return Null, self.pop(till=till) def extend(self, values): - if self.please_stop and not self.allow_add_after_close: + if self.closed and not self.allow_add_after_close: Log.error("Do not push to closed queue") with self.lock: # ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE self._wait_for_queue_space() - if not self.please_stop: + if not self.closed: if self.unique: for v in values: if v is THREAD_STOP: - self.please_stop.go() + self.closed.go() continue if v not in self.queue: self.queue.append(v) else: for v in values: if v is THREAD_STOP: - self.please_stop.go() + self.closed.go() continue self.queue.append(v) return self - def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME): + def _wait_for_queue_space(self, timeout=None): """ EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE + + :param timeout: IN SECONDS """ wait_time = 5 (DEBUG and len(self.queue) > 1 * 1000 * 1000) and Log.warning("Queue {{name}} has over a million items") - now = time() - if timeout != None: - time_to_stop_waiting = now + timeout - else: - time_to_stop_waiting = now + DEFAULT_WAIT_TIME + start = time() + stop_waiting = Till(till=start+coalesce(timeout, DEFAULT_WAIT_TIME)) - if self.next_warning < now: - self.next_warning = now + wait_time - - while not self.please_stop and len(self.queue) >= self.max: - if now > time_to_stop_waiting: + while not self.closed and len(self.queue) >= self.max: + if stop_waiting: Log.error(THREAD_TIMEOUT) if self.silent: - self.lock.wait(Till(till=time_to_stop_waiting)) + self.lock.wait(stop_waiting) else: self.lock.wait(Till(seconds=wait_time)) - if len(self.queue) >= self.max: + if not stop_waiting and len(self.queue) >= self.max: now = time() - if self.next_warning < now: - self.next_warning = now + wait_time - Log.alert( - "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec", - name=self.name, - num=len(self.queue), - wait_time=wait_time - ) + Log.alert( + "Queue with name {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec", + name=self.name, + num=len(self.queue), + wait_time=now-start + ) def __len__(self): with self.lock: @@ -194,15 +206,14 @@ class Queue(object): with self.lock: while True: if self.queue: - value = self.queue.popleft() - return value - if self.please_stop: + return self.queue.popleft() + if self.closed: break - if not 
self.lock.wait(till=till | self.please_stop): - if self.please_stop: + if not self.lock.wait(till=self.closed | till): + if self.closed: break return None - (DEBUG or not self.silent) and Log.note(self.name + " queue stopped") + (DEBUG or not self.silent) and Log.note(self.name + " queue closed") return THREAD_STOP def pop_all(self): @@ -220,19 +231,18 @@ class Queue(object): NON-BLOCKING POP IN QUEUE, IF ANY """ with self.lock: - if self.please_stop: - return [THREAD_STOP] + if self.closed: + return THREAD_STOP elif not self.queue: return None else: - v =self.queue.pop() + v =self.queue.popleft() if v is THREAD_STOP: # SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION - self.please_stop.go() + self.closed.go() return v def close(self): - with self.lock: - self.please_stop.go() + self.closed.go() def commit(self): pass @@ -260,7 +270,7 @@ class PriorityQueue(Queue): def __iter__(self): try: while True: - value = self.pop(self.please_stop) + value = self.pop(self.closed) if value is THREAD_STOP: break if value is not None: @@ -276,11 +286,11 @@ class PriorityQueue(Queue): if value is THREAD_STOP: # INSIDE THE lock SO THAT EXITING WILL RELEASE wait() self.queue[priority].queue.append(value) - self.please_stop.go() + self.closed.go() return self.queue[priority]._wait_for_queue_space(timeout=timeout) - if self.please_stop and not self.queue[priority].allow_add_after_close: + if self.closed and not self.queue[priority].allow_add_after_close: Log.error("Do not add to closed queue") else: if self.unique: @@ -294,12 +304,12 @@ class PriorityQueue(Queue): """ SNEAK value TO FRONT OF THE QUEUE """ - if self.please_stop and not self.queue[priority].allow_add_after_close: + if self.closed and not self.queue[priority].allow_add_after_close: Log.error("Do not push to closed queue") with self.lock: self.queue[priority]._wait_for_queue_space() - if not self.please_stop: + if not self.closed: self.queue[priority].queue.appendleft(value) return self @@ -336,10 +346,10 @@ class PriorityQueue(Queue): if priority: value = self.queue[priority].queue.popleft() return value - if self.please_stop: + if self.closed: break - if not self.lock.wait(till=till | self.please_stop): - if self.please_stop: + if not self.lock.wait(till=till | self.closed): + if self.closed: break return None (DEBUG or not self.silent) and Log.note(self.name + " queue stopped") @@ -377,14 +387,14 @@ class PriorityQueue(Queue): with self.lock: if not priority: priority = self.highest_entry() - if self.please_stop: + if self.closed: return [THREAD_STOP] elif not self.queue: return None else: v =self.pop(priority=priority) if v is THREAD_STOP: # SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION - self.please_stop.go() + self.closed.go() return v @@ -398,145 +408,136 @@ class ThreadedQueue(Queue): def __init__( self, name, - queue, # THE SLOWER QUEUE + slow_queue, # THE SLOWER QUEUE batch_size=None, # THE MAX SIZE OF BATCHES SENT TO THE SLOW QUEUE - max_size=None, # SET THE MAXIMUM SIZE OF THE QUEUE, WRITERS WILL BLOCK IF QUEUE IS OVER THIS LIMIT + max_size=None, # SET THE MAXIMUM SIZE OF THE QUEUE, WRITERS WILL BLOCK IF QUEUE IS OVER THIS LIMIT period=None, # MAX TIME (IN SECONDS) BETWEEN FLUSHES TO SLOWER QUEUE silent=False, # WRITES WILL COMPLAIN IF THEY ARE WAITING TOO LONG - error_target=None # CALL THIS WITH ERROR **AND THE LIST OF OBJECTS ATTEMPTED** + error_target=None # CALL error_target(error, buffer) **buffer IS THE LIST OF OBJECTS ATTEMPTED** # BE CAREFUL! THE THREAD MAKING THE CALL WILL NOT BE YOUR OWN! 
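A minimal sketch of an `error_target` callback that honours the contract documented above (illustrative only, not part of this patch; `source` and `slow` are hypothetical queues, and the callback runs on the worker-bee thread, so it must be threadsafe and must not raise):

    from mo_threads.queues import Queue, ThreadedQueue
    from mo_threads.threads import THREAD_STOP

    source = Queue("source")   # HYPOTHETICAL: WHERE FAILED WORK IS RETURNED
    slow = Queue("slow sink")  # HYPOTHETICAL: STANDS IN FOR THE SLOW DATA SINK

    def on_error(error, buffer):
        # RUNS ON THE WORKER-BEE THREAD: STAY THREADSAFE, NEVER RAISE
        for item in buffer:
            source.add(item, force=True)  # force=True: CONSUMER RETURNING WORK TO THE QUEUE

    sink = ThreadedQueue("demo sink", slow_queue=slow, batch_size=10, period=1, error_target=on_error)
    sink.extend(range(25))
    sink.add(THREAD_STOP)  # FLUSH THE BUFFER, THEN STOP THE WORKER BEE
    sink.thread.join()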
# DEFAULT BEHAVIOUR: THIS WILL KEEP RETRYING WITH WARNINGS ): if period !=None and not isinstance(period, (int, float, long)): Log.error("Expecting a float for the period") - + period = coalesce(period, 1) # SECONDS batch_size = coalesce(batch_size, int(max_size / 2) if max_size else None, 900) max_size = coalesce(max_size, batch_size * 2) # REASONABLE DEFAULT - period = coalesce(period, 1) # SECONDS Queue.__init__(self, name=name, max=max_size, silent=silent) - def worker_bee(please_stop): - def stopper(): - self.add(THREAD_STOP) + self.name = name + self.slow_queue = slow_queue + self.thread = Thread.run("threaded queue for " + name, self.worker_bee, batch_size, period, error_target) # parent_thread=self) - please_stop.on_go(stopper) + def worker_bee(self, batch_size, period, error_target, please_stop): + please_stop.then(lambda: self.add(THREAD_STOP)) - _buffer = [] - _post_push_functions = [] - now = time() - next_push = Till(till=now + period) # THE TIME WE SHOULD DO A PUSH - last_push = now - period + _buffer = [] + _post_push_functions = [] + now = time() + next_push = Till(till=now + period) # THE TIME WE SHOULD DO A PUSH + last_push = now - period - def push_to_queue(): - queue.extend(_buffer) - del _buffer[:] - for ppf in _post_push_functions: - ppf() - del _post_push_functions[:] + def push_to_queue(): + if self.slow_queue.__class__.__name__ == "Index": + if self.slow_queue.settings.index.startswith("saved"): + Log.alert("INSERT SAVED QUERY {{data|json}}", data=copy(_buffer)) + self.slow_queue.extend(_buffer) + del _buffer[:] + for ppf in _post_push_functions: + ppf() + del _post_push_functions[:] - while not please_stop: - try: - if not _buffer: - item = self.pop() - now = time() - if now > last_push + period: - # Log.note("delay next push") - next_push = Till(till=now + period) - else: - item = self.pop(till=next_push) - now = time() - - if item is THREAD_STOP: - push_to_queue() - please_stop.go() - break - elif isinstance(item, types.FunctionType): - _post_push_functions.append(item) - elif item is not None: - _buffer.append(item) - - except Exception as e: - e = Except.wrap(e) - if error_target: - try: - error_target(e, _buffer) - except Exception as f: - Log.warning( - "`error_target` should not throw, just deal", - name=name, - cause=f - ) - else: - Log.warning( - "Unexpected problem", - name=name, - cause=e - ) - - try: - if len(_buffer) >= batch_size or next_push: - if _buffer: - push_to_queue() - last_push = now = time() + while not please_stop: + try: + if not _buffer: + item = self.pop() + now = time() + if now > last_push + period: next_push = Till(till=now + period) + else: + item = self.pop(till=next_push) + now = time() - except Exception as e: - e = Except.wrap(e) - if error_target: - try: - error_target(e, _buffer) - except Exception as f: - Log.warning( - "`error_target` should not throw, just deal", - name=name, - cause=f - ) - else: + if item is THREAD_STOP: + push_to_queue() + please_stop.go() + break + elif isinstance(item, types.FunctionType): + _post_push_functions.append(item) + elif item is not None: + _buffer.append(item) + except Exception as e: + e = Except.wrap(e) + if error_target: + try: + error_target(e, _buffer) + except Exception as f: Log.warning( - "Problem with {{name}} pushing {{num}} items to data sink", - name=name, - num=len(_buffer), - cause=e + "`error_target` should not throw, just deal", + name=self.name, + cause=f ) + else: + Log.warning( + "Unexpected problem", + name=self.name, + cause=e + ) - if _buffer: - # ONE LAST PUSH, DO NOT 
HAVE TIME TO DEAL WITH ERRORS - push_to_queue() + try: + if len(_buffer) >= batch_size or next_push: + if _buffer: + push_to_queue() + last_push = now = time() + next_push = Till(till=now + period) + except Exception as e: + e = Except.wrap(e) + if error_target: + try: + error_target(e, _buffer) + except Exception as f: + Log.warning( + "`error_target` should not throw, just deal", + name=self.name, + cause=f + ) + else: + Log.warning( + "Problem with {{name}} pushing {{num}} items to data sink", + name=self.name, + num=len(_buffer), + cause=e + ) - self.thread = Thread.run("threaded queue for " + name, worker_bee) # parent_thread=self) + if _buffer: + # ONE LAST PUSH, DO NOT HAVE TIME TO DEAL WITH ERRORS + push_to_queue() + self.slow_queue.add(THREAD_STOP) def add(self, value, timeout=None): with self.lock: self._wait_for_queue_space(timeout=timeout) - if not self.please_stop: + if not self.closed: self.queue.append(value) - # if Random.range(0, 50) == 0: - # sizes = wrap([{"id":i["id"], "size":len(value2json(i))} for i in self.queue if isinstance(i, Mapping)]) - # size=sum(sizes.size) - # if size>50000000: - # from jx_python import jx - # - # biggest = jx.sort(sizes, "size").last().id - # Log.note("Big record {{id}}", id=biggest) - # Log.note("{{name}} has {{num}} items with json size of {{size|comma}}", name=self.name, num=len(self.queue), size=size) return self def extend(self, values): with self.lock: # ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE self._wait_for_queue_space() - if not self.please_stop: + if not self.closed: self.queue.extend(values) - Log.note("{{name}} has {{num}} items", name=self.name, num=len(self.queue)) + if not self.silent: + Log.note("{{name}} has {{num}} items", name=self.name, num=len(self.queue)) return self def __enter__(self): return self - def __exit__(self, a, b, c): + def __exit__(self, exc_type, exc_val, exc_tb): self.add(THREAD_STOP) - if isinstance(b, BaseException): + if isinstance(exc_val, BaseException): self.thread.please_stop.go() self.thread.join() diff --git a/vendor/mo_threads/signal.py b/vendor/mo_threads/signals.py similarity index 90% rename from vendor/mo_threads/signal.py rename to vendor/mo_threads/signals.py index 9a504d0..e215fc8 100644 --- a/vendor/mo_threads/signal.py +++ b/vendor/mo_threads/signals.py @@ -5,20 +5,18 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. 
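A minimal sketch of the renamed `closed` semantics from the queues.py hunks above (illustrative only, assuming the vendored `mo_threads` package is importable): `closed` only marks the producer as done; consumers still drain whatever remains.

    from mo_threads.queues import Queue
    from mo_threads.threads import THREAD_STOP

    q = Queue("demo")
    q.extend([1, 2, 3])
    q.add(THREAD_STOP)  # FIRES q.closed; FURTHER add() CALLS (WITHOUT force) WILL ERROR

    for value in q:     # DRAINS 1, 2, 3, THEN SEES THREAD_STOP AND EXITS
        print(value)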
# THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import random from weakref import ref -from mo_future import allocate_lock as _allocate_lock, text_type +from mo_future import allocate_lock as _allocate_lock, text from mo_logs import Log DEBUG = False @@ -26,14 +24,13 @@ DEBUG_SIGNAL = False SEED = random.Random() - class Signal(object): """ SINGLE-USE THREAD SAFE SIGNAL go() - ACTIVATE SIGNAL (DOES NOTHING IF SIGNAL IS ALREADY ACTIVATED) wait() - PUT THREAD IN WAIT STATE UNTIL SIGNAL IS ACTIVATED - on_go() - METHOD FOR OTHER THREAD TO RUN WHEN ACTIVATING SIGNAL + then() - METHOD FOR OTHER THREAD TO RUN WHEN ACTIVATING SIGNAL """ __slots__ = ["_name", "lock", "_go", "job_queue", "waiting_threads", "__weakref__"] @@ -107,7 +104,7 @@ class Signal(object): except Exception as e: Log.warning("Trigger on Signal.go() failed!", cause=e) - def on_go(self, target): + def then(self, target): """ RUN target WHEN SIGNALED """ @@ -146,16 +143,18 @@ class Signal(object): return self._name def __str__(self): - return self.name.decode(text_type) + return self.name.decode(text) def __repr__(self): - return text_type(repr(self._go)) + return text(repr(self._go)) def __or__(self, other): if other == None: return self if not isinstance(other, Signal): Log.error("Expecting OR with other signal") + if self or other: + return DONE output = Signal(self.name + " | " + other.name) OrSignal(output, (self, other)) @@ -165,7 +164,7 @@ class Signal(object): return self.__or__(other) def __and__(self, other): - if other == None: + if other == None or other: return self if not isinstance(other, Signal): Log.error("Expecting OR with other signal") @@ -176,8 +175,8 @@ class Signal(object): output = Signal(self.name + " and " + other.name) gen = AndSignals(output, 2) - self.on_go(gen.done) - other.on_go(gen.done) + self.then(gen.done) + other.then(gen.done) return output @@ -194,6 +193,8 @@ class AndSignals(object): self.signal = signal self.locker = _allocate_lock() self.remaining = count + if not count: + self.signal.go() def done(self): with self.locker: @@ -214,12 +215,13 @@ class OrSignal(object): self.dependencies = dependencies self.signal = ref(signal, self.cleanup) for d in dependencies: - d.on_go(self) - signal.on_go(self.cleanup) + d.then(self) + signal.then(self.cleanup) def cleanup(self, r=None): for d in self.dependencies: d.remove_go(self) + self.dependencies = [] def __call__(self, *args, **kwargs): s = self.signal() diff --git a/vendor/mo_threads/threads.py b/vendor/mo_threads/threads.py index 69b8356..eef2b2f 100644 --- a/vendor/mo_threads/threads.py +++ b/vendor/mo_threads/threads.py @@ -5,38 +5,55 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. 
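A minimal sketch of the renamed `then()` hook and the `|`/`&` composition from the signals.py hunks above (illustrative only, not part of this patch):

    from mo_threads.signals import Signal

    a = Signal("a")
    b = Signal("b")
    either = a | b  # FIRES WHEN EITHER FIRES; RETURNS DONE IF ONE ALREADY HAS
    both = a & b    # FIRES ONLY WHEN BOTH HAVE FIRED

    def announce():
        print("both have fired")

    both.then(announce)  # then() IS THE NEW NAME FOR on_go()

    a.go()
    b.go()
    assert either and both  # TRUTHINESS MEANS "HAS BEEN SIGNALLED"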
# THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import signal as _signal import sys from copy import copy from datetime import datetime, timedelta -from time import sleep, time +from time import sleep -from mo_dots import Data, unwraplist -from mo_future import get_ident, start_new_thread, get_function_name, text_type, allocate_lock -from mo_logs import Log, Except +from mo_dots import Data, coalesce, unwraplist +from mo_future import ( + allocate_lock, + get_function_name, + get_ident, + start_new_thread, + text, + decorate, + PY3, +) +from mo_logs import Except, Log from mo_threads.lock import Lock from mo_threads.profiles import CProfiler, write_profiles -from mo_threads.signal import AndSignals, Signal +from mo_threads.signals import AndSignals, Signal from mo_threads.till import Till DEBUG = False +PLEASE_STOP = str("please_stop") # REQUIRED thread PARAMETER TO SIGNAL STOP +PARENT_THREAD = str( + "parent_thread" +) # OPTIONAL PARAMETER TO ASSIGN THREAD TO SOMETHING OTHER THAN CURRENT THREAD MAX_DATETIME = datetime(2286, 11, 20, 17, 46, 39) DEFAULT_WAIT_TIME = timedelta(minutes=10) THREAD_STOP = "stop" THREAD_TIMEOUT = "TIMEOUT" -datetime.strptime('2012-01-01', '%Y-%m-%d') # http://bugs.python.org/issue7980 +datetime.strptime("2012-01-01", "%Y-%m-%d") # http://bugs.python.org/issue7980 + +if PY3: + STDOUT = sys.stdout.buffer + STDIN = sys.stdin.buffer +else: + STDOUT = sys.stdout + STDIN = sys.stdin class AllThread(object): @@ -65,33 +82,36 @@ class AllThread(object): if exceptions: Log.error("Problem in child threads", cause=exceptions) - - def add(self, target, *args, **kwargs): + def add(self, name, target, *args, **kwargs): """ target IS THE FUNCTION TO EXECUTE IN THE THREAD """ - t = Thread.run(target.__name__, target, *args, **kwargs) + t = Thread.run(name, target, *args, **kwargs) self.threads.append(t) + run = add + class BaseThread(object): - __slots__ = ["id", "name", "children", "child_lock", "cprofiler"] + __slots__ = ["id", "name", "children", "child_locker", "cprofiler", "trace_func"] - def __init__(self, ident): + def __init__(self, ident, name=None): self.id = ident + self.name = name if ident != -1: - self.name = "Unknown Thread " + text_type(ident) - self.child_lock = allocate_lock() + self.name = "Unknown Thread " + text(ident) + self.child_locker = allocate_lock() self.children = [] self.cprofiler = None + self.trace_func = sys.gettrace() def add_child(self, child): - with self.child_lock: + with self.child_locker: self.children.append(child) def remove_child(self, child): try: - with self.child_lock: + with self.child_locker: self.children.remove(child) except Exception: pass @@ -102,13 +122,14 @@ class MainThread(BaseThread): BaseThread.__init__(self, get_ident()) self.name = "Main Thread" self.please_stop = Signal() + self.stopped = Signal() self.stop_logging = Log.stop self.timers = None + self.shutdown_locker = allocate_lock() def stop(self): """ - BLOCKS UNTIL ALL THREADS HAVE STOPPED - THEN RUNS sys.exit(0) + BLOCKS UNTIL ALL KNOWN THREADS, EXCEPT MainThread, HAVE STOPPED """ global DEBUG @@ -120,7 +141,7 @@ class MainThread(BaseThread): self.please_stop.go() join_errors = [] - with self.child_lock: + with self.child_locker: children = copy(self.children) for c in reversed(children): DEBUG and c.name and 
Log.note("Stopping thread {{name|quote}}", name=c.name) @@ -130,30 +151,41 @@ class MainThread(BaseThread): join_errors.append(e) for c in children: - DEBUG and c.name and Log.note("Joining on thread {{name|quote}}", name=c.name) + DEBUG and c.name and Log.note( + "Joining on thread {{name|quote}}", name=c.name + ) try: c.join() except Exception as e: join_errors.append(e) - DEBUG and c.name and Log.note("Done join on thread {{name|quote}}", name=c.name) + DEBUG and c.name and Log.note( + "Done join on thread {{name|quote}}", name=c.name + ) if join_errors: - Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors)) + Log.error( + "Problem while stopping {{name|quote}}", + name=self.name, + cause=unwraplist(join_errors), + ) - self.stop_logging() - self.timers.stop() - self.timers.join() + with self.shutdown_locker: + if self.stopped: + return + self.stop_logging() + self.timers.stop() + self.timers.join() - write_profiles(self.cprofiler) - DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name) - sys.exit() + write_profiles(self.cprofiler) + DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name) + self.stopped.go() def wait_for_shutdown_signal( self, please_stop=False, # ASSIGN SIGNAL TO STOP EARLY allow_exit=False, # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP - wait_forever=True # IGNORE CHILD THREADS, NEVER EXIT. False => IF NO CHILD THREADS LEFT, THEN EXIT + wait_forever=True, # IGNORE CHILD THREADS, NEVER EXIT. False => IF NO CHILD THREADS LEFT, THEN EXIT ): """ FOR USE BY PROCESSES THAT NEVER DIE UNLESS EXTERNAL SHUTDOWN IS REQUESTED @@ -167,29 +199,32 @@ class MainThread(BaseThread): """ self_thread = Thread.current() if self_thread != MAIN_THREAD or self_thread != self: - Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)") + Log.error( + "Only the main thread can sleep forever (waiting for KeyboardInterrupt)" + ) if isinstance(please_stop, Signal): # MUTUAL SIGNALING MAKES THESE TWO EFFECTIVELY THE SAME SIGNAL - self.please_stop.on_go(please_stop.go) - please_stop.on_go(self.please_stop.go) + self.please_stop.then(please_stop.go) + please_stop.then(self.please_stop.go) else: please_stop = self.please_stop if not wait_forever: - # TRIGGER SIGNAL WHEN ALL CHILDREN THEADS ARE DONE - with self_thread.child_lock: + # TRIGGER SIGNAL WHEN ALL CHILDREN THREADS ARE DONE + with self_thread.child_locker: pending = copy(self_thread.children) - children_done = AndSignals(please_stop, len(pending)) - children_done.signal.on_go(self.please_stop.go) + children_done = AndSignals(self.please_stop, len(pending)) + children_done.signal.then(self.please_stop.go) for p in pending: - p.stopped.on_go(children_done.done) + p.stopped.then(children_done.done) try: if allow_exit: - _wait_for_exit(please_stop) + _wait_for_exit(self.please_stop) else: - _wait_for_interrupt(please_stop) + _wait_for_interrupt(self.please_stop) + Log.alert("Stop requested! Stopping...") except KeyboardInterrupt as _: Log.alert("SIGINT Detected! 
Stopping...") except SystemExit as _: @@ -207,24 +242,26 @@ class Thread(BaseThread): num_threads = 0 def __init__(self, name, target, *args, **kwargs): - BaseThread.__init__(self, -1) - self.name = name + BaseThread.__init__(self, -1, coalesce(name, "thread_" + text(object.__hash__(self)))) self.target = target self.end_of_thread = Data() - self.synch_lock = Lock("response synch lock") self.args = args # ENSURE THERE IS A SHARED please_stop SIGNAL self.kwargs = copy(kwargs) - self.kwargs["please_stop"] = self.kwargs.get("please_stop", Signal("please_stop for " + self.name)) - self.please_stop = self.kwargs["please_stop"] + self.please_stop = self.kwargs.get(PLEASE_STOP) + if self.please_stop is None: + self.please_stop = self.kwargs[PLEASE_STOP] = Signal( + "please_stop for " + self.name + ) self.thread = None + self.ready_to_stop = Signal("joining with " + self.name) self.stopped = Signal("stopped signal for " + self.name) - if "parent_thread" in kwargs: - del self.kwargs["parent_thread"] - self.parent = kwargs["parent_thread"] + if PARENT_THREAD in kwargs: + del self.kwargs[PARENT_THREAD] + self.parent = kwargs[PARENT_THREAD] else: self.parent = Thread.current() self.parent.add_child(self) @@ -252,7 +289,7 @@ class Thread(BaseThread): """ SEND STOP SIGNAL, DO NOT BLOCK """ - with self.child_lock: + with self.child_locker: children = copy(self.children) for c in children: DEBUG and c.name and Log.note("Stopping thread {{name|quote}}", name=c.name) @@ -268,50 +305,101 @@ class Thread(BaseThread): if self.target is not None: a, k, self.args, self.kwargs = self.args, self.kwargs, None, None self.end_of_thread.response = self.target(*a, **k) - self.parent.remove_child(self) # IF THREAD ENDS OK, THEN FORGET ABOUT IT except Exception as e: e = Except.wrap(e) - with self.synch_lock: - self.end_of_thread.exception = e - with self.parent.child_lock: + self.end_of_thread.exception = e + with self.parent.child_locker: emit_problem = self not in self.parent.children if emit_problem: # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT try: - Log.fatal("Problem in thread {{name|quote}}", name=self.name, cause=e) + Log.error( + "Problem in thread {{name|quote}}", name=self.name, cause=e + ) except Exception: - sys.stderr.write(str("ERROR in thread: " + self.name + " " + text_type(e) + "\n")) + sys.stderr.write( + str("ERROR in thread: " + self.name + " " + text(e) + "\n") + ) finally: try: - with self.child_lock: + with self.child_locker: children = copy(self.children) for c in children: try: - DEBUG and sys.stdout.write(str("Stopping thread " + c.name + "\n")) + DEBUG and Log.note("Stopping thread " + c.name + "\n") c.stop() except Exception as e: - Log.warning("Problem stopping thread {{thread}}", thread=c.name, cause=e) + Log.warning( + "Problem stopping thread {{thread}}", + thread=c.name, + cause=e, + ) for c in children: try: - DEBUG and sys.stdout.write(str("Joining on thread " + c.name + "\n")) + DEBUG and Log.note("Joining on thread " + c.name + "\n") c.join() except Exception as e: - Log.warning("Problem joining thread {{thread}}", thread=c.name, cause=e) + Log.warning( + "Problem joining thread {{thread}}", + thread=c.name, + cause=e, + ) finally: - DEBUG and sys.stdout.write(str("Joined on thread " + c.name + "\n")) + DEBUG and Log.note("Joined on thread " + c.name + "\n") del self.target, self.args, self.kwargs DEBUG and Log.note("thread {{name|quote}} stopping", name=self.name) except Exception as e: - DEBUG and Log.warning("problem with thread {{name|quote}}", cause=e, 
name=self.name) + DEBUG and Log.warning( + "problem with thread {{name|quote}}", cause=e, name=self.name + ) finally: + if not self.ready_to_stop: + DEBUG and Log.note("thread {{name|quote}} is done, wait for join", name=self.name) + # WHERE DO WE PUT THE THREAD RESULT? + # IF NO THREAD JOINS WITH THIS, THEN WHAT DO WE DO WITH THE RESULT? + # HOW LONG DO WE WAIT FOR ANOTHER TO ACCEPT THE RESULT? + # + # WAIT 60 SECONDS, THEN SEND RESULT TO LOGGER + (Till(seconds=60) | self.ready_to_stop).wait() + self.stopped.go() - DEBUG and Log.note("thread {{name|quote}} is done", name=self.name) + + if not self.ready_to_stop: + if self.end_of_thread.exception: + # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT + try: + Log.error( + "Problem in thread {{name|quote}}", name=self.name, cause=self.end_of_thread.exception + ) + except Exception: + sys.stderr.write( + str("ERROR in thread: " + self.name + " " + text(self.end_of_thread.exception) + "\n") + ) + elif self.end_of_thread.response != None: + Log.warning( + "Thread {{thread}} returned a response, but was not joined with {{parent}} after 60sec", + thread=self.name, + parent=self.parent.name + ) + else: + # IF THREAD ENDS OK, AND NOTHING RETURNED, THEN FORGET ABOUT IT + self.parent.remove_child(self) def is_alive(self): return not self.stopped + def release(self): + """ + RELEASE THREAD TO FEND FOR ITSELF. THREAD CAN EXPECT TO NEVER + JOIN. WILL SEND RESULTS TO LOGS WHEN DONE. + + PARENT THREAD WILL STILL ENSURE self HAS STOPPED PROPERLY + """ + self.ready_to_stop.go() + return self + def join(self, till=None): """ RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS) @@ -319,29 +407,40 @@ if self is Thread: Log.error("Thread.join() is not a valid call, use t.join()") - with self.child_lock: + with self.child_locker: children = copy(self.children) for c in children: c.join(till=till) - DEBUG and Log.note("{{parent|quote}} waiting on thread {{child|quote}}", parent=Thread.current().name, child=self.name) + DEBUG and Log.note( + "{{parent|quote}} waiting on thread {{child|quote}}", + parent=Thread.current().name, + child=self.name, + ) + self.ready_to_stop.go() (self.stopped | till).wait() if self.stopped: self.parent.remove_child(self) if not self.end_of_thread.exception: return self.end_of_thread.response else: - Log.error("Thread {{name|quote}} did not end well", name=self.name, cause=self.end_of_thread.exception) + Log.error( + "Thread {{name|quote}} did not end well", + name=self.name, + cause=self.end_of_thread.exception, + ) else: - raise Except(type=THREAD_TIMEOUT) + raise Except(context=THREAD_TIMEOUT) @staticmethod def run(name, target, *args, **kwargs): # ENSURE target HAS please_stop ARGUMENT - if get_function_name(target) == 'wrapper': + if get_function_name(target) == "wrapper": pass # GIVE THE override DECORATOR A PASS - elif "please_stop" not in target.__code__.co_varnames: - Log.error("function must have please_stop argument for signalling emergency shutdown") + elif PLEASE_STOP not in target.__code__.co_varnames: + Log.error( + "function must have please_stop argument for signalling emergency shutdown" + ) Thread.num_threads += 1 @@ -361,16 +460,25 @@ thread.cprofiler.__enter__() with ALL_LOCK: ALL[ident] = thread - Log.warning("this thread is not known. Register this thread at earliest known entry point.") + Log.warning( + "this thread is not known. Register this thread at earliest known entry point." 
+ ) return thread return output class RegisterThread(object): + """ + A context manager to handle threads spawned by other libs + This will ensure the thread has unregistered, or + has completed before MAIN_THREAD is shutdown + """ - def __init__(self, thread=None): + __slots__ = ["thread"] + + def __init__(self, thread=None, name=None): if thread is None: - thread = BaseThread(get_ident()) + thread = BaseThread(get_ident(), name) self.thread = thread def __enter__(self): @@ -382,30 +490,29 @@ class RegisterThread(object): def __exit__(self, exc_type, exc_val, exc_tb): self.thread.cprofiler.__exit__(exc_type, exc_val, exc_tb) + with self.thread.child_locker: + if self.thread.children: + Log.error( + "Thread {{thread|quote}} has not joined with child threads {{children|json}}", + children=[c.name for c in self.thread.children], + thread=self.thread.name + ) with ALL_LOCK: del ALL[self.thread.id] -def stop_main_thread(*args): +def register_thread(func): """ - CLEAN OF ALL THREADS CREATED WITH THIS LIBRARY + Call `with RegisterThread():` + Track this thread to ensure controlled shutdown """ - try: - if len(args) and args[0]: - Log.warning("exit with {{value}}", value=_describe_exit_codes.get(args[0], args[0])) - except Exception as _: - pass - finally: - MAIN_THREAD.stop() + @decorate(func) + def output(*args, **kwargs): + with RegisterThread(): + return func(*args, **kwargs) -_describe_exit_codes = { - _signal.SIGTERM: "SIGTERM", - _signal.SIGINT: "SIGINT" -} - -_signal.signal(_signal.SIGTERM, stop_main_thread) -_signal.signal(_signal.SIGINT, stop_main_thread) + return output def _wait_for_exit(please_stop): @@ -414,43 +521,58 @@ def _wait_for_exit(please_stop): """ cr_count = 0 # COUNT NUMBER OF BLANK LINES - while not please_stop: - # if DEBUG: - # Log.note("inside wait-for-shutdown loop") - if cr_count > 30: - (Till(seconds=3) | please_stop).wait() - try: - line = sys.stdin.readline() - except Exception as e: - Except.wrap(e) - if "Bad file descriptor" in e: - _wait_for_interrupt(please_stop) - break + try: + while not please_stop: + # DEBUG and Log.note("inside wait-for-shutdown loop") + if cr_count > 30: + (Till(seconds=3) | please_stop).wait() + try: + # line = "" + line = STDIN.readline() + except Exception as e: + Except.wrap(e) + if "Bad file descriptor" in e: + Log.note("can not read from stdin") + _wait_for_interrupt(please_stop) + break - # if DEBUG: - # Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count) - if line == "": - cr_count += 1 - else: - cr_count = -1000000 # NOT /dev/null + # DEBUG and Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count) + if not line: + cr_count += 1 + else: + cr_count = -1000000 # NOT /dev/null - if line.strip() == "exit": - Log.alert("'exit' Detected! Stopping...") - return + if line.strip() == b"exit": + Log.alert("'exit' Detected! 
Stopping...") + return + except Exception as e: + Log.warning("programming error", cause=e) + finally: + if please_stop: + Log.note("please_stop has been requested") + Log.note("done waiting for exit") def _wait_for_interrupt(please_stop): - DEBUG and Log.note("inside wait-for-shutdown loop") - while not please_stop: - try: - sleep(1) - except Exception: - pass + DEBUG and Log.note("wait for stop signal") + try: + # ALTERNATE BETWEEN please_stop CHECK AND SIGINT CHECK + while not please_stop: + sleep(1) # LOCKS CAN NOT BE INTERRUPTED, ONLY sleep() CAN + finally: + please_stop.go() MAIN_THREAD = MainThread() + +def stop_main_thread(signum=0, frame=None): + MAIN_THREAD.please_stop.go() + + +_signal.signal(_signal.SIGTERM, stop_main_thread) +_signal.signal(_signal.SIGINT, stop_main_thread) + ALL_LOCK = allocate_lock() ALL = dict() ALL[get_ident()] = MAIN_THREAD - diff --git a/vendor/mo_threads/till.py b/vendor/mo_threads/till.py index f48a05e..616d9dd 100644 --- a/vendor/mo_threads/till.py +++ b/vendor/mo_threads/till.py @@ -5,27 +5,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # # THIS THREADING MODULE IS PERMEATED BY THE please_stop SIGNAL. # THIS SIGNAL IS IMPORTANT FOR PROPER SIGNALLING WHICH ALLOWS # FOR FAST AND PREDICTABLE SHUTDOWN AND CLEANUP OF THREADS -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from collections import namedtuple from time import sleep, time from weakref import ref -from mo_future import allocate_lock as _allocate_lock -from mo_future import text_type +from mo_future import allocate_lock as _allocate_lock, text from mo_logs import Log -from mo_threads.signal import Signal, DONE + +from mo_threads.signals import DONE, Signal DEBUG = False INTERVAL = 0.1 +enabled = Signal() class Till(Signal): @@ -36,11 +35,11 @@ class Till(Signal): locker = _allocate_lock() next_ping = time() - enabled = False new_timers = [] def __new__(cls, till=None, seconds=None): - if not Till.enabled: + if not enabled: + Log.note("Till daemon not enabled") return DONE elif till != None: return object.__new__(cls) @@ -53,7 +52,8 @@ class Till(Signal): def __init__(self, till=None, seconds=None): """ - ONE OF THESE PARAMETERS IS REQUIRED + Signal after some elapsed time: Till(seconds=1).wait() + :param till: UNIX TIMESTAMP OF WHEN TO SIGNAL :param seconds: PREFERRED OVER timeout """ @@ -61,15 +61,15 @@ class Till(Signal): if till != None: if not isinstance(till, (float, int)): from mo_logs import Log - Log.error("Date objects for Till are no longer allowed") timeout = till elif seconds != None: timeout = now + seconds else: + from mo_logs import Log raise Log.error("Should not happen") - Signal.__init__(self, name=text_type(timeout)) + Signal.__init__(self, name=text(timeout)) with Till.locker: if timeout != None: @@ -78,7 +78,8 @@ class Till(Signal): def daemon(please_stop): - Till.enabled = True + global enabled + enabled.go() sorted_timers = [] try: @@ -139,7 +140,7 @@ def daemon(please_stop): Log.warning("unexpected timer shutdown", cause=e) finally: DEBUG and Log.alert("TIMER SHUTDOWN") - Till.enabled = False + enabled = Signal() # TRIGGER ALL REMAINING TIMERS RIGHT NOW with Till.locker: new_work, Till.new_timers = Till.new_timers, [] diff --git 
a/vendor/mo_times/__init__.py b/vendor/mo_times/__init__.py index cac06bc..4fb0ddc 100644 --- a/vendor/mo_times/__init__.py +++ b/vendor/mo_times/__init__.py @@ -5,16 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - +from __future__ import absolute_import, division, unicode_literals from mo_times.dates import Date -from mo_times.durations import Duration, ZERO, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR +from mo_times.durations import DAY, Duration, HOUR, MINUTE, MONTH, QUARTER, SECOND, WEEK, YEAR, ZERO from mo_times.timer import Timer diff --git a/vendor/mo_times/dates.py b/vendor/mo_times/dates.py index 22df095..b6ff297 100644 --- a/vendor/mo_times/dates.py +++ b/vendor/mo_times/dates.py @@ -5,25 +5,23 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import math import re -from datetime import datetime, date, timedelta +from datetime import date, datetime, timedelta from decimal import Decimal from time import time as _time -from mo_dots import Null -from mo_future import unichr, text_type, long +import mo_math +from mo_dots import Null, NullType, coalesce +from mo_future import is_text, PY3 +from mo_future import long, none_type, text, unichr from mo_logs import Except from mo_logs.strings import deformat -from mo_math import Math - from mo_times.durations import Duration, MILLI_VALUES from mo_times.vendor.dateutil.parser import parse as parse_date @@ -35,6 +33,7 @@ except Exception: pass ISO8601 = '%Y-%m-%dT%H:%M:%SZ' +RFC1123 = '%a, %d %b %Y %H:%M:%S GMT' class Date(object): @@ -56,14 +55,39 @@ class Date(object): def __hash__(self): return self.unix.__hash__() - def __eq__(self, val): - if val is not None and type(val) == Date: - return self.unix == val.unix - return False + def __eq__(self, other): + try: + type_ = other.__class__ + if type_ in (none_type, NullType): + return False + elif type_ is Date: + return self.unix == other.unix + elif type_ in (float, int): + return self.unix == other + other = Date(other) + return self.unix == other.unix + except Exception: + return False def __nonzero__(self): return True + def __float__(self): + return self.unix + + def __int__(self): + return int(self.unix) + + def ceiling(self, duration=Null): + if duration.month: + from mo_logs import Log + + Log.error("do not know how to handle") + + neg_self = _unix2Date(-self.unix) + neg_floor = neg_self.floor(duration) + return _unix2Date(-neg_floor.unix) + def floor(self, duration=None): if duration is None: # ASSUME DAY return _unix2Date(math.floor(self.unix / 86400) * 86400) @@ -74,19 +98,26 @@ class Date(object): month -= 12*year return Date(datetime(year, month+1, 1)) elif duration.milli % (7 * 86400000) == 0: - offset = 4*86400 + offset = 4 * 86400 return _unix2Date(math.floor((self.unix + offset) / duration.seconds) * duration.seconds - offset) else: return _unix2Date(math.floor(self.unix / duration.seconds) * 
duration.seconds) def format(self, format="%Y-%m-%d %H:%M:%S"): try: - return text_type(unix2datetime(self.unix).strftime(format)) + return text(unix2datetime(self.unix).strftime(format)) except Exception as e: from mo_logs import Log Log.error("Can not format {{value}} with {{format}}", value=unix2datetime(self.unix), format=format, cause=e) + @property + def datetime(self): + """ + RETURN AS PYTHON DATETIME (GMT) + """ + return datetime.utcfromtimestamp(self.unix) + @property def milli(self): return self.unix*1000 @@ -151,7 +182,9 @@ @staticmethod def today(): - return _unix2Date(math.floor(_time() / 86400) * 86400) + now = _utcnow() + now_unix = datetime2unix(now) + return _unix2Date(math.floor(now_unix / 86400) * 86400) @staticmethod def range(min, max, interval): @@ -164,7 +197,7 @@ return str(unix2datetime(self.unix)) def __repr__(self): - return unix2datetime(self.unix).__repr__() + return unix2datetime(self.unix).__repr__() def __sub__(self, other): if other == None: @@ -178,37 +211,26 @@ def __lt__(self, other): try: - if other == None: + type_ = other.__class__ + if type_ in (none_type, NullType): return False - elif isinstance(other, Date): + elif type_ is Date: return self.unix < other.unix - elif isinstance(other, (float, int)): + elif type_ in (float, int): return self.unix < other other = Date(other) return self.unix < other.unix except Exception: return False - def __eq__(self, other): - try: - if other == None: - return False - elif isinstance(other, Date): - return self.unix == other.unix - elif isinstance(other, (float, int)): - return self.unix == other - other = Date(other) - return self.unix == other.unix - except Exception: - return False - def __le__(self, other): try: - if other == None: + type_ = other.__class__ + if type_ in (none_type, NullType): return False - elif isinstance(other, Date): + elif type_ is Date: return self.unix <= other.unix - elif isinstance(other, (float, int)): + elif type_ in (float, int): return self.unix <= other other = Date(other) return self.unix <= other.unix @@ -217,11 +239,12 @@ def __gt__(self, other): try: - if other == None: + type_ = other.__class__ + if type_ in (none_type, NullType): return False - elif isinstance(other, Date): + elif type_ is Date: return self.unix > other.unix - elif isinstance(other, (float, int)): + elif type_ in (float, int): return self.unix > other other = Date(other) return self.unix > other.unix @@ -230,11 +253,12 @@ def __ge__(self, other): try: - if other == None: + type_ = other.__class__ + if type_ in (none_type, NullType): return False - elif isinstance(other, Date): + elif type_ is Date: return self.unix >= other.unix - elif isinstance(other, (float, int)): + elif type_ in (float, int): return self.unix >= other other = Date(other) return self.unix >= other.unix @@ -257,6 +281,16 @@ output = v return output + @classmethod + def max(cls, *values): + output = Null + for v in values: + if output == None and v != None: + output = v + elif v > output: + output = v + return output + def parse(*args): try: @@ -272,18 +306,18 @@ output = _unix2Date(a0 / 1000) else: output = _unix2Date(a0) - elif isinstance(a0, text_type) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0): + elif is_text(a0) and len(a0) in [9, 10, 12, 13] and mo_math.is_integer(a0): a0 = float(a0) if a0 > 9999999999: # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP output = _unix2Date(a0 / 1000) else: output = 
_unix2Date(a0) - elif isinstance(a0, text_type): + elif is_text(a0): output = unicode2Date(a0) else: output = _unix2Date(datetime2unix(datetime(*args))) else: - if isinstance(args[0], text_type): + if is_text(args[0]): output = unicode2Date(*args) else: output = _unix2Date(datetime2unix(datetime(*args))) @@ -405,7 +439,7 @@ def unicode2Date(value, format=None): try: # 2.7 DOES NOT SUPPORT %z local_value = parse_date(value) #eg 2014-07-16 10:57 +0200 - return _unix2Date(datetime2unix((local_value - local_value.utcoffset()).replace(tzinfo=None))) + return _unix2Date(datetime2unix((local_value - coalesce(local_value.utcoffset(), timedelta(0))).replace(tzinfo=None))) except Exception as e: e = Except.wrap(e) # FOR DEBUGGING pass @@ -431,6 +465,11 @@ "%d%b%y", "%d%B%Y", "%d%B%y", + "%B%d%Y", + "%b%d%Y", + "%B%d%y", + "%b%d%y", + "%Y%m%d%H%M%S%f", "%Y%m%d%H%M%S", "%Y%m%dT%H%M%S", "%d%m%Y%H%M%S", @@ -452,7 +491,11 @@ Log.error("Can not interpret {{value}} as a datetime", value=value) -DATETIME_EPOCH = datetime(1970, 1, 1) +if PY3: + from datetime import timezone + DATETIME_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) +else: + DATETIME_EPOCH = datetime(1970, 1, 1) DATE_EPOCH = date(1970, 1, 1) @@ -461,7 +504,10 @@ def datetime2unix(value): if value == None: return None elif isinstance(value, datetime): - diff = value - DATETIME_EPOCH + if value.tzinfo: + diff = value - DATETIME_EPOCH + else: + diff = value - DATETIME_EPOCH.replace(tzinfo=None) return diff.total_seconds() elif isinstance(value, date): diff = value - DATE_EPOCH @@ -477,7 +523,11 @@ def unix2datetime(unix): if unix == None: return Null - return datetime.utcfromtimestamp(unix) + try: + return datetime.utcfromtimestamp(unix) + except Exception as e: + from mo_logs import Log + Log.error("Can not convert {{value}} to datetime", value=unix, cause=e) def unix2Date(unix): @@ -508,9 +558,15 @@ return "".join(output) -Date.MIN = Date(datetime(1, 1, 1)) -Date.MAX = Date(datetime(2286, 11, 20, 17, 46, 39)) -Date.EPOCH = _unix2Date(0) +if PY3: + from datetime import timezone + Date.MIN = Date(datetime(1, 1, 1, tzinfo=timezone.utc)) + Date.MAX = Date(datetime(2286, 11, 20, 17, 46, 39, tzinfo=timezone.utc)) + Date.EPOCH = _unix2Date(0) +else: + Date.MIN = Date(datetime(1, 1, 1)) + Date.MAX = Date(datetime(2286, 11, 20, 17, 46, 39)) + Date.EPOCH = _unix2Date(0) def _mod(value, mod=1): """ diff --git a/vendor/mo_times/durations.py b/vendor/mo_times/durations.py index cdcf22f..133ad24 100644 --- a/vendor/mo_times/durations.py +++ b/vendor/mo_times/durations.py @@ -5,18 +5,16 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
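A minimal sketch of the new `Date` conveniences from the dates.py hunks above (the `datetime` property, `__float__`/`__int__`, and the widened `__eq__`), assuming the usual string-accepting `Date` constructor:

    from mo_times.dates import Date

    d = Date("2018-10-01")
    print(d.datetime)        # NEW PROPERTY: NAIVE GMT datetime
    print(float(d), int(d))  # NEW __float__/__int__: UNIX SECONDS
    assert d == d.unix       # __eq__ NOW ACCEPTS Date, NUMBERS, OR PARSEABLE VALUES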
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import datetime import re from mo_dots import get_module, wrap -from mo_future import text_type -from mo_math import MIN, Math +from mo_future import is_text, text +from mo_math import MIN, is_nan, is_number, abs, floor, round from mo_times.vendor.dateutil.relativedelta import relativedelta _Date = None @@ -47,17 +45,17 @@ class Duration(object): else: return None - if Math.is_number(value): + if is_number(value): output._milli = float(value) * 1000 output.month = 0 return output - elif isinstance(value, text_type): + elif is_text(value): return parse(value) elif isinstance(value, Duration): output.milli = value.milli output.month = value.month return output - elif isinstance(value, float) and Math.is_nan(value): + elif isinstance(value, float) and is_nan(value): return None else: from mo_logs import Log @@ -126,7 +124,7 @@ class Duration(object): r = r - tod if m == 0 and r > (MILLI_VALUES.year / 3): - m = Math.floor(12 * self.milli / MILLI_VALUES.year) + m = floor(12 * self.milli / MILLI_VALUES.year) r -= (m / 12) * MILLI_VALUES.year else: r = r - (self.month * MILLI_VALUES.month) @@ -135,9 +133,9 @@ class Duration(object): Log.error("Do not know how to handle") r = MIN([29 / 30, (r + tod) / (MILLI_VALUES.day * 30)]) - output = Math.floor(m / amount.month) + r + output = floor(m / amount.month) + r return output - elif Math.is_number(amount): + elif is_number(amount): output = Duration(0) output.milli = self.milli / amount output.month = self.month / amount @@ -199,15 +197,15 @@ class Duration(object): output = Duration(0) if interval.month: if self.month: - output.month = int(Math.floor(self.month / interval.month) * interval.month) + output.month = int(floor(self.month / interval.month) * interval.month) output.milli = output.month * MILLI_VALUES.month return output # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH - output.month = int(Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month) + output.month = int(floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month) output.milli = output.month * MILLI_VALUES.month else: - output.milli = Math.floor(self.milli / (interval.milli)) * (interval.milli) + output.milli = floor(self.milli / (interval.milli)) * (interval.milli) return output @property @@ -228,6 +226,9 @@ class Duration(object): def total_seconds(self): return float(self.milli) / 1000 + def __float__(self): + return self.seconds + def __str__(self): return str(self.__unicode__()) @@ -238,31 +239,31 @@ class Duration(object): output = "" rest = (self.milli - (MILLI_VALUES.month * self.month)) # DO NOT INCLUDE THE MONTH'S MILLIS isNegative = (rest < 0) - rest = Math.abs(rest) + rest = abs(rest) # MILLI rem = rest % 1000 if rem != 0: - output = "+" + text_type(rem) + "milli" + output - rest = Math.floor(rest / 1000) + output = "+" + text(rem) + "milli" + output + rest = floor(rest / 1000) # SECOND rem = rest % 60 if rem != 0: - output = "+" + text_type(rem) + "second" + output - rest = Math.floor(rest / 60) + output = "+" + text(rem) + "second" + output + rest = floor(rest / 60) # MINUTE rem = rest % 60 if rem != 0: - output = "+" + text_type(rem) + "minute" + output - rest = Math.floor(rest / 60) + output = "+" + text(rem) + "minute" + output 
+ rest = floor(rest / 60) # HOUR rem = rest % 24 if rem != 0: - output = "+" + text_type(rem) + "hour" + output - rest = Math.floor(rest / 24) + output = "+" + text(rem) + "hour" + output + rest = floor(rest / 24) # DAY if (rest < 11 and rest != 7) or rest % 10 == 0: @@ -270,14 +271,14 @@ class Duration(object): rest = 0 else: rem = rest % 7 - rest = Math.floor(rest / 7) + rest = floor(rest / 7) if rem != 0: - output = "+" + text_type(rem) + "day" + output + output = "+" + text(rem) + "day" + output # WEEK if rest != 0: - output = "+" + text_type(rest) + "week" + output + output = "+" + text(rest) + "week" + output if isNegative: output = output.replace("+", "-") @@ -285,20 +286,20 @@ class Duration(object): # MONTH AND YEAR if self.month: sign = "-" if self.month < 0 else "+" - month = Math.abs(self.month) + month = abs(self.month) if month <= 18 and month != 12: - output = sign + text_type(month) + "month" + output + output = sign + text(month) + "month" + output else: m = month % 12 if m != 0: - output = sign + text_type(m) + "month" + output - y = Math.floor(month / 12) - output = sign + text_type(y) + "year" + output + output = sign + text(m) + "month" + output + y = floor(month / 12) + output = sign + text(y) + "year" + output if output[0] == "+": output = output[1::] - if output[0] == '1' and not Math.is_number(output[1]): + if output[0] == '1' and not is_number(output[1]): output = output[1::] return output @@ -308,7 +309,7 @@ class Duration(object): def round(self, interval, decimal=0): output = self / interval - output = Math.round(output, decimal) + output = round(output, decimal) return output diff --git a/vendor/mo_times/timer.py b/vendor/mo_times/timer.py index 447b611..18426c5 100644 --- a/vendor/mo_times/timer.py +++ b/vendor/mo_times/timer.py @@ -4,12 +4,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
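A minimal sketch of the `Duration` behaviour touched in the durations.py hunks above (illustrative only; assumes the usual compound string constructor that `__unicode__` round-trips):

    from mo_times.durations import Duration, HOUR

    d = Duration("2hour+30minute")
    print(float(d))       # NEW __float__: TOTAL SECONDS (9000.0)
    print(d.floor(HOUR))  # SNAPS DOWN TO A WHOLE NUMBER OF HOURS: 2hour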
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from datetime import timedelta from time import time @@ -33,10 +31,10 @@ class Timer(object): debug - SET TO False TO DISABLE THIS TIMER """ - def __init__(self, description, param=None, silent=False, too_long=0): + def __init__(self, description, param=None, silent=None, verbose=None, too_long=0): self.template = description self.param = wrap(coalesce(param, {})) - self.silent = silent + self.verbose = coalesce(verbose, False if silent is True else True) self.agg = 0 self.too_long = too_long # ONLY SHOW TIMING FOR DURATIONS THAT ARE too_long self.start = 0 @@ -44,7 +42,7 @@ class Timer(object): self.interval = None def __enter__(self): - if not self.silent and self.too_long == 0: + if self.verbose and self.too_long == 0: Log.note("Timer start: " + self.template, stack_depth=1, **self.param) self.start = time() return self @@ -54,7 +52,7 @@ class Timer(object): self.interval = self.end - self.start self.agg += self.interval self.param.duration = timedelta(seconds=self.interval) - if not self.silent: + if self.verbose: if self.too_long == 0: Log.note("Timer end : " + self.template + " (took {{duration}})", default_params=self.param, stack_depth=1) elif self.interval >= self.too_long: diff --git a/vendor/mo_times/vendor/dateutil/parser.py b/vendor/mo_times/vendor/dateutil/parser.py index 6f59803..e748906 100644 --- a/vendor/mo_times/vendor/dateutil/parser.py +++ b/vendor/mo_times/vendor/dateutil/parser.py @@ -5,19 +5,14 @@ Copyright (c) 2003-2007 Gustavo Niemeyer This module offers extensions to the standard Python datetime module. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import collections import datetime import string import time -from mo_future import text_type, integer_types, binary_type, StringIO - -from . import relativedelta -from . import tz +from mo_future import StringIO, integer_types, is_binary, is_text, Callable +from . 
import relativedelta, tz __license__ = "Simplified BSD" __all__ = ["parse", "parserinfo"] @@ -36,7 +31,7 @@ __all__ = ["parse", "parserinfo"] class _timelex(object): def __init__(self, instream): - if isinstance(instream, text_type): + if is_text(instream): instream = StringIO(instream) self.instream = instream self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' @@ -311,14 +306,14 @@ class parser(object): if res.weekday is not None and not res.day: ret = ret+relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: - if isinstance(tzinfos, collections.Callable) or tzinfos and res.tzname in tzinfos: - if isinstance(tzinfos, collections.Callable): + if isinstance(tzinfos, Callable) or tzinfos and res.tzname in tzinfos: + if isinstance(tzinfos, Callable): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, text_type): + elif is_text(tzdata): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, integer_types): tzinfo = tz.tzoffset(res.tzname, tzdata) @@ -705,7 +700,7 @@ def parse(timestr, parserinfo=None, **kwargs): # Python 2.x support: datetimes return their string presentation as # bytes in 2.x and unicode in 3.x, so it's reasonable to expect that # the parser will get both kinds. Internally we use unicode only. - if isinstance(timestr, binary_type): + if is_binary(timestr): timestr = timestr.decode() if parserinfo: return parser(parserinfo).parse(timestr, **kwargs) diff --git a/vendor/mo_times/vendor/dateutil/rrule.py b/vendor/mo_times/vendor/dateutil/rrule.py index 7f45909..f6ba302 100644 --- a/vendor/mo_times/vendor/dateutil/rrule.py +++ b/vendor/mo_times/vendor/dateutil/rrule.py @@ -6,9 +6,10 @@ datetime module. """ __license__ = "Simplified BSD" -import itertools -import datetime import calendar +import datetime +import itertools + try: import _thread except ImportError: diff --git a/vendor/mo_times/vendor/dateutil/tzwin.py b/vendor/mo_times/vendor/dateutil/tzwin.py index 041c6cc..d3d9892 100644 --- a/vendor/mo_times/vendor/dateutil/tzwin.py +++ b/vendor/mo_times/vendor/dateutil/tzwin.py @@ -1,8 +1,8 @@ # This code was originally contributed by Jeffrey Harris. import datetime import struct -import winreg +import winreg __all__ = ["tzwin", "tzwinlocal"] diff --git a/vendor/mo_times/vendor/dateutil/zoneinfo/__init__.py b/vendor/mo_times/vendor/dateutil/zoneinfo/__init__.py index a1b3487..ccdd9f0 100644 --- a/vendor/mo_times/vendor/dateutil/zoneinfo/__init__.py +++ b/vendor/mo_times/vendor/dateutil/zoneinfo/__init__.py @@ -5,9 +5,10 @@ Copyright (c) 2003-2005 Gustavo Niemeyer This module offers extensions to the standard Python datetime module. """ -from dateutil.tz import tzfile -from tarfile import TarFile import os +from tarfile import TarFile + +from dateutil.tz import tzfile __author__ = "Tomi Pieviläinen " __license__ = "Simplified BSD" diff --git a/vendor/pyLibrary/aws/__init__.py b/vendor/pyLibrary/aws/__init__.py index 87095cf..92598e4 100644 --- a/vendor/pyLibrary/aws/__init__.py +++ b/vendor/pyLibrary/aws/__init__.py @@ -5,31 +5,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
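The vendored dateutil parser above now uses the `mo_future` predicates in place of direct type checks; a quick sketch of their behaviour:

    from mo_future import is_binary, is_text

    assert is_text(u"abc") and not is_text(b"abc")
    assert is_binary(b"abc")  # REPLACES isinstance(x, binary_type) CHECKS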
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals -import requests import time -from boto import sqs -from boto import utils as boto_utils -from boto.sqs.message import Message -from mo_times import timer -from mo_dots import wrap, unwrap, coalesce +from boto import sqs, utils as boto_utils +from boto.sqs.message import Message +import requests + +from mo_dots import coalesce, unwrap, wrap +import mo_json from mo_json import value2json from mo_kwargs import override from mo_logs import Log, machine_metadata -from mo_math import Math -from mo_threads import Thread - -import mo_json from mo_logs.exceptions import Except, suppress_exception -from mo_threads.signal import Signal -from mo_threads.till import Till -from mo_times.durations import SECOND, Duration +import mo_math +from mo_threads import Thread, Till, Signal +from mo_times import timer +from mo_times.durations import Duration, SECOND class Queue(object): @@ -54,7 +49,7 @@ class Queue(object): aws_access_key_id=unwrap(kwargs.aws_access_key_id), aws_secret_access_key=unwrap(kwargs.aws_secret_access_key), ) - self.queue = conn.get_queue(kwargs.name) + self.queue = conn.get_queue(name) if self.queue == None: Log.error("Can not find queue with name {{queue}} in region {{region}}", queue=kwargs.name, region=kwargs.region) @@ -86,7 +81,7 @@ class Queue(object): if till is not None and not isinstance(till, Signal): Log.error("Expecting a signal") - m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds)) + m = self.queue.read(wait_time_seconds=mo_math.floor(wait.seconds)) if not m: return None @@ -101,7 +96,7 @@ class Queue(object): if till is not None and not isinstance(till, Signal): Log.error("Expecting a signal") - message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds)) + message = self.queue.read(wait_time_seconds=mo_math.floor(wait.seconds)) if not message: return None message.delete = lambda: self.queue.delete_message(message) @@ -110,8 +105,7 @@ class Queue(object): return message, payload def commit(self): - pending = self.pending - self.pending = [] + pending, self.pending = self.pending, [] for p in pending: self.queue.delete_message(p) @@ -119,16 +113,19 @@ class Queue(object): if self.pending: pending, self.pending = self.pending, [] - for p in pending: - m = Message() - m.set_body(p.get_body()) - self.queue.write(m) + try: + for p in pending: + m = Message() + m.set_body(p.get_body()) + self.queue.write(m) - for p in pending: - self.queue.delete_message(p) + for p in pending: + self.queue.delete_message(p) - if self.settings.debug: - Log.alert("{{num}} messages returned to queue", num=len(pending)) + if self.settings.debug: + Log.alert("{{num}} messages returned to queue", num=len(pending)) + except Exception as e: + Log.warning("Failed to return {{num}} messages to the queue", num=len(pending), cause=e) def close(self): self.commit() @@ -151,7 +148,7 @@ def capture_termination_signal(please_stop): except Exception as e: e = Except.wrap(e) if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e: - Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)") + Log.note("AWS Spot Detection has shutdown, this is probably not a spot node, 
(http://169.254.169.254 is unreachable)") return elif seen_problem: # IGNORE THE FIRST PROBLEM @@ -161,7 +158,7 @@ def capture_termination_signal(please_stop): (Till(seconds=61) | please_stop).wait() (Till(seconds=11) | please_stop).wait() - Thread.run("listen for termination", worker) + Thread.run("listen for termination", worker).release() def get_instance_metadata(timeout=None): @@ -195,6 +192,6 @@ def _get_metadata_from_from_aws(please_stop): machine_metadata.aws_instance_type = ec2.instance_type machine_metadata.name = ec2.instance_id -Thread.run("get aws machine metadata", _get_metadata_from_from_aws) +Thread.run("get aws machine metadata", _get_metadata_from_from_aws).release() from . import s3 diff --git a/vendor/pyLibrary/aws/s3.py b/vendor/pyLibrary/aws/s3.py index a37339a..b7bad75 100644 --- a/vendor/pyLibrary/aws/s3.py +++ b/vendor/pyLibrary/aws/s3.py @@ -5,11 +5,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals import gzip import zipfile @@ -19,17 +17,16 @@ import boto from boto.s3.connection import Location from bs4 import BeautifulSoup -from mo_dots import wrap, Null, coalesce, unwrap, Data +from mo_dots import Data, Null, coalesce, unwrap, wrap, is_many from mo_files.url import value2url_param -from mo_future import text_type, StringIO +from mo_future import StringIO, is_binary, text from mo_kwargs import override -from mo_logs import Log, Except -from mo_logs.strings import utf82unicode, unicode2utf8 +from mo_logs import Except, Log from mo_times.dates import Date from mo_times.timer import Timer from pyLibrary import convert -from pyLibrary.env import http -from pyLibrary.env.big_data import safe_size, MAX_STRING_SIZE, LazyLines, ibytes2ilines, scompressed2ibytes +from mo_http import http +from mo_http.big_data import LazyLines, MAX_STRING_SIZE, ibytes2ilines, safe_size, scompressed2ibytes TOO_MANY_KEYS = 1000 * 1000 * 1000 READ_ERROR = "S3 read error" @@ -264,7 +261,7 @@ class Bucket(object): elif source.key.endswith(".gz"): json = convert.zip2bytes(json) - return utf82unicode(json) + return json.decode('utf8') def read_bytes(self, key): source = self.get_meta(key) @@ -278,7 +275,7 @@ class Bucket(object): if source.key.endswith(".gz"): return LazyLines(ibytes2ilines(scompressed2ibytes(source))) else: - return utf82unicode(source.read()).split("\n") + return source.read().decode('utf8').split("\n") if source.key.endswith(".gz"): return LazyLines(ibytes2ilines(scompressed2ibytes(source))) @@ -310,16 +307,16 @@ class Bucket(object): if len(value) > 20 * 1000 and not disable_zip: self.bucket.delete_key(key + ".json") self.bucket.delete_key(key + ".json.gz") - if isinstance(value, str): + if is_binary(value): value = convert.bytes2zip(value) key += ".json.gz" else: - value = convert.bytes2zip(unicode2utf8(value)) + value = convert.bytes2zip(value).encode('utf8') key += ".json.gz" else: self.bucket.delete_key(key + ".json.gz") - if isinstance(value, str): + if is_binary(value): key += ".json" else: key += ".json" @@ -346,7 +343,7 @@ class Bucket(object): archive = gzip.GzipFile(fileobj=buff, mode='w') count = 0 for l in lines: - if hasattr(l, "__iter__"): + if is_many(l): for ll in l: 
archive.write(ll.encode("utf8")) archive.write(b"\n") @@ -362,7 +359,7 @@ class Bucket(object): retry = 3 while retry: try: - with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, silent=not self.settings.debug): + with Timer("Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}", {"key": key, "file_length": file_length, "count": count}, verbose=self.settings.debug): buff.seek(0) storage.set_contents_from_file(buff) break @@ -406,11 +403,11 @@ class SkeletonBucket(Bucket): content_keys={ - "key": text_type, + "key": text, "lastmodified": Date, - "etag": text_type, + "etag": text, "size": int, - "storageclass": text_type + "storageclass": text } @@ -450,7 +447,8 @@ class PublicBucket(object): state.get_more = data.find("istruncated").contents[0] == "true" contents = data.findAll("contents") - state.marker = contents[-1].find("key").contents[0] + if len(contents): + state.marker = contents[-1].find("key").contents[0] return [{k: t(d.find(k).contents[0]) for k, t in content_keys.items()} for d in contents] while state.get_more: diff --git a/vendor/pyLibrary/convert.py b/vendor/pyLibrary/convert.py index 9ef2957..0e18c39 100644 --- a/vendor/pyLibrary/convert.py +++ b/vendor/pyLibrary/convert.py @@ -4,13 +4,10 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, absolute_import, division, unicode_literals import ast import base64 @@ -26,8 +23,8 @@ from tempfile import TemporaryFile import mo_json import mo_math -from mo_dots import wrap, unwrap, unwraplist, concat_field -from mo_future import text_type, HTMLParser, StringIO, PY3, long +from mo_dots import concat_field, unwrap, unwraplist, wrap, is_many +from mo_future import HTMLParser, PY3, StringIO, is_binary, is_text, long, text from mo_logs import Log from mo_logs.exceptions import suppress_exception from mo_logs.strings import expand_template, quote @@ -43,7 +40,7 @@ json2value = mo_json.json2value def string2datetime(value, format=None): - return unix2datetime(Date(value, format).unix) + return Date(value, format).datetime def string2boolean(value): @@ -55,8 +52,25 @@ def string2boolean(value): return None +_v2b = { + True: True, + "true": True, + "T": True, + 1: True, + False: False, + "false": False, + "F": False, + 0: False, + None: None +} + + +def value2boolean(value): + return _v2b.get(value, True) + + def str2datetime(value, format=None): - return unix2datetime(Date(value, format).unix) + return Date(value, format).datetime def datetime2string(value, format="%Y-%m-%d %H:%M:%S"): @@ -162,65 +176,20 @@ def list2tab(rows): return "\t".join(keys) + "\n" + "\n".join(output) -def list2table(rows, column_names=None): - if column_names: - keys = list(set(column_names)) - else: - columns = set() - for r in rows: - columns |= set(r.keys()) - keys = list(columns) - - output = [[unwraplist(r.get(k)) for k in keys] for r in rows] - - return wrap({ - "meta": {"format": "table"}, - "header": keys, - "data": output - }) - - -def list2cube(rows, column_names=None): - if column_names: - keys = column_names - else: - columns = set() - for r in rows: - columns |= set(r.keys()) - keys = list(columns) - - data 
= {k: [] for k in keys} - output = wrap({ - "meta": {"format": "cube"}, - "edges": [ - { - "name": "rownum", - "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1} - } - ], - "data": data - }) - - for r in rows: - for k in keys: - data[k].append(unwraplist(r[k])) - - return output - def value2string(value): # PROPER NULL HANDLING if value == None: return None - return text_type(value) + return text(value) def value2quote(value): # RETURN PRETTY PYTHON CODE FOR THE SAME - if isinstance(value, text_type): + if is_text(value): return string2quote(value) else: - return text_type(repr(value)) + return text(repr(value)) def string2quote(value): @@ -233,9 +202,9 @@ string2regexp = re.escape def string2url(value): - if isinstance(value, text_type): + if is_text(value): return "".join([_map2url[c] for c in unicode2latin1(value)]) - elif isinstance(value, str): + elif is_binary(value): return "".join([_map2url[c] for c in value]) else: Log.error("Expecting a string") @@ -245,7 +214,7 @@ def string2url(value): # """ # CONVERT URL QUERY PARAMETERS INTO DICT # """ -# if isinstance(param, text_type): +# if is_text(param): # param = param.encode("ascii") # # def _decode(v): @@ -283,7 +252,7 @@ def string2url(value): # u = query.get(k) # if u is None: # query[k] = v -# elif isinstance(u, list): +# elif is_list(u): # u += [v] # else: # query[k] = [u, v] @@ -313,7 +282,7 @@ def quote2string(value): # RETURN PYTHON CODE FOR THE SAME def value2code(value): - return text_type(repr(value)) + return text(repr(value)) def DataFrame2string(df, columns=None): @@ -375,7 +344,7 @@ def bytes2base64(value): def bytes2sha1(value): - if isinstance(value, text_type): + if is_text(value): Log.error("can not convert unicode to sha1") sha = hashlib.sha1(value) return sha.hexdigest() @@ -384,7 +353,7 @@ def bytes2sha1(value): def value2intlist(value): if value == None: return [] - elif hasattr(value, '__iter__'): + elif is_many(value): output = [int(d) for d in value if d != "" and d != None] return output elif isinstance(value, int): @@ -394,6 +363,7 @@ def value2intlist(value): else: return [int(value)] + def value2int(value): if value == None: return None @@ -415,10 +385,10 @@ def value2number(v): def latin12unicode(value): - if isinstance(value, text_type): + if is_text(value): Log.error("can not convert unicode from latin1") try: - return text_type(value.decode('latin1')) + return text(value.decode('latin1')) except Exception as e: Log.error("Can not convert {{value|quote}} to unicode", value=value) @@ -450,7 +420,7 @@ def zip2bytes(compressed): buff = BytesIO(compressed) archive = gzip.GzipFile(fileobj=buff, mode='r') - from pyLibrary.env.big_data import safe_size + from mo_http.big_data import safe_size return safe_size(archive) @@ -465,7 +435,7 @@ def bytes2zip(bytes): archive.write(b) archive.close() buff.seek(0) - from pyLibrary.env.big_data import FileString, safe_size + from mo_http.big_data import FileString, safe_size return FileString(buff) buff = BytesIO() @@ -543,7 +513,7 @@ def json_schema_to_markdown(schema): def _inner(schema, parent_name, indent): more_lines = [] - for k,v in schema.items(): + for k, v in schema.items(): full_name = concat_field(parent_name, k) details = indent+"* "+_md_code(full_name) if v.type: @@ -560,7 +530,7 @@ def json_schema_to_markdown(schema): lines = [] if schema.title: - lines.append("#"+schema.title) + lines.append("# "+schema.title) lines.append(schema.description) lines.append("") @@ -568,7 +538,7 @@ def json_schema_to_markdown(schema): for k, v in 
jx.sort(schema.properties.items(), 0): full_name = k if v.type in ["object", "array", "nested"]: - lines.append("##"+_md_code(full_name)+" Property") + lines.append("## "+_md_code(full_name)+" Property") if v.description: lines.append(v.description) lines.append("") @@ -576,29 +546,23 @@ def json_schema_to_markdown(schema): if v.type in ["object", "array", "nested"]: lines.extend(_inner(v.properties, full_name, " ")) else: - lines.append("##"+_md_code(full_name)+" ("+v.type+")") + lines.append("## "+_md_code(full_name)+" ("+v.type+")") if v.description: lines.append(v.description) return "\n".join(lines) -def table2csv(table_data): - """ - :param table_data: expecting a list of tuples - :return: text in nice formatted csv - """ - text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data] - - col_widths = [max(len(text) for text in cols) for cols in zip(*text_data)] - template = ", ".join( - "{{" + text_type(i) + "|left_align(" + text_type(w) + ")}}" - for i, w in enumerate(col_widths) - ) - text = "\n".join(expand_template(template, d) for d in text_data) - return text ZeroMoment2dict = mo_math.stats.ZeroMoment2dict +def text2QRCode(value): + from qrcode import QRCode + qr = QRCode() + qr.add_data(value) + qr_code = StringIO() + qr.print_ascii(out=qr_code) + ascii = qr_code.getvalue() + return ascii diff --git a/vendor/pyLibrary/env/README.md b/vendor/pyLibrary/env/README.md index 8b61e3d..923e47d 100644 --- a/vendor/pyLibrary/env/README.md +++ b/vendor/pyLibrary/env/README.md @@ -17,31 +17,3 @@ of settings. For connecting clients to [Mozilla's Pulse](https://pulse.mozilla.org/). -## elasticsearch - -This module handles the lifecycle of an Elasticsearch index in the context of -ETL. You only need this module if you are creating and retiring indexes. You -do not need this module for simply searching; for that I suggest using the -rest API directly. - -### Settings ### - -Both ```Cluster``` and ```Index``` objects accept the same settings dict, -selecting only the properties it requires. - - { - "host" : "http://192.168.0.98", - "port" : 9200, - "index" : "b2g_tests", - "type" : "test_result", - "debug" : true, - "limit_replicas" : true, - "schema_file" : "resources/schema/test_schema.json" - }, - - - -## Cluster - - -## Index diff --git a/vendor/pyLibrary/env/emailer.py b/vendor/pyLibrary/env/emailer.py index b192973..52b3032 100644 --- a/vendor/pyLibrary/env/emailer.py +++ b/vendor/pyLibrary/env/emailer.py @@ -5,23 +5,20 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
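As an illustration, not in the patch: the new `text2QRCode` helper above delegates to the third-party `qrcode` package (`QRCode.add_data` plus `print_ascii`), so it only works where that package is installed. A usage sketch:

    # RENDER A STRING AS AN ASCII QR CODE (pip install qrcode)
    from pyLibrary.convert import text2QRCode

    print(text2QRCode("hello, world"))
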
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText import smtplib import sys -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -from mo_logs import Log -from mo_dots import listwrap -from mo_dots import coalesce +from mo_dots import coalesce, listwrap from mo_kwargs import override +from mo_logs import Log class Emailer: diff --git a/vendor/pyLibrary/env/flask_wrappers.py b/vendor/pyLibrary/env/flask_wrappers.py index cc6c026..7407552 100644 --- a/vendor/pyLibrary/env/flask_wrappers.py +++ b/vendor/pyLibrary/env/flask_wrappers.py @@ -4,21 +4,24 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals + +from functools import update_wrapper +from ssl import PROTOCOL_SSLv23, SSLContext import flask from flask import Response -from mo_dots import coalesce -from mo_files import File +from mo_dots import coalesce, is_data +from mo_files import File, TempFile, URL, mimetype +from mo_future import decorate, text from mo_json import value2json from mo_logs import Log -from mo_logs.strings import unicode2utf8 -from pyLibrary.env.big_data import ibytes2icompressed +from mo_threads.threads import register_thread, Thread +from pyLibrary.env import git +from mo_http.big_data import ibytes2icompressed TOO_SMALL_TO_COMPRESS = 510 # DO NOT COMPRESS DATA WITH LESS THAN THIS NUMBER OF BYTES @@ -28,11 +31,11 @@ def gzip_wrapper(func, compress_lower_limit=None): def output(*args, **kwargs): response = func(*args, **kwargs) - accept_encoding = flask.request.headers.get('Accept-Encoding', '') - if 'gzip' not in accept_encoding.lower(): + accept_encoding = flask.request.headers.get("Accept-Encoding", "") + if "gzip" not in accept_encoding.lower(): return response - response.headers['Content-Encoding'] = 'gzip' + response.headers["Content-Encoding"] = "gzip" response.response = ibytes2icompressed(response.response) return response @@ -46,24 +49,49 @@ def cors_wrapper(func): :param func: Flask method that handles requests and returns a response :return: Same, but with permissive CORS headers set """ + def _setdefault(obj, key, value): if value == None: return obj.setdefault(key, value) + @decorate(func) def output(*args, **kwargs): response = func(*args, **kwargs) headers = response.headers - _setdefault(headers, "Access-Control-Allow-Origin", "*") - _setdefault(headers, "Access-Control-Allow-Headers", flask.request.headers.get("Access-Control-Request-Headers")) - _setdefault(headers, "Access-Control-Allow-Methods", flask.request.headers.get("Access-Control-Request-Methods")) - _setdefault(headers, "Content-Type", "application/json") - _setdefault(headers, "Strict-Transport-Security", "max-age=31536000; includeSubDomains; preload") + + # WATCH OUT FOR THE RUBE GOLDBERG LOGIC! 
+ # https://fetch.spec.whatwg.org/#cors-protocol-and-credentials + + origin = URL(flask.request.headers.get("Origin")) + if origin.host: + allow_origin = str(origin) + # allow_origin = origin.scheme + "://" + origin.host + else: + allow_origin = "*" + _setdefault(headers, "Access-Control-Allow-Origin", allow_origin) + _setdefault(headers, "Access-Control-Allow-Credentials", "true") + _setdefault( + headers, + "Access-Control-Allow-Headers", + flask.request.headers.get("Access-Control-Request-Headers"), + ) + _setdefault( + headers, + "Access-Control-Allow-Methods", # PLURAL "Methods" + flask.request.headers.get("Access-Control-Request-Method"), # SINGULAR "Method" + # "GET, PUT, POST, DELETE, PATCH, OPTIONS" + ) + _setdefault(headers, "Content-Type", mimetype.JSON) + _setdefault( + headers, + "Strict-Transport-Security", + "max-age=31536000; includeSubDomains; preload", + ) return response output.provide_automatic_options = False - output.__name__ = func.__name__ - return output + return update_wrapper(output, func) def dockerflow(flask_app, backend_check): @@ -82,11 +110,7 @@ def dockerflow(flask_app, backend_check): @cors_wrapper def version(): return Response( - VERSION_JSON, - status=200, - headers={ - "Content-Type": "application/json" - } + VERSION_JSON, status=200, headers={"Content-Type": mimetype.JSON} ) @cors_wrapper @@ -97,22 +121,188 @@ def dockerflow(flask_app, backend_check): except Exception as e: Log.warning("heartbeat failure", cause=e) return Response( - unicode2utf8(value2json(e)), + value2json(e).encode('utf8'), status=500, - headers={ - "Content-Type": "application/json" - } + headers={"Content-Type": mimetype.JSON}, ) @cors_wrapper def lbheartbeat(): return Response(status=200) - flask_app.add_url_rule(str('/__version__'), None, version, defaults={}, methods=[str('GET'), str('POST')]) - flask_app.add_url_rule(str('/__heartbeat__'), None, heartbeat, defaults={}, methods=[str('GET'), str('POST')]) - flask_app.add_url_rule(str('/__lbheartbeat__'), None, lbheartbeat, defaults={}, methods=[str('GET'), str('POST')]) + flask_app.add_url_rule( + str("/__version__"), + None, + version, + defaults={}, + methods=[str("GET"), str("POST")], + ) + flask_app.add_url_rule( + str("/__heartbeat__"), + None, + heartbeat, + defaults={}, + methods=[str("GET"), str("POST")], + ) + flask_app.add_url_rule( + str("/__lbheartbeat__"), + None, + lbheartbeat, + defaults={}, + methods=[str("GET"), str("POST")], + ) except Exception as e: Log.error("Problem setting up listeners for dockerflow", cause=e) VERSION_JSON = None + + +def add_version(flask_app): + """ + ADD ROUTING TO HANDLE REQUEST FOR /__version__ + :param flask_app: THE (Flask) APP + :return: + """ + try: + rev = coalesce(git.get_revision(), "") + branch = "https://github.com/mozilla/ActiveData/tree/" + coalesce(git.get_branch()) + + version_info = value2json( + { + "source": "https://github.com/mozilla/ActiveData/tree/" + rev, + "branch": branch, + "commit": rev, + }, + pretty=True, + ).encode('utf8') + text("\n") + + Log.note("Using github version\n{{version}}", version=version_info) + + @register_thread + @cors_wrapper + def version(): + return Response( + version_info, status=200, headers={"Content-Type": mimetype.JSON} + ) + + flask_app.add_url_rule( + str("/__version__"), + None, + version, + defaults={}, + methods=[str("GET"), str("POST")], + ) + except Exception as e: + Log.error("Problem setting up listeners for dockerflow", cause=e) + + +def setup_flask_ssl(flask_app, flask_config): + """ + SPAWN A NEW THREAD TO RUN AN SSL 
ENDPOINT + REMOVES ssl_context FROM flask_config BEFORE RETURNING + + :param flask_app: + :param flask_config: + :return: + """ + if not flask_config.ssl_context: + return + + ssl_flask = flask_config.copy() + ssl_flask.debug = False + ssl_flask.port = 443 + + if is_data(flask_config.ssl_context): + # EXPECTED PEM ENCODED FILE NAMES + # `load_cert_chain` REQUIRES CONCATENATED LIST OF CERTS + with TempFile() as tempfile: + try: + tempfile.write( + File(ssl_flask.ssl_context.certificate_file).read_bytes() + ) + if ssl_flask.ssl_context.certificate_chain_file: + tempfile.write( + File(ssl_flask.ssl_context.certificate_chain_file).read_bytes() + ) + tempfile.flush() + tempfile.close() + + context = SSLContext(PROTOCOL_SSLv23) + context.load_cert_chain( + tempfile.name, + keyfile=File(ssl_flask.ssl_context.privatekey_file).abspath, + ) + + ssl_flask.ssl_context = context + except Exception as e: + Log.error("Could not handle ssl context construction", cause=e) + + def runner(please_stop): + Log.warning( + "ActiveData listening on encrypted port {{port}}", port=ssl_flask.port + ) + flask_app.run(**ssl_flask) + + Thread.run("SSL Server", runner) + + if flask_config.ssl_context and flask_config.port != 80: + Log.warning( + "ActiveData has SSL context, but is still listening on non-encrypted http port {{port}}", + port=flask_config.port, + ) + + flask_config.ssl_context = None + + +def limit_body(size): + def decorator(func): + @decorate(func) + def output(*args, **kwargs): + if flask.request.headers.get("content-length", "") in ["", "0"]: + Log.error("Expecting Content-Length in request headers") + elif int(flask.request.headers["content-length"]) > size: + Log.error("Body is limited to {{size}} bytes", size=size) + return func(*args, **kwargs) + return output + return decorator + + +@register_thread +@cors_wrapper +def options(*args, **kwargs): + """ + USE THIS FOR THE OPTIONS AND HEAD REQUEST TYPES + """ + return Response("", status=200) + + +def add_flask_rule(flask_app, path, func): + flask_app.add_url_rule( + "/" + path.strip("/"), + None, + options, + methods=["OPTIONS", "HEAD"], + ) + flask_app.add_url_rule( + "/" + path.strip("/") + "/", + None, + options, + methods=["OPTIONS", "HEAD"], + ) + + flask_app.add_url_rule( + "/" + path.strip("/"), + None, + func, + methods=["GET", "POST"], + provide_automatic_options=False + ) + flask_app.add_url_rule( + "/" + path.strip("/") + "/", + None, + func, + methods=["GET", "POST"], + provide_automatic_options=False + ) + diff --git a/vendor/pyLibrary/env/git.py b/vendor/pyLibrary/env/git.py index 611e7cd..6e72615 100644 --- a/vendor/pyLibrary/env/git.py +++ b/vendor/pyLibrary/env/git.py @@ -5,15 +5,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
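For illustration only: the Flask helpers above compose as plain decorators. A hypothetical wiring, with the route name and size limit invented for the example:

    # HYPOTHETICAL ROUTE USING THE HELPERS ABOVE
    import flask
    from flask import Response

    from pyLibrary.env.flask_wrappers import add_flask_rule, cors_wrapper, limit_body

    app = flask.Flask(__name__)

    @cors_wrapper
    @limit_body(10000)  # POST A BODY OF AT MOST 10,000 BYTES
    def echo():
        return Response(flask.request.get_data(), status=200)

    # REGISTERS GET/POST ON /echo AND /echo/, PLUS OPTIONS/HEAD HANDLERS
    add_flask_rule(app, "echo", echo)
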
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from mo_logs.exceptions import suppress_exception -from mo_threads import Process +from mo_threads import Process, THREAD_STOP from pyLibrary.meta import cache @@ -25,10 +23,7 @@ def get_revision(): proc = Process("git log", ["git", "log", "-1"]) try: - while True: - line = proc.stdout.pop().strip().decode('utf8') - if not line: - continue + for line in proc.stdout: if line.startswith("commit "): return line[7:] finally: @@ -44,9 +39,8 @@ def get_remote_revision(url, branch): proc = Process("git remote revision", ["git", "ls-remote", url, "refs/heads/" + branch]) try: - while True: - raw_line = proc.stdout.pop() - line = raw_line.strip().decode('utf8') + for line in proc.stdout: + line = line.strip() if not line: continue return line.split("\t")[0] @@ -65,9 +59,7 @@ def get_branch(): proc = Process("git status", ["git", "status"]) try: - while True: - raw_line = proc.stdout.pop() - line = raw_line.decode('utf8').strip() + for line in proc.stdout: if line.startswith("On branch "): return line[10:] finally: diff --git a/vendor/pyLibrary/env/pulse.py b/vendor/pyLibrary/env/pulse.py index 5cb40d6..99d0118 100644 --- a/vendor/pyLibrary/env/pulse.py +++ b/vendor/pyLibrary/env/pulse.py @@ -4,28 +4,26 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, unicode_literals +from mo_future import is_text, is_binary import datetime from socket import timeout as socket_timeout -from kombu import Connection, Producer, Exchange -from pytz import timezone -from mozillapulse.utils import time_to_string - -from mo_logs import constants -from pyLibrary import jsons -from mo_logs.exceptions import Except, suppress_exception -from mo_logs import Log -from mo_dots import wrap, coalesce, Data, set_default -from mo_kwargs import override -from mo_threads import Thread, Lock +from kombu import Connection, Exchange, Producer from mozillapulse.consumers import GenericConsumer +from mozillapulse.utils import time_to_string +from pytz import timezone + +from mo_dots import Data, coalesce, set_default, wrap +from mo_kwargs import override +from mo_logs import Log +from mo_logs.exceptions import Except, suppress_exception +from mo_threads import Lock, Thread +from pyLibrary import jsons count_locker=Lock() count=0 @@ -107,7 +105,7 @@ class Consumer(Thread): self.pulse.disconnect() Log.note("pulse listener was given a disconnect()") - please_stop.on_go(disconnect) + please_stop.then(disconnect) while not please_stop: try: diff --git a/vendor/pyLibrary/meta.py b/vendor/pyLibrary/meta.py index f7a57e9..f60da1c 100644 --- a/vendor/pyLibrary/meta.py +++ b/vendor/pyLibrary/meta.py @@ -5,18 +5,17 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
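Not in the patch: with the polling loops above replaced by iteration over `proc.stdout`, the `git` helpers are called exactly as before. A quick sketch (URL and branch are examples only):

    # QUERY LOCAL AND REMOTE GIT STATE
    from pyLibrary.env import git

    print(git.get_revision())   # HASH OF THE LATEST LOCAL COMMIT (CACHED)
    print(git.get_branch())     # NAME OF THE CURRENTLY CHECKED-OUT BRANCH
    print(git.get_remote_revision("https://github.com/mozilla/ActiveData.git", "dev"))
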
# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) +# Contact: Kyle Lahnakoski (kyle@lahnakoski.com) # -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import absolute_import, division, unicode_literals from collections import namedtuple +import gc from types import FunctionType +from mo_dots import Null, _get_attr, set_default +from mo_future import get_function_arguments, get_function_name, is_text, text import mo_json -from mo_dots import set_default, _get_attr, Null -from mo_future import text_type, get_function_arguments from mo_logs import Log from mo_logs.exceptions import Except from mo_math.randoms import Random @@ -88,9 +87,6 @@ def get_function_by_name(full_name): Log.error("Can not find function {{name}}", name= full_name, cause=e) - - - class cache(object): """ @@ -200,10 +196,10 @@ class _FakeLock(): def value2quote(value): # RETURN PRETTY PYTHON CODE FOR THE SAME - if isinstance(value, text_type): + if is_text(value): return mo_json.quote(value) else: - return text_type(repr(value)) + return text(repr(value)) class extenstion_method(object): @@ -220,6 +216,18 @@ class extenstion_method(object): return func +def extend(cls): + """ + DECORATOR TO ADD METHODS TO CLASSES + :param cls: THE CLASS TO ADD THE METHOD TO + :return: + """ + def extender(func): + setattr(cls, get_function_name(func), func) + return func + return extender + + class MemorySample(object): def __init__(self, description, debug=False, **parameters): diff --git a/vendor/pyLibrary/sql/__init__.py b/vendor/pyLibrary/sql/__init__.py index a56ba27..e69de29 100644 --- a/vendor/pyLibrary/sql/__init__.py +++ b/vendor/pyLibrary/sql/__init__.py @@ -1,145 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. 
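An aside: the new `extend` decorator above attaches a function to a class under the function's own name. A small sketch with an invented class:

    # ATTACH A METHOD TO AN EXISTING CLASS WITH @extend
    from pyLibrary.meta import extend

    class Record(object):
        def __init__(self, value):
            self.value = value

    @extend(Record)
    def double(self):
        return self.value * 2

    print(Record(21).double())  # 42 - double() WAS ATTACHED BY THE DECORATOR
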
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from itertools import groupby -from operator import itemgetter - -from mo_future import text_type, PY3 -from mo_logs import Log -from mo_logs.strings import expand_template - -import pyLibrary.sql - - -class SQL(text_type): - """ - ACTUAL SQL, DO NOT QUOTE THIS STRING - """ - def __init__(self, template='', param=None): - text_type.__init__(self) - if isinstance(template, SQL): - Log.error("Expecting text, not SQL") - self.template = template - self.param = param - - @property - def sql(self): - return expand_template(self.template, self.param) - - def __add__(self, other): - if not isinstance(other, SQL): - if isinstance(other, text_type) and all(c not in other for c in ('"', '\'', '`')): - return SQL(self.sql + other) - Log.error("Can only concat other SQL") - else: - return SQL(self.sql+other.sql) - - def __radd__(self, other): - if not isinstance(other, SQL): - if isinstance(other, text_type) and all(c not in other for c in ('"', '\'', '`')): - return SQL(other + self.sql) - Log.error("Can only concat other SQL") - else: - return SQL(other.sql + self.sql) - - def join(self, list_): - list_ = list(list_) - if not all(isinstance(s, SQL) for s in list_): - Log.error("Can only join other SQL") - return SQL(self.sql.join(list_)) - - if PY3: - def __bytes__(self): - Log.error("do not do this") - else: - def __str__(self): - Log.error("do not do this") - - - -SQL_STAR = SQL(" * ") - -SQL_AND = SQL(" AND ") -SQL_OR = SQL(" OR ") -SQL_NOT = SQL(" NOT ") -SQL_ON = SQL(" ON ") - -SQL_CASE = SQL(" CASE ") -SQL_WHEN = SQL(" WHEN ") -SQL_THEN = SQL(" THEN ") -SQL_ELSE = SQL(" ELSE ") -SQL_END = SQL(" END ") - -SQL_COMMA = SQL(", ") -SQL_UNION_ALL = SQL("\nUNION ALL\n") -SQL_UNION = SQL("\nUNION\n") -SQL_LEFT_JOIN = SQL("\nLEFT JOIN\n") -SQL_INNER_JOIN = SQL("\nJOIN\n") -SQL_EMPTY_STRING = SQL("''") -SQL_TRUE = SQL(" 1 ") -SQL_FALSE = SQL(" 0 ") -SQL_ONE = SQL(" 1 ") -SQL_ZERO = SQL(" 0 ") -SQL_NEG_ONE = SQL(" -1 ") -SQL_NULL = SQL(" NULL ") -SQL_IS_NULL = SQL(" IS NULL ") -SQL_IS_NOT_NULL = SQL(" IS NOT NULL ") -SQL_SELECT = SQL("\nSELECT\n") -SQL_FROM = SQL("\nFROM\n") -SQL_WHERE = SQL("\nWHERE\n") -SQL_GROUPBY = SQL("\nGROUP BY\n") -SQL_ORDERBY = SQL("\nORDER BY\n") -SQL_DESC = SQL(" DESC ") -SQL_ASC = SQL(" ASC ") -SQL_LIMIT = SQL("\nLIMIT\n") - - -class DB(object): - - def quote_column(self, column_name, table=None): - raise NotImplementedError() - - def db_type_to_json_type(self, type): - raise NotImplementedError() - -def sql_list(list_): - list_ = list(list_) - if not all(isinstance(s, SQL) for s in list_): - Log.error("Can only join other SQL") - return SQL(", ".join(l.template for l in list_)) - - -def sql_iso(sql): - return "("+sql+")" - - -def sql_count(sql): - return "COUNT(" + sql + ")" - - -def sql_concat(list_): - return SQL(" || ").join(sql_iso(l) for l in list_) - - -def quote_set(list_): - return sql_iso(sql_list(map(pyLibrary.sql.sqlite.quote_value, list_))) - - -def sql_alias(value, alias): - return SQL(value.template + " AS " + alias.template) - - -def sql_coalesce(list_): - return "COALESCE(" + SQL_COMMA.join(list_) + ")" - diff --git a/vendor/pyLibrary/sql/mysql.py b/vendor/pyLibrary/sql/mysql.py deleted file mode 100644 index b947d13..0000000 --- a/vendor/pyLibrary/sql/mysql.py +++ /dev/null @@ -1,790 +0,0 @@ -# encoding: utf-8 -# -# -# This Source Code Form is subject to the terms of the Mozilla Public 
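Worth noting, though not in the patch: the SQL fragments deleted here live on in `mo_sql` (see the `redshift.py` hunks below). A composition sketch, assuming `mo_sql` keeps the same constant names as this removed module:

    # COMPOSE A QUERY FROM TYPED SQL FRAGMENTS; TABLE AND COLUMNS INVENTED
    from mo_sql import SQL, SQL_FROM, SQL_SELECT, SQL_WHERE, sql_list

    query = (
        SQL_SELECT + sql_list([SQL("id"), SQL("name")]) +
        SQL_FROM + SQL("task") +
        SQL_WHERE + SQL("priority > 0")
    )
    print(query.sql)  # SELECT id, name FROM task WHERE priority > 0  (MODULO WHITESPACE)
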
-# License, v. 2.0. If a copy of the MPL was not distributed with this file, -# You can obtain one at http://mozilla.org/MPL/2.0/. -# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import subprocess -from collections import Mapping -from datetime import datetime - -from pymysql import connect, InterfaceError, cursors - -import mo_json -from jx_python import jx -from mo_dots import coalesce, wrap, listwrap, unwrap, split_field -from mo_files import File -from mo_future import text_type, utf8_json_encoder, binary_type, transpose -from mo_kwargs import override -from mo_logs import Log -from mo_logs.exceptions import Except, suppress_exception -from mo_logs.strings import expand_template, indent, outdent -from mo_math import Math -from mo_times import Date -from pyLibrary.sql import SQL, SQL_NULL, SQL_SELECT, SQL_LIMIT, SQL_WHERE, SQL_LEFT_JOIN, SQL_FROM, SQL_AND, sql_list, sql_iso, SQL_ASC, SQL_TRUE, SQL_ONE, SQL_DESC, SQL_IS_NULL, sql_alias -from pyLibrary.sql.sqlite import join_column - -DEBUG = False -MAX_BATCH_SIZE = 100 -EXECUTE_TIMEOUT = 5 * 600 * 1000 # in milliseconds SET TO ZERO (OR None) FOR HOST DEFAULT TIMEOUT - -all_db = [] - - -class MySQL(object): - """ - Parameterize SQL by name rather than by position. Return records as objects - rather than tuples. - """ - - @override - def __init__( - self, - host, - username, - password, - port=3306, - debug=False, - schema=None, - preamble=None, - readonly=False, - kwargs=None - ): - """ - OVERRIDE THE settings.schema WITH THE schema PARAMETER - preamble WILL BE USED TO ADD COMMENTS TO THE BEGINNING OF ALL SQL - THE INTENT IS TO HELP ADMINISTRATORS ID THE SQL RUNNING ON THE DATABASE - - schema - NAME OF DEFAULT database/schema IN QUERIES - - preamble - A COMMENT TO BE ADDED TO EVERY SQL STATEMENT SENT - - readonly - USED ONLY TO INDICATE IF A TRANSACTION WILL BE OPENED UPON - USE IN with CLAUSE, YOU CAN STILL SEND UPDATES, BUT MUST OPEN A - TRANSACTION BEFORE YOU DO - """ - all_db.append(self) - - self.settings = kwargs - self.cursor = None - self.query_cursor = None - if preamble == None: - self.preamble = "" - else: - self.preamble = indent(preamble, "# ").strip() + "\n" - - self.readonly = readonly - self.debug = coalesce(debug, DEBUG) - if host: - self._open() - - def _open(self): - """ DO NOT USE THIS UNLESS YOU close() FIRST""" - try: - self.db = connect( - host=self.settings.host, - port=self.settings.port, - user=coalesce(self.settings.username, self.settings.user), - passwd=coalesce(self.settings.password, self.settings.passwd), - db=coalesce(self.settings.schema, self.settings.db), - read_timeout=coalesce(self.settings.read_timeout, (EXECUTE_TIMEOUT / 1000) - 10 if EXECUTE_TIMEOUT else None, 5*60), - charset=u"utf8", - use_unicode=True, - ssl=coalesce(self.settings.ssl, None), - cursorclass=cursors.SSCursor - ) - except Exception as e: - if self.settings.host.find("://") == -1: - Log.error( - u"Failure to connect to {{host}}:{{port}}", - host=self.settings.host, - port=self.settings.port, - cause=e - ) - else: - Log.error(u"Failure to connect. 
PROTOCOL PREFIX IS PROBABLY BAD", e) - self.cursor = None - self.partial_rollback = False - self.transaction_level = 0 - self.backlog = [] # accumulate the write commands so they are sent at once - if self.readonly: - self.begin() - - def __enter__(self): - if not self.readonly: - self.begin() - return self - - def __exit__(self, type, value, traceback): - if self.readonly: - self.close() - return - - if isinstance(value, BaseException): - try: - if self.cursor: self.cursor.close() - self.cursor = None - self.rollback() - except Exception as e: - Log.warning(u"can not rollback()", cause=[value, e]) - finally: - self.close() - return - - try: - self.commit() - except Exception as e: - Log.warning(u"can not commit()", e) - finally: - self.close() - - def transaction(self): - """ - return not-started transaction (for with statement) - """ - return Transaction(self) - - def begin(self): - if self.transaction_level == 0: - self.cursor = self.db.cursor() - self.transaction_level += 1 - self.execute("SET TIME_ZONE='+00:00'") - if EXECUTE_TIMEOUT: - try: - self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT)) - self._execute_backlog() - except Exception as e: - e = Except.wrap(e) - if "Unknown system variable 'MAX_EXECUTION_TIME'" in e: - globals()['EXECUTE_TIMEOUT'] = 0 # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE - else: - raise e - - def close(self): - if self.transaction_level > 0: - if self.readonly: - self.commit() # AUTO-COMMIT - else: - Log.error("expecting commit() or rollback() before close") - self.cursor = None # NOT NEEDED - try: - self.db.close() - except Exception as e: - e = Except.wrap(e) - if "Already closed" in e: - return - - Log.warning("can not close()", e) - finally: - try: - all_db.remove(self) - except Exception as e: - Log.error("not expected", cause=e) - - def commit(self): - try: - self._execute_backlog() - except Exception as e: - with suppress_exception: - self.rollback() - Log.error("Error while processing backlog", e) - - if self.transaction_level == 0: - Log.error("No transaction has begun") - elif self.transaction_level == 1: - if self.partial_rollback: - with suppress_exception: - self.rollback() - - Log.error("Commit after nested rollback is not allowed") - else: - if self.cursor: - self.cursor.close() - self.cursor = None - self.db.commit() - - self.transaction_level -= 1 - - def flush(self): - try: - self.commit() - except Exception as e: - Log.error("Can not flush", e) - - try: - self.begin() - except Exception as e: - Log.error("Can not flush", e) - - def rollback(self): - self.backlog = [] # YAY! FREE! 
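For illustration, not in the patch: the begin/commit/rollback bookkeeping above was normally driven through the context managers. A hypothetical session with the wrapper this hunk deletes, credentials invented:

    # ONE TRANSACTION AGAINST THE (NOW REMOVED) MySQL WRAPPER
    from pyLibrary.sql.mysql import MySQL

    db = MySQL(host="localhost", port=3306, username="etl", password="secret", schema="test")
    with db.transaction():
        db.execute(
            "INSERT INTO jobs (name, priority) VALUES ({{name}}, {{priority}})",
            {"name": "backfill", "priority": 1}
        )
    # LEAVING THE with BLOCK COMMITS; AN EXCEPTION INSIDE IT ROLLS BACK
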
- if self.transaction_level == 0: - Log.error("No transaction has begun") - elif self.transaction_level == 1: - self.transaction_level -= 1 - if self.cursor != None: - self.cursor.close() - self.cursor = None - self.db.rollback() - else: - self.transaction_level -= 1 - self.partial_rollback = True - Log.warning("Can not perform partial rollback!") - - def call(self, proc_name, params): - self._execute_backlog() - params = [unwrap(v) for v in params] - try: - self.cursor.callproc(proc_name, params) - self.cursor.close() - self.cursor = self.db.cursor() - except Exception as e: - Log.error("Problem calling procedure " + proc_name, e) - - def query(self, sql, param=None, stream=False, row_tuples=False): - """ - RETURN LIST OF dicts - """ - if not self.cursor: # ALLOW NON-TRANSACTIONAL READS - Log.error("must perform all queries inside a transaction") - self._execute_backlog() - - try: - if param: - sql = expand_template(sql, quote_param(param)) - sql = self.preamble + outdent(sql) - self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) - - self.cursor.execute(sql) - if row_tuples: - if stream: - result = self.cursor - else: - result = wrap(list(self.cursor)) - else: - columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])] - if stream: - result = (wrap({c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor) - else: - result = wrap([{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor]) - - return result - except Exception as e: - e = Except.wrap(e) - if "InterfaceError" in e: - Log.error("Did you close the db connection?", e) - Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1) - - def column_query(self, sql, param=None): - """ - RETURN RESULTS IN [column][row_num] GRID - """ - self._execute_backlog() - try: - old_cursor = self.cursor - if not old_cursor: # ALLOW NON-TRANSACTIONAL READS - self.cursor = self.db.cursor() - self.cursor.execute("SET TIME_ZONE='+00:00'") - self.cursor.close() - self.cursor = self.db.cursor() - - if param: - sql = expand_template(sql, quote_param(param)) - sql = self.preamble + outdent(sql) - self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) - - self.cursor.execute(sql) - grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor] - # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])] - result = transpose(*grid) - - if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS - self.cursor.close() - self.cursor = None - - return result - except Exception as e: - if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0: - Log.error("Did you close the db connection?", e) - Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1) - - # EXECUTE GIVEN METHOD FOR ALL ROWS RETURNED - def forall(self, sql, param=None, _execute=None): - assert _execute - num = 0 - - self._execute_backlog() - try: - old_cursor = self.cursor - if not old_cursor: # ALLOW NON-TRANSACTIONAL READS - self.cursor = self.db.cursor() - - if param: - sql = expand_template(sql, quote_param(param)) - sql = self.preamble + outdent(sql) - self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) - self.cursor.execute(sql) - - columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description]) - for r in self.cursor: - num += 1 - _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r])))) - - if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS - self.cursor.close() - 
self.cursor = None - - except Exception as e: - Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1) - - return num - - def execute(self, sql, param=None): - if self.transaction_level == 0: - Log.error("Expecting transaction to be started before issuing queries") - - if param: - sql = expand_template(sql, quote_param(param)) - sql = outdent(sql) - self.backlog.append(sql) - if self.debug or len(self.backlog) >= MAX_BATCH_SIZE: - self._execute_backlog() - - def _execute_backlog(self): - if not self.backlog: return - - backlog, self.backlog = self.backlog, [] - if self.db.__module__.startswith("pymysql"): - # BUG IN PYMYSQL: CAN NOT HANDLE MULTIPLE STATEMENTS - # https://github.com/PyMySQL/PyMySQL/issues/157 - for b in backlog: - sql = self.preamble + b - try: - self.debug and Log.note("Execute SQL:\n{{sql|indent}}", sql=sql) - self.cursor.execute(b) - except Exception as e: - Log.error("Can not execute sql:\n{{sql}}", sql=sql, cause=e) - - self.cursor.close() - self.cursor = self.db.cursor() - else: - for i, g in jx.groupby(backlog, size=MAX_BATCH_SIZE): - sql = self.preamble + ";\n".join(g) - try: - self.debug and Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql) - self.cursor.execute(sql) - self.cursor.close() - self.cursor = self.db.cursor() - except Exception as e: - Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1) - - ## Insert dictionary of values into table - def insert(self, table_name, record): - keys = list(record.keys()) - - try: - command = ( - "INSERT INTO " + quote_column(table_name) + - sql_iso(sql_list([quote_column(k) for k in keys])) + - " VALUES " + - sql_iso(sql_list([quote_value(record[k]) for k in keys])) - ) - self.execute(command) - except Exception as e: - Log.error("problem with record: {{record}}", record=record, cause=e) - - # candidate_key IS LIST OF COLUMNS THAT CAN BE USED AS UID (USUALLY PRIMARY KEY) - # ONLY INSERT IF THE candidate_key DOES NOT EXIST YET - def insert_new(self, table_name, candidate_key, new_record): - candidate_key = listwrap(candidate_key) - - condition = SQL_AND.join([ - quote_column(k) + "=" + quote_value(new_record[k]) - if new_record[k] != None - else quote_column(k) + SQL_IS_NULL - for k in candidate_key - ]) - command = ( - "INSERT INTO " + quote_column(table_name) + sql_iso(sql_list( - quote_column(k) for k in new_record.keys() - )) + - SQL_SELECT + "a.*" + SQL_FROM + sql_iso( - SQL_SELECT + sql_list([quote_value(v) + " " + quote_column(k) for k, v in new_record.items()]) + - SQL_FROM + "DUAL" - ) + " a" + - SQL_LEFT_JOIN + sql_iso( - SQL_SELECT + "'dummy' exist " + - SQL_FROM + quote_column(table_name) + - SQL_WHERE + condition + - SQL_LIMIT + SQL_ONE - ) + " b ON " + SQL_TRUE + SQL_WHERE + " exist " + SQL_IS_NULL - ) - self.execute(command, {}) - - # ONLY INSERT IF THE candidate_key DOES NOT EXIST YET - def insert_newlist(self, table_name, candidate_key, new_records): - for r in new_records: - self.insert_new(table_name, candidate_key, r) - - def insert_list(self, table_name, records): - if not records: - return - - keys = set() - for r in records: - keys |= set(r.keys()) - keys = jx.sort(keys) - - try: - command = ( - "INSERT INTO " + quote_column(table_name) + - sql_iso(sql_list([quote_column(k) for k in keys])) + - " VALUES " + sql_list([ - sql_iso(sql_list([quote_value(r[k]) for k in keys])) - for r in records - ]) - ) - self.execute(command) - except Exception as e: - Log.error("problem with record: {{record}}", record=records, cause=e) - - def 
update(self, table_name, where_slice, new_values): - """ - where_slice - A Data WHICH WILL BE USED TO MATCH ALL IN table - eg {"id": 42} - new_values - A dict WITH COLUMN NAME, COLUMN VALUE PAIRS TO SET - """ - new_values = quote_param(new_values) - - where_clause = SQL_AND.join([ - quote_column(k) + "=" + quote_value(v) if v != None else quote_column(k) + SQL_IS_NULL - for k, v in where_slice.items() - ]) - - command = ( - "UPDATE " + quote_column(table_name) + "\n" + - "SET " + - sql_list([quote_column(k) + "=" + v for k, v in new_values.items()]) + - SQL_WHERE + - where_clause - ) - self.execute(command, {}) - - def sort2sqlorderby(self, sort): - sort = jx.normalize_sort_parameters(sort) - return sql_list([quote_column(s.field) + (SQL_DESC if s.sort == -1 else SQL_ASC) for s in sort]) - -@override -def execute_sql( - host, - username, - password, - sql, - schema=None, - param=None, - kwargs=None -): - """EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?""" - kwargs.schema = coalesce(kwargs.schema, kwargs.database) - - if param: - with MySQL(kwargs) as temp: - sql = expand_template(sql, quote_param(param)) - - # We have no way to execute an entire SQL file in bulk, so we - # have to shell out to the commandline client. - args = [ - "mysql", - "-h{0}".format(host), - "-u{0}".format(username), - "-p{0}".format(password) - ] - if schema: - args.append("{0}".format(schema)) - - try: - proc = subprocess.Popen( - args, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - bufsize=-1 - ) - if isinstance(sql, text_type): - sql = sql.encode("utf8") - (output, _) = proc.communicate(sql) - except Exception as e: - raise Log.error("Can not call \"mysql\"", e) - - if proc.returncode: - if len(sql) > 10000: - sql = "<" + text_type(len(sql)) + " bytes of sql>" - Log.error( - "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", - sql=indent(sql), - return_code=proc.returncode, - output=output - ) - -@override -def execute_file( - filename, - host, - username, - password, - schema=None, - param=None, - ignore_errors=False, - kwargs=None -): - # MySQLdb provides no way to execute an entire SQL file in bulk, so we - # have to shell out to the commandline client. 
- file = File(filename) - if file.extension == 'zip': - sql = file.read_zipfile() - else: - sql = File(filename).read() - - if ignore_errors: - with suppress_exception: - execute_sql(sql=sql, kwargs=kwargs) - else: - execute_sql(sql=sql, kwargs=kwargs) - -ESCAPE_DCT = { - u"\\": u"\\\\", - u"\0": u"\\0", - u"\"": u'\\"', - u"\'": u"''", - u"\b": u"\\b", - u"\f": u"\\f", - u"\n": u"\\n", - u"\r": u"\\r", - u"\t": u"\\t" -} - - -def quote_value(value): - """ - convert values to mysql code for the same - mostly delegate directly to the mysql lib, but some exceptions exist - """ - try: - if value == None: - return SQL_NULL - elif isinstance(value, SQL): - return quote_sql(value.template, value.param) - elif isinstance(value, text_type): - return SQL("'" + "".join(ESCAPE_DCT.get(c, c) for c in value) + "'") - elif isinstance(value, Mapping): - return quote_value(json_encode(value)) - elif Math.is_number(value): - return SQL(text_type(value)) - elif isinstance(value, datetime): - return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')") - elif isinstance(value, Date): - return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')") - elif hasattr(value, '__iter__'): - return quote_value(json_encode(value)) - else: - return quote_value(text_type(value)) - except Exception as e: - Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e) - - -def quote_column(column_name, table=None): - if column_name == None: - Log.error("missing column_name") - elif isinstance(column_name, text_type): - if table: - return join_column(table, column_name) - else: - return SQL("`" + '`.`'.join(split_field(column_name)) + "`") # MYSQL QUOTE OF COLUMN NAMES - elif isinstance(column_name, binary_type): - return quote_column(column_name.decode('utf8'), table) - elif isinstance(column_name, list): - if table: - return sql_list(join_column(table, c) for c in column_name) - return sql_list(quote_column(c) for c in column_name) - else: - # ASSUME {"name":name, "value":value} FORM - return SQL(sql_alias(column_name.value, quote_column(column_name.name))) - - -def quote_sql(value, param=None): - """ - USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT - """ - try: - if isinstance(value, SQL): - if not param: - return value - param = {k: quote_sql(v) for k, v in param.items()} - return SQL(expand_template(value, param)) - elif isinstance(value, text_type): - return SQL(value) - elif isinstance(value, Mapping): - return quote_value(json_encode(value)) - elif hasattr(value, '__iter__'): - return quote_list(value) - else: - return text_type(value) - except Exception as e: - Log.error("problem quoting SQL", e) - - -def quote_param(param): - return {k: quote_value(v) for k, v in param.items()} - - -def quote_list(values): - return sql_iso(sql_list(map(quote_value, values))) - - - -def utf8_to_unicode(v): - try: - if isinstance(v, binary_type): - return v.decode("utf8") - else: - return v - except Exception as e: - Log.error("not expected", e) - - -def int_list_packer(term, values): - """ - return singletons, ranges and exclusions - """ - DENSITY = 10 # a range can have holes, this is inverse of the hole density - MIN_RANGE = 20 # min members before a range is allowed to be used - - singletons = set() - ranges = [] - exclude = set() - - sorted = jx.sort(values) - - last = sorted[0] - curr_start = last - curr_excl = set() - - for v in sorted[1::]: - if v <= last + 1: - pass - elif v - last > 3: - # big step, how do we deal with it? 
- if last == curr_start: - # not a range yet, so just add as singlton - singletons.add(last) - elif last - curr_start - len(curr_excl) < MIN_RANGE or ((last - curr_start) < len(curr_excl) * DENSITY): - # small ranges are singletons, sparse ranges are singletons - singletons |= set(range(curr_start, last + 1)) - singletons -= curr_excl - else: - # big enough, and dense enough range - ranges.append({"gte": curr_start, "lte": last}) - exclude |= curr_excl - curr_start = v - curr_excl = set() - else: - if 1 + last - curr_start >= len(curr_excl) * DENSITY: - # high density, keep track of excluded and continue - add_me = set(range(last + 1, v)) - curr_excl |= add_me - elif 1 + last - curr_start - len(curr_excl) < MIN_RANGE: - # not big enough, convert range to singletons - new_singles = set(range(curr_start, last + 1)) - curr_excl - singletons = singletons | new_singles - - curr_start = v - curr_excl = set() - else: - ranges.append({"gte": curr_start, "lte": last}) - exclude |= curr_excl - curr_start = v - curr_excl = set() - last = v - - if last == curr_start: - # not a range yet, so just add as singlton - singletons.add(last) - elif last - curr_start - len(curr_excl) < MIN_RANGE or ((last - curr_start) < len(curr_excl) * DENSITY): - # small ranges are singletons, sparse ranges are singletons - singletons |= set(range(curr_start, last + 1)) - singletons -= curr_excl - else: - # big enough, and dense enough range - ranges.append({"gte": curr_start, "lte": last}) - exclude |= curr_excl - - if ranges: - r = {"or": [{"range": {term: r}} for r in ranges]} - if exclude: - r = {"and": [r, {"not": {"terms": {term: jx.sort(exclude)}}}]} - if singletons: - return {"or": [ - {"terms": {term: jx.sort(singletons)}}, - r - ]} - else: - return r - else: - return {"terms": {term: values}} - - -class Transaction(object): - def __init__(self, db): - self.db = db - - def __enter__(self): - self.db.begin() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if isinstance(exc_val, Exception): - self.db.rollback() - else: - self.db.commit() - - -def json_encode(value): - """ - FOR PUTTING JSON INTO DATABASE (sort_keys=True) - dicts CAN BE USED AS KEYS - """ - return text_type(utf8_json_encoder(mo_json.scrub(value))) - - -mysql_type_to_json_type = { - "bigint": "number", - "blob": "string", - "char": "string", - "datetime": "number", - "decimal": "number", - "double": "number", - "enum": "number", - "float": "number", - "int": "number", - "longblob": "string", - "longtext": "string", - "mediumblob": "string", - "mediumint": "number", - "mediumtext": "string", - "set": "array", - "smallint": "number", - "text": "string", - "time": "number", - "timestamp": "number", - "tinyint": "number", - "tinytext": "number", - "varchar": "string" -} diff --git a/vendor/pyLibrary/sql/redshift.py b/vendor/pyLibrary/sql/redshift.py index b0c0b6d..b279118 100644 --- a/vendor/pyLibrary/sql/redshift.py +++ b/vendor/pyLibrary/sql/redshift.py @@ -5,26 +5,24 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
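An aside: `int_list_packer` above compacts sorted integers into Elasticsearch-style filters, turning dense runs into ranges and leaving sparse values as terms. The outputs below were traced by hand from the code above, so treat them as illustrative:

    # A DENSE RUN OF 30 CONSECUTIVE VALUES COLLAPSES TO ONE RANGE
    int_list_packer("build_id", list(range(1, 31)))
    # -> {"or": [{"range": {"build_id": {"gte": 1, "lte": 30}}}]}

    # TOO SPARSE FOR A RANGE (MIN_RANGE IS 20), SO TERMS ARE KEPT AS-IS
    int_list_packer("build_id", [1, 5, 9])
    # -> {"terms": {"build_id": [1, 5, 9]}}
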
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
+from mo_future import is_text, is_binary
 
 # FOR WINDOWS INSTALL OF psycopg2
 # http://stickpeople.com/projects/python/win-psycopg/2.6.0/psycopg2-2.6.0.win32-py2.7-pg9.4.1-release.exe
 import psycopg2
 from psycopg2.extensions import adapt
 
-from pyLibrary import convert
-from mo_logs.exceptions import suppress_exception
-from mo_logs import Log
-from mo_kwargs import override
 from jx_python import jx
-from pyLibrary.sql import SQL
+from mo_kwargs import override
+from mo_logs import Log
+from mo_logs.exceptions import suppress_exception
 from mo_logs.strings import expand_template
 from mo_threads import Lock
+from mo_sql import SQL, SQL_INSERT, SQL_VALUES, SQL_WHERE, sql_iso, sql_list
 
 
 class Redshift(object):
@@ -87,11 +85,10 @@ class Redshift(object):
         try:
             command = (
-                "INSERT INTO " + self.quote_column(table_name) + "(" +
-                ",".join([self.quote_column(k) for k in keys]) +
-                ") VALUES (" +
-                ",".join([self.quote_value(record[k]) for k in keys]) +
-                ")"
+                SQL_INSERT + self.quote_column(table_name) +
+                sql_iso(sql_list(self.quote_column(k) for k in keys)) +
+                SQL_VALUES +
+                sql_iso(sql_list(self.quote_value(record[k]) for k in keys))
             )
 
             self.execute(command)
@@ -110,17 +107,20 @@ class Redshift(object):
         try:
             self.execute(
-                "DELETE FROM " + self.quote_column(table_name) + " WHERE _id IN {{ids}}",
+                "DELETE FROM " + self.quote_column(table_name) + SQL_WHERE + "_id IN {{ids}}",
                 {"ids": self.quote_column([r["_id"] for r in records])}
            )
 
             command = (
-                "INSERT INTO " + self.quote_column(table_name) + "(" +
-                ",".join([self.quote_column(k) for k in columns]) +
-                ") VALUES " + ",\n".join([
-                    sql_iso(",".join([self.quote_value(r.get(k, None)) for k in columns]))
-                    for r in records
-                ])
+                SQL_INSERT + self.quote_column(table_name) +
+                sql_iso(sql_list(self.quote_column(k) for k in columns)) +
+                SQL_VALUES +
+                sql_list(
+                    sql_iso(sql_list(
+                        self.quote_value(r.get(k, None)) for k in columns
+                    ))
+                    for r in records
+                )
             )
             self.execute(command)
         except Exception as e:
@@ -137,19 +136,14 @@ class Redshift(object):
                 try:
                     output[k]=self.quote_value(v)
         return output
 
-    def quote_column(self, name):
-        if isinstance(name, text_type):
-            return SQL('"' + name.replace('"', '""') + '"')
-        return SQL(sql_iso((", ".join(self.quote_value(v) for v in name))))
-
     def quote_value(self, value):
         if value ==None:
             return SQL_NULL
         if isinstance(value, list):
             json = value2json(value)
             return self.quote_value(json)
-        if isinstance(value, text_type) and len(value) > 256:
+        if is_text(value) and len(value) > 256:
             value = value[:256]
 
         return SQL(adapt(value))
diff --git a/vendor/pyLibrary/sql/sqlite.py b/vendor/pyLibrary/sql/sqlite.py
deleted file mode 100644
index f53f96d..0000000
--- a/vendor/pyLibrary/sql/sqlite.py
+++ /dev/null
@@ -1,570 +0,0 @@
-# encoding: utf-8
-#
-#
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http://mozilla.org/MPL/2.0/.
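Not part of the patch: `Redshift.quote_value` above defers the actual escaping to psycopg2's `adapt`, which is what produces safely quoted SQL literals. For instance:

    # psycopg2 ADAPTERS QUOTE PYTHON VALUES FOR USE IN SQL
    from psycopg2.extensions import adapt

    print(adapt("O'Reilly").getquoted())  # b"'O''Reilly'"
    print(adapt(3.14).getquoted())        # b'3.14'
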
-# -# Author: Kyle Lahnakoski (kyle@lahnakoski.com) -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import os -import re -import sys -from collections import Mapping, namedtuple - -from jx_base.expressions import jx_expression -from mo_dots import Data, coalesce, unwraplist, Null -from mo_files import File -from mo_future import allocate_lock as _allocate_lock, text_type -from mo_kwargs import override -from mo_kwargs import override -from mo_logs import Log -from mo_logs.exceptions import Except, extract_stack, ERROR, format_trace -from mo_logs.strings import quote -from mo_math.stats import percentile -from mo_threads import Queue, Thread, Lock, Till -from mo_times import Date, Duration -from mo_times.timer import Timer -from pyLibrary import convert -from pyLibrary.sql import DB, SQL, SQL_TRUE, SQL_FALSE, SQL_NULL, SQL_SELECT, sql_iso, sql_list - -DEBUG = False -TRACE = True - -FORMAT_COMMAND = "Running command\n{{command|limit(100)|indent}}" -DOUBLE_TRANSACTION_ERROR = "You can not query outside a transaction you have open already" -TOO_LONG_TO_HOLD_TRANSACTION = 10 - -sqlite3 = None -_load_extension_warning_sent = False -_upgraded = False -known_databases = {Null: None} - - -def _upgrade(): - try: - Log.note("sqlite not upgraded") - # return - # - # import sys - # import platform - # if "windows" in platform.system().lower(): - # original_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll") - # if platform.architecture()[0]=='32bit': - # source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll") - # else: - # source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll") - # - # if not all(a == b for a, b in zip_longest(source_dll.read_bytes(), original_dll.read_bytes())): - # original_dll.backup() - # File.copy(source_dll, original_dll) - # else: - # pass - except Exception as e: - Log.warning("could not upgrade python's sqlite", cause=e) - - -class Sqlite(DB): - """ - Allows multi-threaded access - Loads extension functions (like SQRT) - """ - - @override - def __init__(self, filename=None, db=None, get_trace=None, upgrade=True, load_functions=False, kwargs=None): - """ - :param filename: FILE TO USE FOR DATABASE - :param db: AN EXISTING sqlite3 DB YOU WOULD LIKE TO USE (INSTEAD OF USING filename) - :param get_trace: GET THE STACK TRACE AND THREAD FOR EVERY DB COMMAND (GOOD FOR DEBUGGING) - :param upgrade: REPLACE PYTHON sqlite3 DLL WITH MORE RECENT ONE, WITH MORE FUNCTIONS (NOT WORKING) - :param load_functions: LOAD EXTENDED MATH FUNCTIONS (MAY REQUIRE upgrade) - :param kwargs: - """ - global _upgraded - global sqlite3 - - self.settings = kwargs - if not _upgraded: - if upgrade: - _upgrade() - _upgraded = True - import sqlite3 - _ = sqlite3 - - self.filename = File(filename).abspath if filename else None - if known_databases.get(self.filename): - Log.error("Not allowed to create more than one Sqlite instance for {{file}}", file=self.filename) - - # SETUP DATABASE - DEBUG and Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version) - try: - if db == None: - self.db = sqlite3.connect( - database=coalesce(self.filename, ":memory:"), - check_same_thread=False, - isolation_level=None - ) - else: - self.db = db - except Exception as e: - Log.error("could not open file {{filename}}", filename=self.filename, cause=e) - load_functions and self._load_functions() - - self.locker = Lock() - self.available_transactions = [] # LIST OF ALL THE TRANSACTIONS BEING MANAGED - self.queue = 
Queue("sql commands") # HOLD (command, result, signal, stacktrace) TUPLES - - self.get_trace = coalesce(get_trace, TRACE) - self.upgrade = upgrade - self.closed = False - - # WORKER VARIABLES - self.transaction_stack = [] # THE TRANSACTION OBJECT WE HAVE PARTIALLY RUN - self.last_command_item = None # USE THIS TO HELP BLAME current_transaction FOR HANGING ON TOO LONG - self.too_long = None - self.delayed_queries = [] - self.delayed_transactions = [] - self.worker = Thread.run("sqlite db thread", self._worker) - - DEBUG and Log.note("Sqlite version {{version}}", version=self.query("select sqlite_version()").data[0][0]) - - def _enhancements(self): - def regex(pattern, value): - return 1 if re.match(pattern+"$", value) else 0 - con = self.db.create_function("regex", 2, regex) - - class Percentile(object): - def __init__(self, percentile): - self.percentile=percentile - self.acc=[] - - def step(self, value): - self.acc.append(value) - - def finalize(self): - return percentile(self.acc, self.percentile) - - con.create_aggregate("percentile", 2, Percentile) - - def transaction(self): - thread = Thread.current() - parent = None - with self.locker: - for t in self.available_transactions: - if t.thread is thread: - parent = t - - output = Transaction(self, parent=parent) - self.available_transactions.append(output) - return output - - def query(self, command): - """ - WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED - :param command: COMMAND FOR SQLITE - :return: list OF RESULTS - """ - if self.closed: - Log.error("database is closed") - - signal = _allocate_lock() - signal.acquire() - result = Data() - trace = extract_stack(1) if self.get_trace else None - - if self.get_trace: - current_thread = Thread.current() - with self.locker: - for t in self.available_transactions: - if t.thread is current_thread: - Log.error(DOUBLE_TRANSACTION_ERROR) - - self.queue.add(CommandItem(command, result, signal, trace, None)) - signal.acquire() - - if result.exception: - Log.error("Problem with Sqlite call", cause=result.exception) - return result - - def close(self): - """ - OPTIONAL COMMIT-AND-CLOSE - IF THIS IS NOT DONE, THEN THE THREAD THAT SPAWNED THIS INSTANCE - :return: - """ - self.closed = True - signal = _allocate_lock() - signal.acquire() - self.queue.add(CommandItem(COMMIT, None, signal, None, None)) - signal.acquire() - self.worker.please_stop.go() - return - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - def _load_functions(self): - global _load_extension_warning_sent - library_loc = File.new_instance(sys.modules[__name__].__file__, "../..") - full_path = File.new_instance(library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath - try: - trace = extract_stack(0)[0] - if self.upgrade: - if os.name == 'nt': - file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions.so") - else: - file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions") - - full_path = file.abspath - self.db.enable_load_extension(True) - self.db.execute(SQL_SELECT + "load_extension" + sql_iso(quote_value(full_path))) - except Exception as e: - if not _load_extension_warning_sent: - _load_extension_warning_sent = True - Log.warning("Could not load {{file}}, doing without. 
(no SQRT for you!)", file=full_path, cause=e) - - def create_new_functions(self): - def regexp(pattern, item): - reg = re.compile(pattern) - return reg.search(item) is not None - - self.db.create_function("REGEXP", 2, regexp) - - def show_transactions_blocked_warning(self): - blocker = self.last_command_item - blocked = (self.delayed_queries+self.delayed_transactions)[0] - - Log.warning( - "Query on thread {{blocked_thread|json}} at\n" - "{{blocked_trace|indent}}" - "is blocked by {{blocker_thread|json}} at\n" - "{{blocker_trace|indent}}" - "this message brought to you by....", - blocker_trace=format_trace(blocker.trace), - blocked_trace=format_trace(blocked.trace), - blocker_thread=blocker.transaction.thread.name if blocker.transaction is not None else None, - blocked_thread=blocked.transaction.thread.name if blocked.transaction is not None else None - ) - - def _close_transaction(self, command_item): - query, result, signal, trace, transaction = command_item - - transaction.end_of_life = True - with self.locker: - self.available_transactions.remove(transaction) - assert transaction not in self.available_transactions - - old_length = len(self.transaction_stack) - old_trans = self.transaction_stack[-1] - del self.transaction_stack[-1] - - assert old_length - 1 == len(self.transaction_stack) - assert old_trans - assert old_trans not in self.transaction_stack - if not self.transaction_stack: - # NESTED TRANSACTIONS NOT ALLOWED IN sqlite3 - DEBUG and Log.note(FORMAT_COMMAND, command=query) - self.db.execute(query) - - has_been_too_long = False - with self.locker: - if self.too_long is not None: - self.too_long, too_long = None, self.too_long - # WE ARE CHEATING HERE: WE REACH INTO THE Signal MEMBERS AND REMOVE WHAT WE ADDED TO THE INTERNAL job_queue - with too_long.lock: - has_been_too_long = bool(too_long) - too_long.job_queue = None - - # PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST - if self.delayed_transactions: - for c in reversed(self.delayed_transactions): - self.queue.push(c) - del self.delayed_transactions[:] - if self.delayed_queries: - for c in reversed(self.delayed_queries): - self.queue.push(c) - del self.delayed_queries[:] - if has_been_too_long: - Log.note("Transaction blockage cleared") - - def _worker(self, please_stop): - try: - # MAIN EXECUTION LOOP - while not please_stop: - command_item = self.queue.pop(till=please_stop) - if command_item is None: - break - try: - self._process_command_item(command_item) - except Exception as e: - Log.warning("worker can not execute command", cause=e) - except Exception as e: - e = Except.wrap(e) - if not please_stop: - Log.warning("Problem with sql", cause=e) - finally: - self.closed = True - DEBUG and Log.note("Database is closed") - self.db.close() - - def _process_command_item(self, command_item): - query, result, signal, trace, transaction = command_item - - with Timer("SQL Timing", silent=not DEBUG): - if transaction is None: - # THIS IS A TRANSACTIONLESS QUERY, DELAY IT IF THERE IS A CURRENT TRANSACTION - if self.transaction_stack: - with self.locker: - if self.too_long is None: - self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION) - self.too_long.on_go(self.show_transactions_blocked_warning) - self.delayed_queries.append(command_item) - return - elif self.transaction_stack and self.transaction_stack[-1] not in [transaction, transaction.parent]: - # THIS TRANSACTION IS NOT THE CURRENT TRANSACTION, DELAY IT - with self.locker: - if self.too_long is None: - self.too_long = 
Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION) - self.too_long.on_go(self.show_transactions_blocked_warning) - self.delayed_transactions.append(command_item) - return - else: - # ENSURE THE CURRENT TRANSACTION IS UP TO DATE FOR THIS query - if not self.transaction_stack: - # sqlite3 ALLOWS ONLY ONE TRANSACTION AT A TIME - DEBUG and Log.note(FORMAT_COMMAND, command=BEGIN) - self.db.execute(BEGIN) - self.transaction_stack.append(transaction) - elif transaction is not self.transaction_stack[-1]: - self.transaction_stack.append(transaction) - elif transaction.exception and query is not ROLLBACK: - result.exception = Except( - type=ERROR, - template="Not allowed to continue using a transaction that failed", - cause=transaction.exception, - trace=trace - ) - signal.release() - return - - try: - transaction.do_all() - except Exception as e: - # DEAL WITH ERRORS IN QUEUED COMMANDS - # WE WILL UNWRAP THE OUTER EXCEPTION TO GET THE CAUSE - err = Except( - type=ERROR, - template="Bad call to Sqlite3 while "+FORMAT_COMMAND, - params={"command": e.params.current.command}, - cause=e.cause, - trace=e.params.current.trace - ) - transaction.exception = result.exception = err - - if query in [COMMIT, ROLLBACK]: - self._close_transaction(CommandItem(ROLLBACK, result, signal, trace, transaction)) - - signal.release() - return - - try: - # DEAL WITH END-OF-TRANSACTION MESSAGES - if query in [COMMIT, ROLLBACK]: - self._close_transaction(command_item) - return - - # EXECUTE QUERY - self.last_command_item = command_item - DEBUG and Log.note(FORMAT_COMMAND, command=query) - curr = self.db.execute(query) - result.meta.format = "table" - result.header = [d[0] for d in curr.description] if curr.description else None - result.data = curr.fetchall() - if DEBUG and result.data: - text = convert.table2csv(list(result.data)) - Log.note("Result:\n{{data|limit(100)|indent}}", data=text) - except Exception as e: - e = Except.wrap(e) - err = Except( - type=ERROR, - template="Bad call to Sqlite while " + FORMAT_COMMAND, - params={"command": query}, - trace=trace, - cause=e - ) - result.exception = err - if transaction: - transaction.exception = err - finally: - signal.release() - - -class Transaction(object): - - def __init__(self, db, parent=None): - self.db = db - self.locker = Lock("transaction " + text_type(id(self)) + " todo lock") - self.todo = [] - self.complete = 0 - self.end_of_life = False - self.exception = None - self.parent = parent - self.thread = parent.thread if parent else Thread.current() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - causes = [] - try: - if isinstance(exc_val, Exception): - causes.append(Except.wrap(exc_val)) - self.rollback() - else: - self.commit() - except Exception as e: - causes.append(Except.wrap(e)) - Log.error("Transaction failed", cause=unwraplist(causes)) - - def transaction(self): - with self.db.locker: - output = Transaction(self.db, parent=self) - self.db.available_transactions.append(output) - return output - - def execute(self, command): - trace = extract_stack(1) if self.db.get_trace else None - with self.locker: - self.todo.append(CommandItem(command, None, None, trace, self)) - - def do_all(self): - # ENSURE PARENT TRANSACTION IS UP TO DATE - c = None - try: - if self.parent == self: - Log.warning("Transactions parent is equal to itself.") - if self.parent: - self.parent.do_all() - # GET THE REMAINING COMMANDS - with self.locker: - todo = self.todo[self.complete:] - self.complete = len(self.todo) - - # RUN THEM - for c in todo: - 
DEBUG and Log.note(FORMAT_COMMAND, command=c.command)
-                self.db.db.execute(c.command)
-        except Exception as e:
-            Log.error("problem running commands", current=c, cause=e)
-
-
-    def query(self, query):
-        if self.db.closed:
-            Log.error("database is closed")
-
-        signal = _allocate_lock()
-        signal.acquire()
-        result = Data()
-        trace = extract_stack(1) if self.db.get_trace else None
-        self.db.queue.add(CommandItem(query, result, signal, trace, self))
-        signal.acquire()
-        if result.exception:
-            Log.error("Problem with Sqlite call", cause=result.exception)
-        return result
-
-    def rollback(self):
-        self.query(ROLLBACK)
-
-    def commit(self):
-        self.query(COMMIT)
-
-
-CommandItem = namedtuple("CommandItem", ("command", "result", "is_done", "trace", "transaction"))
-
-
-_no_need_to_quote = re.compile(r"^\w+$", re.UNICODE)
-
-
-def quote_column(column_name, table=None):
-    if isinstance(column_name, SQL):
-        return column_name
-
-    if not isinstance(column_name, text_type):
-        Log.error("expecting a name")
-    if table != None:
-        return SQL(" " + quote(table) + "." + quote(column_name) + " ")
-    else:
-        if _no_need_to_quote.match(column_name):
-            return SQL(" " + column_name + " ")
-        return SQL(" " + quote(column_name) + " ")
-
-
-def quote_value(value):
-    if isinstance(value, (Mapping, list)):
-        return SQL(".")
-    elif isinstance(value, Date):
-        return SQL(text_type(value.unix))
-    elif isinstance(value, Duration):
-        return SQL(text_type(value.seconds))
-    elif isinstance(value, text_type):
-        return SQL("'" + value.replace("'", "''") + "'")
-    elif value == None:
-        return SQL_NULL
-    elif value is True:
-        return SQL_TRUE
-    elif value is False:
-        return SQL_FALSE
-    else:
-        return SQL(text_type(value))
-
-
-def quote_list(values):
-    return sql_iso(sql_list(map(quote_value, values)))
-
-def join_column(a, b):
-    a = quote_column(a)
-    b = quote_column(b)
-    return SQL(a.template.rstrip() + "." + b.template.lstrip())
-
-
-BEGIN = "BEGIN"
-COMMIT = "COMMIT"
-ROLLBACK = "ROLLBACK"
-
-
-def _upgrade():
-    global _upgraded
-    global sqlite3
-
-    try:
-        Log.note("sqlite not upgraded")
-        # return
-        #
-        # import sys
-        # import platform
-        # if "windows" in platform.system().lower():
-        #     original_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
-        #     if platform.architecture()[0]=='32bit':
-        #         source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll")
-        #     else:
-        #         source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll")
-        #
-        #     if not all(a == b for a, b in zip_longest(source_dll.read_bytes(), original_dll.read_bytes())):
-        #         original_dll.backup()
-        #         File.copy(source_dll, original_dll)
-        # else:
-        #     pass
-    except Exception as e:
-        Log.warning("could not upgrade python's sqlite", cause=e)
-
-    import sqlite3
-    _ = sqlite3
-    _upgraded = True
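Aside on the quoting helpers just removed: `quote_value` renders Python values as SQL literals by type (strings get embedded single quotes doubled), and `quote_column` double-quotes any identifier that is not a single `\w+` word. A rough standalone sketch of those escaping rules (a simplified restatement for illustration, not an import of the deleted module):

    import re

    _no_need_to_quote = re.compile(r"^\w+$", re.UNICODE)


    def quote_value(value):
        # strings: double embedded single quotes, then wrap in single quotes
        if isinstance(value, str):
            return "'" + value.replace("'", "''") + "'"
        if value is None:
            return "NULL"
        return str(value)


    def quote_column(name):
        # bare \w+ names pass through; anything else is double-quoted
        if _no_need_to_quote.match(name):
            return name
        return '"' + name.replace('"', '""') + '"'


    print(quote_value("it's"))     # 'it''s'
    print(quote_value(None))       # NULL
    print(quote_column("a.b"))     # "a.b"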
diff --git a/vendor/pyLibrary/sql/util.py b/vendor/pyLibrary/sql/util.py
index 6d5092a..5c209cc 100644
--- a/vendor/pyLibrary/sql/util.py
+++ b/vendor/pyLibrary/sql/util.py
@@ -5,15 +5,15 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
+
+from jx_mysql.mysql import esfilter2sqlwhere
 
 from mo_dots import wrap
-from jx_mysql import esfilter2sqlwhere
+
 
 def find_holes(db_module, db, table_name, column_name, _range, filter=None):
     """
diff --git a/vendor/pyLibrary/testing/elasticsearch.py b/vendor/pyLibrary/testing/elasticsearch.py
index 444344c..fe1ae43 100644
--- a/vendor/pyLibrary/testing/elasticsearch.py
+++ b/vendor/pyLibrary/testing/elasticsearch.py
@@ -4,20 +4,18 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 #
-# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
 #
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, unicode_literals
 
-import mo_json
 from jx_python import jx
-from mo_dots import Data, Null, unwrap, wrap
+from mo_dots import Data, Null, is_list, unwrap, wrap
 from mo_files import File
+import mo_json
 from mo_kwargs import override
 from mo_logs import Log
-from pyLibrary.env.elasticsearch import Cluster
+from jx_elasticsearch.elasticsearch import Cluster
 
 
 @override
@@ -84,13 +82,18 @@
             v["id"]: v["value"] if "value" in v else mo_json.json2value(v['json'])
             for v in records
         }
+        for r in records.values():
+            try:
+                del r["etl"]
+            except Exception:
+                pass
 
         unwrap(self.data).update(records)
         self.refresh()
         Log.note("{{num}} documents added", num=len(records))
 
     def add(self, record):
-        if isinstance(record, list):
+        if is_list(record):
             Log.error("no longer accepting lists, use extend()")
 
         return self.extend([record])
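The new `vendor/pyLibrary/utils.py` below adds a `Version` value type that compares dotted release strings numerically, component by component, rather than lexically. A quick sketch of why that distinction matters (plain Python, independent of the module itself):

    # lexical string comparison gets multi-digit components wrong:
    print("3.13.0" > "3.2.0")    # False: "1" < "2" character by character

    # comparing tuples of ints, which is what Version stores, is right:
    print(tuple(map(int, "3.13.0".split("."))) >
          tuple(map(int, "3.2.0".split("."))))    # True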
diff --git a/vendor/pyLibrary/utils.py b/vendor/pyLibrary/utils.py
new file mode 100644
index 0000000..f49ea13
--- /dev/null
+++ b/vendor/pyLibrary/utils.py
@@ -0,0 +1,92 @@
+# encoding: utf-8
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Contact: Kyle Lahnakoski (kyle@lahnakoski.com)
+#
+from __future__ import division, unicode_literals
+
+import datetime
+
+from mo_dots import DataObject, Null, unwrap
+from mo_future import text, zip_longest
+
+
+class Version(object):
+
+    __slots__ = ["version"]
+
+    def __new__(cls, version):
+        if version == None:
+            return Null
+        else:
+            return object.__new__(cls)
+
+    def __init__(self, version):
+        version = unwrap(version)
+
+        if isinstance(version, tuple):
+            self.version = version
+        elif isinstance(version, DataObject):
+            self.version = (0, 0, 0)
+        elif isinstance(version, Version):
+            self.version = version.version
+        else:
+            try:
+                self.version = tuple(map(int, version.split(".")))
+            except Exception:
+                self.version = (0, 0, 0)
+
+    def __gt__(self, other):
+        other = Version(other)
+        for s, o in zip_longest(self.version, other.version):
+            if s is None and o is not None:
+                return False
+            elif s is not None and o is None:
+                return True
+            elif s < o:
+                return False
+            elif s > o:
+                return True
+
+        return False
+
+    def __ge__(self, other):
+        return self == other or self > other
+
+    def __eq__(self, other):
+        other = Version(other)
+        return self.version == other.version
+
+    def __le__(self, other):
+        return self == other or not (self > other)
+
+    def __lt__(self, other):
+        return not (self == other) and not (self > other)
+
+    def __ne__(self, other):
+        other = Version(other)
+        return self.version != other.version
+
+    def __str__(self):
+        return text(".").join(map(text, self.version))
+
+    def __add__(self, other):
+        major, minor, mini = self.version
+        minor += other
+        mini = int(datetime.datetime.utcnow().strftime("%y%j"))
+        return Version((major, minor, mini))
+
+    @property
+    def major(self):
+        return self.version[0]
+
+    @property
+    def minor(self):
+        return self.version[1]
+
+    @property
+    def mini(self):
+        return self.version[2]
diff --git a/vendor/pyLibrary/vendor/__init__.py b/vendor/pyLibrary/vendor/__init__.py
deleted file mode 100644
index 7460e7f..0000000
--- a/vendor/pyLibrary/vendor/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__author__ = 'klahnakoski'
diff --git a/vendor/pyLibrary/vendor/sqlite/README.md b/vendor/pyLibrary/vendor/sqlite/README.md
deleted file mode 100644
index 42355a2..0000000
--- a/vendor/pyLibrary/vendor/sqlite/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-Sqlite
-======
-
-A copy of version 3.13.0 binaries and header file as per [Sqlite public domain licence](https://www.sqlite.org/copyright.html)
\ No newline at end of file
diff --git a/vendor/pyLibrary/vendor/sqlite/compile.bat b/vendor/pyLibrary/vendor/sqlite/compile.bat
deleted file mode 100644
index 200ae77..0000000
--- a/vendor/pyLibrary/vendor/sqlite/compile.bat
+++ /dev/null
@@ -1 +0,0 @@
-gcc -shared -I "C:\Users\kyle\code\ActiveData\pyLibrary\vendor\sqlite\sqlite.h" -o libsqlitefunctions.so extension-functions.c
diff --git a/vendor/pyLibrary/vendor/sqlite/extension-functions.c b/vendor/pyLibrary/vendor/sqlite/extension-functions.c
deleted file mode 100644
index 2bdd194..0000000
--- a/vendor/pyLibrary/vendor/sqlite/extension-functions.c
+++ /dev/null
@@ -1,1947 +0,0 @@
-/*
-This library will provide common mathematical and string functions in
-SQL queries using the operating system libraries or provided
-definitions. It includes the following functions:
-
-Math: acos, asin, atan, atn2, atan2, acosh, asinh, atanh, difference,
-degrees, radians, cos, sin, tan, cot, cosh, sinh, tanh, coth, exp,
-log, log10, power, sign, sqrt, square, ceil, floor, pi.
- -String: replicate, charindex, leftstr, rightstr, ltrim, rtrim, trim, -replace, reverse, proper, padl, padr, padc, strfilter. - -Aggregate: stdev, variance, mode, median, lower_quartile, -upper_quartile. - -The string functions ltrim, rtrim, trim, replace are included in -recent versions of SQLite and so by default do not build. - -Compilation instructions: - Compile this C source file into a dynamic library as follows: - * Linux: - gcc -fPIC -lm -shared extension-functions.c -o libsqlitefunctions.so - * Mac OS X: - gcc -fno-common -dynamiclib extension-functions.c -o libsqlitefunctions.dylib - (You may need to add flags - -I /opt/local/include/ -L/opt/local/lib -lsqlite3 - if your sqlite3 is installed from Mac ports, or - -I /sw/include/ -L/sw/lib -lsqlite3 - if installed with Fink.) - * Windows: - 1. Install MinGW (http://www.mingw.org/) and you will get the gcc - (gnu compiler collection) - 2. add the path to your path variable (isn't done during the - installation!) - 3. compile: - gcc -shared -I "path" -o libsqlitefunctions.so extension-functions.c - (path = path of sqlite3ext.h; i.e. C:\programs\sqlite) - -Usage instructions for applications calling the sqlite3 API functions: - In your application, call sqlite3_enable_load_extension(db,1) to - allow loading external libraries. Then load the library libsqlitefunctions - using sqlite3_load_extension; the third argument should be 0. - See http://www.sqlite.org/cvstrac/wiki?p=LoadableExtensions. - Select statements may now use these functions, as in - SELECT cos(radians(inclination)) FROM satsum WHERE satnum = 25544; - -Usage instructions for the sqlite3 program: - If the program is built so that loading extensions is permitted, - the following will work: - sqlite> SELECT load_extension('./libsqlitefunctions.so'); - sqlite> select cos(radians(45)); - 0.707106781186548 - Note: Loading extensions is by default prohibited as a - security measure; see "Security Considerations" in - http://www.sqlite.org/cvstrac/wiki?p=LoadableExtensions. - If the sqlite3 program and library are built this - way, you cannot use these functions from the program, you - must write your own program using the sqlite3 API, and call - sqlite3_enable_load_extension as described above, or else - rebuilt the sqlite3 program to allow loadable extensions. - -Alterations: -The instructions are for Linux, Mac OS X, and Windows; users of other -OSes may need to modify this procedure. In particular, if your math -library lacks one or more of the needed trig or log functions, comment -out the appropriate HAVE_ #define at the top of file. If you do not -wish to make a loadable module, comment out the define for -COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE. If you are using a -version of SQLite without the trim functions and replace, comment out -the HAVE_TRIM #define. - -Liam Healy - -History: -2010-01-06 Correct check for argc in squareFunc, and add Windows -compilation instructions. -2009-06-24 Correct check for argc in properFunc. -2008-09-14 Add check that memory was actually allocated after -sqlite3_malloc or sqlite3StrDup, call sqlite3_result_error_nomem if -not. Thanks to Robert Simpson. -2008-06-13 Change to instructions to indicate use of the math library -and that program might work. -2007-10-01 Minor clarification to instructions. -2007-09-29 Compilation as loadable module is optional with -COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE. 
-2007-09-28 Use sqlite3_extension_init and macros -SQLITE_EXTENSION_INIT1, SQLITE_EXTENSION_INIT2, so that it works with -sqlite3_load_extension. Thanks to Eric Higashino and Joe Wilson. -New instructions for Mac compilation. -2007-09-17 With help from Joe Wilson and Nuno Luca, made use of -external interfaces so that compilation is no longer dependent on -SQLite source code. Merged source, header, and README into a single -file. Added casts so that Mac will compile without warnings (unsigned -and signed char). -2007-09-05 Included some definitions from sqlite 3.3.13 so that this -will continue to work in newer versions of sqlite. Completed -description of functions available. -2007-03-27 Revised description. -2007-03-23 Small cleanup and a bug fix on the code. This was mainly -letting errno flag errors encountered in the math library and checking -the result, rather than pre-checking. This fixes a bug in power that -would cause an error if any non-positive number was raised to any -power. -2007-02-07 posted by Mikey C to sqlite mailing list. -Original code 2006 June 05 by relicoder. - -*/ - -//#include "config.h" - -#define COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE 1 -#define HAVE_ACOSH 1 -#define HAVE_ASINH 1 -#define HAVE_ATANH 1 -#define HAVE_SINH 1 -#define HAVE_COSH 1 -#define HAVE_TANH 1 -#define HAVE_LOG10 1 -#define HAVE_ISBLANK 1 -#define SQLITE_SOUNDEX 1 -#define HAVE_TRIM 1 /* LMH 2007-03-25 if sqlite has trim functions */ - -#ifdef COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 -#else -#include "sqlite3.h" -#endif - -#include -/* relicoder */ -#include -#include -#include -#include /* LMH 2007-03-25 */ - -#include -#include - -#ifndef _MAP_H_ -#define _MAP_H_ - -#include - -/* -** Simple binary tree implementation to use in median, mode and quartile calculations -** Tree is not necessarily balanced. That would require something like red&black trees of AVL -*/ - -typedef int(*cmp_func)(const void *, const void *); -typedef void(*map_iterator)(void*, int64_t, void*); - -typedef struct node{ - struct node *l; - struct node *r; - void* data; - int64_t count; -} node; - -typedef struct map{ - node *base; - cmp_func cmp; - short free; -} map; - -/* -** creates a map given a comparison function -*/ -map map_make(cmp_func cmp); - -/* -** inserts the element e into map m -*/ -void map_insert(map *m, void *e); - -/* -** executes function iter over all elements in the map, in key increasing order -*/ -void map_iterate(map *m, map_iterator iter, void* p); - -/* -** frees all memory used by a map -*/ -void map_destroy(map *m); - -/* -** compares 2 integers -** to use with map_make -*/ -int int_cmp(const void *a, const void *b); - -/* -** compares 2 doubles -** to use with map_make -*/ -int double_cmp(const void *a, const void *b); - -#endif /* _MAP_H_ */ - -typedef uint8_t u8; -typedef uint16_t u16; -typedef int64_t i64; - -static char *sqlite3StrDup( const char *z ) { - char *res = sqlite3_malloc( strlen(z)+1 ); - return strcpy( res, z ); -} - -/* -** These are copied verbatim from fun.c so as to not have the names exported -*/ - -/* LMH from sqlite3 3.3.13 */ -/* -** This table maps from the first byte of a UTF-8 character to the number -** of trailing bytes expected. A value '4' indicates that the table key -** is not a legal first byte for a UTF-8 character. 
-*/ -static const u8 xtra_utf8_bytes[256] = { -/* 0xxxxxxx */ -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -/* 10wwwwww */ -4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - -/* 110yyyyy */ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -/* 1110zzzz */ -2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - -/* 11110yyy */ -3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, -}; - - -/* -** This table maps from the number of trailing bytes in a UTF-8 character -** to an integer constant that is effectively calculated for each character -** read by a naive implementation of a UTF-8 character reader. The code -** in the READ_UTF8 macro explains things best. -*/ -static const int xtra_utf8_bits[] = { - 0, - 12416, /* (0xC0 << 6) + (0x80) */ - 925824, /* (0xE0 << 12) + (0x80 << 6) + (0x80) */ - 63447168 /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ -}; - -/* -** If a UTF-8 character contains N bytes extra bytes (N bytes follow -** the initial byte so that the total character length is N+1) then -** masking the character with utf8_mask[N] must produce a non-zero -** result. Otherwise, we have an (illegal) overlong encoding. -*/ -static const int utf_mask[] = { - 0x00000000, - 0xffffff80, - 0xfffff800, - 0xffff0000, -}; - -/* LMH salvaged from sqlite3 3.3.13 source code src/utf.c */ -#define READ_UTF8(zIn, c) { \ - int xtra; \ - c = *(zIn)++; \ - xtra = xtra_utf8_bytes[c]; \ - switch( xtra ){ \ - case 4: c = (int)0xFFFD; break; \ - case 3: c = (c<<6) + *(zIn)++; \ - case 2: c = (c<<6) + *(zIn)++; \ - case 1: c = (c<<6) + *(zIn)++; \ - c -= xtra_utf8_bits[xtra]; \ - if( (utf_mask[xtra]&c)==0 \ - || (c&0xFFFFF800)==0xD800 \ - || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ - } \ -} - -static int sqlite3ReadUtf8(const unsigned char *z){ - int c; - READ_UTF8(z, c); - return c; -} - -#define SKIP_UTF8(zIn) { \ - zIn += (xtra_utf8_bytes[*(u8 *)zIn] + 1); \ -} - -/* -** pZ is a UTF-8 encoded unicode string. If nByte is less than zero, -** return the number of unicode characters in pZ up to (but not including) -** the first 0x00 byte. If nByte is not less than zero, return the -** number of unicode characters in the first nByte of pZ (or up to -** the first 0x00, whichever comes first). -*/ -static int sqlite3Utf8CharLen(const char *z, int nByte){ - int r = 0; - const char *zTerm; - if( nByte>=0 ){ - zTerm = &z[nByte]; - }else{ - zTerm = (const char *)(-1); - } - assert( z<=zTerm ); - while( *z!=0 && z 0) ? 1: ( iVal < 0 ) ? -1: 0; - sqlite3_result_int64(context, iVal); - break; - } - case SQLITE_NULL: { - sqlite3_result_null(context); - break; - } - default: { - /* 2nd change below. Line for abs was: if( rVal<0 ) rVal = rVal * -1.0; */ - - rVal = sqlite3_value_double(argv[0]); - rVal = ( rVal > 0) ? 1: ( rVal < 0 ) ? 
-1: 0; - sqlite3_result_double(context, rVal); - break; - } - } -} - - -/* -** smallest integer value not less than argument -*/ -static void ceilFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - double rVal=0.0; - i64 iVal=0; - assert( argc==1 ); - switch( sqlite3_value_type(argv[0]) ){ - case SQLITE_INTEGER: { - i64 iVal = sqlite3_value_int64(argv[0]); - sqlite3_result_int64(context, iVal); - break; - } - case SQLITE_NULL: { - sqlite3_result_null(context); - break; - } - default: { - rVal = sqlite3_value_double(argv[0]); - sqlite3_result_int64(context, (i64) ceil(rVal)); - break; - } - } -} - -/* -** largest integer value not greater than argument -*/ -static void floorFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - double rVal=0.0; - i64 iVal=0; - assert( argc==1 ); - switch( sqlite3_value_type(argv[0]) ){ - case SQLITE_INTEGER: { - i64 iVal = sqlite3_value_int64(argv[0]); - sqlite3_result_int64(context, iVal); - break; - } - case SQLITE_NULL: { - sqlite3_result_null(context); - break; - } - default: { - rVal = sqlite3_value_double(argv[0]); - sqlite3_result_int64(context, (i64) floor(rVal)); - break; - } - } -} - -/* -** Given a string (s) in the first argument and an integer (n) in the second returns the -** string that constains s contatenated n times -*/ -static void replicateFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - unsigned char *z; /* input string */ - unsigned char *zo; /* result string */ - i64 iCount; /* times to repeat */ - i64 nLen; /* length of the input string (no multibyte considerations) */ - i64 nTLen; /* length of the result string (no multibyte considerations) */ - i64 i=0; - - if( argc!=2 || SQLITE_NULL==sqlite3_value_type(argv[0]) ) - return; - - iCount = sqlite3_value_int64(argv[1]); - - if( iCount<0 ){ - sqlite3_result_error(context, "domain error", -1); - }else{ - - nLen = sqlite3_value_bytes(argv[0]); - nTLen = nLen*iCount; - z=sqlite3_malloc(nTLen+1); - zo=sqlite3_malloc(nLen+1); - if (!z || !zo){ - sqlite3_result_error_nomem(context); - if (z) sqlite3_free(z); - if (zo) sqlite3_free(zo); - return; - } - strcpy((char*)zo, (char*)sqlite3_value_text(argv[0])); - - for(i=0; i=n it's a NOP -** padl(NULL) = NULL -*/ -static void padlFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - i64 ilen; /* length to pad to */ - i64 zl; /* length of the input string (UTF-8 chars) */ - int i = 0; - const char *zi; /* input string */ - char *zo; /* output string */ - char *zt; - - assert( argc==2 ); - - if( sqlite3_value_type(argv[0]) == SQLITE_NULL ){ - sqlite3_result_null(context); - }else{ - zi = (char *)sqlite3_value_text(argv[0]); - ilen = sqlite3_value_int64(argv[1]); - /* check domain */ - if(ilen<0){ - sqlite3_result_error(context, "domain error", -1); - return; - } - zl = sqlite3Utf8CharLen(zi, -1); - if( zl>=ilen ){ - /* string is longer than the requested pad length, return the same string (dup it) */ - zo = sqlite3StrDup(zi); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - }else{ - zo = sqlite3_malloc(strlen(zi)+ilen-zl+1); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - zt = zo; - for(i=1; i+zl<=ilen; ++i){ - *(zt++)=' '; - } - /* no need to take UTF-8 into consideration here */ - strcpy(zt,zi); - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - sqlite3_free(zo); - } -} - -/* -** given an input string (s) and an integer (n) appends spaces at the end of s -** until it has a length of n 
characters. -** When s has a length >=n it's a NOP -** padl(NULL) = NULL -*/ -static void padrFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - i64 ilen; /* length to pad to */ - i64 zl; /* length of the input string (UTF-8 chars) */ - i64 zll; /* length of the input string (bytes) */ - int i = 0; - const char *zi; /* input string */ - char *zo; /* output string */ - char *zt; - - assert( argc==2 ); - - if( sqlite3_value_type(argv[0]) == SQLITE_NULL ){ - sqlite3_result_null(context); - }else{ - zi = (char *)sqlite3_value_text(argv[0]); - ilen = sqlite3_value_int64(argv[1]); - /* check domain */ - if(ilen<0){ - sqlite3_result_error(context, "domain error", -1); - return; - } - zl = sqlite3Utf8CharLen(zi, -1); - if( zl>=ilen ){ - /* string is longer than the requested pad length, return the same string (dup it) */ - zo = sqlite3StrDup(zi); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - }else{ - zll = strlen(zi); - zo = sqlite3_malloc(zll+ilen-zl+1); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - zt = strcpy(zo,zi)+zll; - for(i=1; i+zl<=ilen; ++i){ - *(zt++) = ' '; - } - *zt = '\0'; - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - sqlite3_free(zo); - } -} - -/* -** given an input string (s) and an integer (n) appends spaces at the end of s -** and adds spaces at the begining of s until it has a length of n characters. -** Tries to add has many characters at the left as at the right. -** When s has a length >=n it's a NOP -** padl(NULL) = NULL -*/ -static void padcFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - i64 ilen; /* length to pad to */ - i64 zl; /* length of the input string (UTF-8 chars) */ - i64 zll; /* length of the input string (bytes) */ - int i = 0; - const char *zi; /* input string */ - char *zo; /* output string */ - char *zt; - - assert( argc==2 ); - - if( sqlite3_value_type(argv[0]) == SQLITE_NULL ){ - sqlite3_result_null(context); - }else{ - zi = (char *)sqlite3_value_text(argv[0]); - ilen = sqlite3_value_int64(argv[1]); - /* check domain */ - if(ilen<0){ - sqlite3_result_error(context, "domain error", -1); - return; - } - zl = sqlite3Utf8CharLen(zi, -1); - if( zl>=ilen ){ - /* string is longer than the requested pad length, return the same string (dup it) */ - zo = sqlite3StrDup(zi); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - }else{ - zll = strlen(zi); - zo = sqlite3_malloc(zll+ilen-zl+1); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - zt = zo; - for(i=1; 2*i+zl<=ilen; ++i){ - *(zt++) = ' '; - } - strcpy(zt, zi); - zt+=zll; - for(; i+zl<=ilen; ++i){ - *(zt++) = ' '; - } - *zt = '\0'; - } - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - sqlite3_free(zo); - } -} - -/* -** given 2 string (s1,s2) returns the string s1 with the characters NOT in s2 removed -** assumes strings are UTF-8 encoded -*/ -static void strfilterFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *zi1; /* first parameter string (searched string) */ - const char *zi2; /* second parameter string (vcontains valid characters) */ - const char *z1; - const char *z21; - const char *z22; - char *zo; /* output string */ - char *zot; - int c1 = 0; - int c2 = 0; - - assert( argc==2 ); - - if( sqlite3_value_type(argv[0]) == SQLITE_NULL || sqlite3_value_type(argv[1]) == SQLITE_NULL ){ - sqlite3_result_null(context); - }else{ - zi1 = (char 
*)sqlite3_value_text(argv[0]); - zi2 = (char *)sqlite3_value_text(argv[1]); - /* - ** maybe I could allocate less, but that would imply 2 passes, rather waste - ** (possibly) some memory - */ - zo = sqlite3_malloc(strlen(zi1)+1); - if (!zo){ - sqlite3_result_error_nomem(context); - return; - } - zot = zo; - z1 = zi1; - while( (c1=sqliteCharVal((unsigned char *)z1))!=0 ){ - z21=zi2; - while( (c2=sqliteCharVal((unsigned char *)z21))!=0 && c2!=c1 ){ - sqliteNextChar(z21); - } - if( c2!=0){ - z22=z21; - sqliteNextChar(z22); - strncpy(zot, z21, z22-z21); - zot+=z22-z21; - } - sqliteNextChar(z1); - } - *zot = '\0'; - - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - sqlite3_free(zo); - } -} - -/* -** Given a string z1, retutns the (0 based) index of it's first occurence -** in z2 after the first s characters. -** Returns -1 when there isn't a match. -** updates p to point to the character where the match occured. -** This is an auxiliary function. -*/ -static int _substr(const char* z1, const char* z2, int s, const char** p){ - int c = 0; - int rVal=-1; - const char* zt1; - const char* zt2; - int c1,c2; - - if( '\0'==*z1 ){ - return -1; - } - - while( (sqliteCharVal((unsigned char *)z2) != 0) && (c++)=0 ? rVal+s : rVal; -} - -/* -** given 2 input strings (s1,s2) and an integer (n) searches from the nth character -** for the string s1. Returns the position where the match occured. -** Characters are counted from 1. -** 0 is returned when no match occurs. -*/ - -static void charindexFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const u8 *z1; /* s1 string */ - u8 *z2; /* s2 string */ - int s=0; - int rVal=0; - - assert( argc==3 ||argc==2); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) || SQLITE_NULL==sqlite3_value_type(argv[1])){ - sqlite3_result_null(context); - return; - } - - z1 = sqlite3_value_text(argv[0]); - if( z1==0 ) return; - z2 = (u8*) sqlite3_value_text(argv[1]); - if(argc==3){ - s = sqlite3_value_int(argv[2])-1; - if(s<0){ - s=0; - } - }else{ - s = 0; - } - - rVal = _substr((char *)z1,(char *)z2,s,NULL); - sqlite3_result_int(context, rVal+1); -} - -/* -** given a string (s) and an integer (n) returns the n leftmost (UTF-8) characters -** if the string has a length<=n or is NULL this function is NOP -*/ -static void leftFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - int c=0; - int cc=0; - int l=0; - const unsigned char *z; /* input string */ - const unsigned char *zt; - unsigned char *rz; /* output string */ - - assert( argc==2); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) || SQLITE_NULL==sqlite3_value_type(argv[1])){ - sqlite3_result_null(context); - return; - } - - z = sqlite3_value_text(argv[0]); - l = sqlite3_value_int(argv[1]); - zt = z; - - while( sqliteCharVal(zt) && c++ 0 ){ - sqliteNextChar(zt); - } - - rz = sqlite3_malloc(ze-zt+1); - if (!rz){ - sqlite3_result_error_nomem(context); - return; - } - strcpy((char*) rz, (char*) (zt)); - sqlite3_result_text(context, (char*)rz, -1, SQLITE_TRANSIENT); - sqlite3_free(rz); -} - -#ifndef HAVE_TRIM -/* -** removes the whitespaces at the begining of a string. -*/ -const char* ltrim(const char* s){ - while( *s==' ' ) - ++s; - return s; -} - -/* -** removes the whitespaces at the end of a string. -** !mutates the input string! 
-*/ -void rtrim(char* s){ - char* ss = s+strlen(s)-1; - while( ss>=s && *ss==' ' ) - --ss; - *(ss+1)='\0'; -} - -/* -** Removes the whitespace at the begining of a string -*/ -static void ltrimFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *z; - - assert( argc==1); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) ){ - sqlite3_result_null(context); - return; - } - z = sqlite3_value_text(argv[0]); - sqlite3_result_text(context, ltrim(z), -1, SQLITE_TRANSIENT); -} - -/* -** Removes the whitespace at the end of a string -*/ -static void rtrimFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *z; - char *rz; - /* try not to change data in argv */ - - assert( argc==1); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) ){ - sqlite3_result_null(context); - return; - } - z = sqlite3_value_text(argv[0]); - rz = sqlite3StrDup(z); - rtrim(rz); - sqlite3_result_text(context, rz, -1, SQLITE_TRANSIENT); - sqlite3_free(rz); -} - -/* -** Removes the whitespace at the begining and end of a string -*/ -static void trimFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *z; - char *rz; - /* try not to change data in argv */ - - assert( argc==1); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) ){ - sqlite3_result_null(context); - return; - } - z = sqlite3_value_text(argv[0]); - rz = sqlite3StrDup(z); - rtrim(rz); - sqlite3_result_text(context, ltrim(rz), -1, SQLITE_TRANSIENT); - sqlite3_free(rz); -} -#endif - -/* -** given a pointer to a string s1, the length of that string (l1), a new string (s2) -** and it's length (l2) appends s2 to s1. -** All lengths in bytes. -** This is just an auxiliary function -*/ -// static void _append(char **s1, int l1, const char *s2, int l2){ -// *s1 = realloc(*s1, (l1+l2+1)*sizeof(char)); -// strncpy((*s1)+l1, s2, l2); -// *(*(s1)+l1+l2) = '\0'; -// } - -#ifndef HAVE_TRIM - -/* -** given strings s, s1 and s2 replaces occurrences of s1 in s by s2 -*/ -static void replaceFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *z1; /* string s (first parameter) */ - const char *z2; /* string s1 (second parameter) string to look for */ - const char *z3; /* string s2 (third parameter) string to replace occurrences of s1 with */ - int lz1; - int lz2; - int lz3; - int lzo=0; - char *zo=0; - int ret=0; - const char *zt1; - const char *zt2; - - assert( 3==argc ); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) ){ - sqlite3_result_null(context); - return; - } - - z1 = sqlite3_value_text(argv[0]); - z2 = sqlite3_value_text(argv[1]); - z3 = sqlite3_value_text(argv[2]); - /* handle possible null values */ - if( 0==z2 ){ - z2=""; - } - if( 0==z3 ){ - z3=""; - } - - lz1 = strlen(z1); - lz2 = strlen(z2); - lz3 = strlen(z3); - -#if 0 - /* special case when z2 is empty (or null) nothing will be changed */ - if( 0==lz2 ){ - sqlite3_result_text(context, z1, -1, SQLITE_TRANSIENT); - return; - } -#endif - - zt1=z1; - zt2=z1; - - while(1){ - ret=_substr(z2,zt1 , 0, &zt2); - - if( ret<0 ) - break; - - _append(&zo, lzo, zt1, zt2-zt1); - lzo+=zt2-zt1; - _append(&zo, lzo, z3, lz3); - lzo+=lz3; - - zt1=zt2+lz2; - } - _append(&zo, lzo, zt1, lz1-(zt1-z1)); - sqlite3_result_text(context, zo, -1, SQLITE_TRANSIENT); - sqlite3_free(zo); -} -#endif - -/* -** given a string returns the same string but with the characters in reverse order -*/ -static void reverseFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - const char *z; - const char *zt; - char *rz; - char *rzt; - int l = 0; - int i = 0; - - 
assert( 1==argc ); - - if( SQLITE_NULL==sqlite3_value_type(argv[0]) ){ - sqlite3_result_null(context); - return; - } - z = (char *)sqlite3_value_text(argv[0]); - l = strlen(z); - rz = sqlite3_malloc(l+1); - if (!rz){ - sqlite3_result_error_nomem(context); - return; - } - rzt = rz+l; - *(rzt--) = '\0'; - - zt=z; - while( sqliteCharVal((unsigned char *)zt)!=0 ){ - z=zt; - sqliteNextChar(zt); - for(i=1; zt-i>=z; ++i){ - *(rzt--)=*(zt-i); - } - } - - sqlite3_result_text(context, rz, -1, SQLITE_TRANSIENT); - sqlite3_free(rz); -} - -/* -** An instance of the following structure holds the context of a -** stdev() or variance() aggregate computation. -** implementaion of http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Algorithm_II -** less prone to rounding errors -*/ -typedef struct StdevCtx StdevCtx; -struct StdevCtx { - double rM; - double rS; - i64 cnt; /* number of elements */ -}; - -/* -** An instance of the following structure holds the context of a -** mode() or median() aggregate computation. -** Depends on structures defined in map.c (see map & map) -** These aggregate functions only work for integers and floats although -** they could be made to work for strings. This is usually considered meaningless. -** Only usuall order (for median), no use of collation functions (would this even make sense?) -*/ -typedef struct ModeCtx ModeCtx; -struct ModeCtx { - i64 riM; /* integer value found so far */ - double rdM; /* double value found so far */ - i64 cnt; /* number of elements so far */ - double pcnt; /* number of elements smaller than a percentile */ - i64 mcnt; /* maximum number of occurrences (for mode) */ - i64 mn; /* number of occurrences (for mode and percentiles) */ - i64 is_double; /* whether the computation is being done for doubles (>0) or integers (=0) */ - map* m; /* map structure used for the computation */ - int done; /* whether the answer has been found */ -}; - -/* -** called for each value received during a calculation of stdev or variance -*/ -static void varianceStep(sqlite3_context *context, int argc, sqlite3_value **argv){ - StdevCtx *p; - - double delta; - double x; - - assert( argc==1 ); - p = sqlite3_aggregate_context(context, sizeof(*p)); - /* only consider non-null values */ - if( SQLITE_NULL != sqlite3_value_numeric_type(argv[0]) ){ - p->cnt++; - x = sqlite3_value_double(argv[0]); - delta = (x-p->rM); - p->rM += delta/p->cnt; - p->rS += delta*(x-p->rM); - } -} - -/* -** called for each value received during a calculation of mode of median -*/ -static void modeStep(sqlite3_context *context, int argc, sqlite3_value **argv){ - ModeCtx *p; - i64 xi=0; - double xd=0.0; - i64 *iptr; - double *dptr; - int type; - - assert( argc==1 ); - type = sqlite3_value_numeric_type(argv[0]); - - if( type == SQLITE_NULL) - return; - - p = sqlite3_aggregate_context(context, sizeof(*p)); - - if( 0==(p->m) ){ - p->m = calloc(1, sizeof(map)); - if( type==SQLITE_INTEGER ){ - /* map will be used for integers */ - *(p->m) = map_make(int_cmp); - p->is_double = 0; - }else{ - p->is_double = 1; - /* map will be used for doubles */ - *(p->m) = map_make(double_cmp); - } - } - - ++(p->cnt); - - if( 0==p->is_double ){ - xi = sqlite3_value_int64(argv[0]); - iptr = (i64*)calloc(1,sizeof(i64)); - *iptr = xi; - map_insert(p->m, iptr); - }else{ - xd = sqlite3_value_double(argv[0]); - dptr = (double*)calloc(1,sizeof(double)); - *dptr = xd; - map_insert(p->m, dptr); - } -} - -/* -** Auxiliary function that iterates all elements in a map and finds the mode -** (most frequent value) -*/ 
-static void modeIterate(void* e, i64 c, void* pp){ - i64 ei; - double ed; - ModeCtx *p = (ModeCtx*)pp; - - if( 0==p->is_double ){ - ei = *(int*)(e); - - if( p->mcnt==c ){ - ++p->mn; - }else if( p->mcntriM = ei; - p->mcnt = c; - p->mn=1; - } - }else{ - ed = *(double*)(e); - - if( p->mcnt==c ){ - ++p->mn; - }else if(p->mcntrdM = ed; - p->mcnt = c; - p->mn=1; - } - } -} - -/* -** Auxiliary function that iterates all elements in a map and finds the median -** (the value such that the number of elements smaller is equal the the number of -** elements larger) -*/ -static void medianIterate(void* e, i64 c, void* pp){ - i64 ei; - double ed; - double iL; - double iR; - int il; - int ir; - ModeCtx *p = (ModeCtx*)pp; - - if(p->done>0) - return; - - iL = p->pcnt; - iR = p->cnt - p->pcnt; - il = p->mcnt + c; - ir = p->cnt - p->mcnt; - - if( il >= iL ){ - if( ir >= iR ){ - ++p->mn; - if( 0==p->is_double ){ - ei = *(int*)(e); - p->riM += ei; - }else{ - ed = *(double*)(e); - p->rdM += ed; - } - }else{ - p->done=1; - } - } - p->mcnt+=c; -} - -/* -** Returns the mode value -*/ -static void modeFinalize(sqlite3_context *context){ - ModeCtx *p; - p = sqlite3_aggregate_context(context, 0); - if( p && p->m ){ - map_iterate(p->m, modeIterate, p); - map_destroy(p->m); - free(p->m); - - if( 1==p->mn ){ - if( 0==p->is_double ) - sqlite3_result_int64(context, p->riM); - else - sqlite3_result_double(context, p->rdM); - } - } -} - -/* -** auxiliary function for percentiles -*/ -static void _medianFinalize(sqlite3_context *context){ - ModeCtx *p; - p = (ModeCtx*) sqlite3_aggregate_context(context, 0); - if( p && p->m ){ - p->done=0; - map_iterate(p->m, medianIterate, p); - map_destroy(p->m); - free(p->m); - - if( 0==p->is_double ) - if( 1==p->mn ) - sqlite3_result_int64(context, p->riM); - else - sqlite3_result_double(context, p->riM*1.0/p->mn); - else - sqlite3_result_double(context, p->rdM/p->mn); - } -} - -/* -** Returns the median value -*/ -static void medianFinalize(sqlite3_context *context){ - ModeCtx *p; - p = (ModeCtx*) sqlite3_aggregate_context(context, 0); - if( p!=0 ){ - p->pcnt = (p->cnt)/2.0; - _medianFinalize(context); - } -} - -/* -** Returns the lower_quartile value -*/ -static void lower_quartileFinalize(sqlite3_context *context){ - ModeCtx *p; - p = (ModeCtx*) sqlite3_aggregate_context(context, 0); - if( p!=0 ){ - p->pcnt = (p->cnt)/4.0; - _medianFinalize(context); - } -} - -/* -** Returns the upper_quartile value -*/ -static void upper_quartileFinalize(sqlite3_context *context){ - ModeCtx *p; - p = (ModeCtx*) sqlite3_aggregate_context(context, 0); - if( p!=0 ){ - p->pcnt = (p->cnt)*3/4.0; - _medianFinalize(context); - } -} - -/* -** Returns the stdev value -*/ -static void stdevFinalize(sqlite3_context *context){ - StdevCtx *p; - p = sqlite3_aggregate_context(context, 0); - if( p && p->cnt>1 ){ - sqlite3_result_double(context, sqrt(p->rS/(p->cnt-1))); - }else{ - sqlite3_result_double(context, 0.0); - } -} - -/* -** Returns the variance value -*/ -static void varianceFinalize(sqlite3_context *context){ - StdevCtx *p; - p = sqlite3_aggregate_context(context, 0); - if( p && p->cnt>1 ){ - sqlite3_result_double(context, p->rS/(p->cnt-1)); - }else{ - sqlite3_result_double(context, 0.0); - } -} - -#ifdef SQLITE_SOUNDEX - -/* relicoder factored code */ -/* -** Calculates the soundex value of a string -*/ - -static void soundex(const u8 *zIn, char *zResult){ - int i, j; - static const unsigned char iCode[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, - 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, - 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, - }; - - for(i=0; zIn[i] && !isalpha(zIn[i]); i++){} - if( zIn[i] ){ - zResult[0] = toupper(zIn[i]); - for(j=1; j<4 && zIn[i]; i++){ - int code = iCode[zIn[i]&0x7f]; - if( code>0 ){ - zResult[j++] = code + '0'; - } - } - while( j<4 ){ - zResult[j++] = '0'; - } - zResult[j] = 0; - }else{ - strcpy(zResult, "?000"); - } -} - -/* -** computes the number of different characters between the soundex value fo 2 strings -*/ -static void differenceFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ - char zResult1[8]; - char zResult2[8]; - char *zR1 = zResult1; - char *zR2 = zResult2; - int rVal = 0; - int i = 0; - const u8 *zIn1; - const u8 *zIn2; - - assert( argc==2 ); - - if( sqlite3_value_type(argv[0])==SQLITE_NULL || sqlite3_value_type(argv[1])==SQLITE_NULL ){ - sqlite3_result_null(context); - return; - } - - zIn1 = (u8*)sqlite3_value_text(argv[0]); - zIn2 = (u8*)sqlite3_value_text(argv[1]); - - soundex(zIn1, zR1); - soundex(zIn2, zR2); - - for(i=0; i<4; ++i){ - if( sqliteCharVal((unsigned char *)zR1)==sqliteCharVal((unsigned char *)zR2) ) - ++rVal; - sqliteNextChar(zR1); - sqliteNextChar(zR2); - } - sqlite3_result_int(context, rVal); -} -#endif - -/* -** This function registered all of the above C functions as SQL -** functions. This should be the only routine in this file with -** external linkage. -*/ -int RegisterExtensionFunctions(sqlite3 *db){ - static const struct FuncDef { - char *zName; - signed char nArg; - u8 argType; /* 0: none. 1: db 2: (-1) */ - u8 eTextRep; /* 1: UTF-16. 
0: UTF-8 */ - u8 needCollSeq; - void (*xFunc)(sqlite3_context*,int,sqlite3_value **); - } aFuncs[] = { - /* math.h */ - { "acos", 1, 0, SQLITE_UTF8, 0, acosFunc }, - { "asin", 1, 0, SQLITE_UTF8, 0, asinFunc }, - { "atan", 1, 0, SQLITE_UTF8, 0, atanFunc }, - { "atn2", 2, 0, SQLITE_UTF8, 0, atn2Func }, - /* XXX alias */ - { "atan2", 2, 0, SQLITE_UTF8, 0, atn2Func }, - { "acosh", 1, 0, SQLITE_UTF8, 0, acoshFunc }, - { "asinh", 1, 0, SQLITE_UTF8, 0, asinhFunc }, - { "atanh", 1, 0, SQLITE_UTF8, 0, atanhFunc }, - - { "difference", 2, 0, SQLITE_UTF8, 0, differenceFunc}, - { "degrees", 1, 0, SQLITE_UTF8, 0, rad2degFunc }, - { "radians", 1, 0, SQLITE_UTF8, 0, deg2radFunc }, - - { "cos", 1, 0, SQLITE_UTF8, 0, cosFunc }, - { "sin", 1, 0, SQLITE_UTF8, 0, sinFunc }, - { "tan", 1, 0, SQLITE_UTF8, 0, tanFunc }, - { "cot", 1, 0, SQLITE_UTF8, 0, cotFunc }, - { "cosh", 1, 0, SQLITE_UTF8, 0, coshFunc }, - { "sinh", 1, 0, SQLITE_UTF8, 0, sinhFunc }, - { "tanh", 1, 0, SQLITE_UTF8, 0, tanhFunc }, - { "coth", 1, 0, SQLITE_UTF8, 0, cothFunc }, - - { "exp", 1, 0, SQLITE_UTF8, 0, expFunc }, - { "log", 1, 0, SQLITE_UTF8, 0, logFunc }, - { "log10", 1, 0, SQLITE_UTF8, 0, log10Func }, - { "power", 2, 0, SQLITE_UTF8, 0, powerFunc }, - { "sign", 1, 0, SQLITE_UTF8, 0, signFunc }, - { "sqrt", 1, 0, SQLITE_UTF8, 0, sqrtFunc }, - { "square", 1, 0, SQLITE_UTF8, 0, squareFunc }, - - { "ceil", 1, 0, SQLITE_UTF8, 0, ceilFunc }, - { "floor", 1, 0, SQLITE_UTF8, 0, floorFunc }, - - { "pi", 0, 0, SQLITE_UTF8, 1, piFunc }, - - - /* string */ - { "replicate", 2, 0, SQLITE_UTF8, 0, replicateFunc }, - { "charindex", 2, 0, SQLITE_UTF8, 0, charindexFunc }, - { "charindex", 3, 0, SQLITE_UTF8, 0, charindexFunc }, - { "leftstr", 2, 0, SQLITE_UTF8, 0, leftFunc }, - { "rightstr", 2, 0, SQLITE_UTF8, 0, rightFunc }, -#ifndef HAVE_TRIM - { "ltrim", 1, 0, SQLITE_UTF8, 0, ltrimFunc }, - { "rtrim", 1, 0, SQLITE_UTF8, 0, rtrimFunc }, - { "trim", 1, 0, SQLITE_UTF8, 0, trimFunc }, - { "replace", 3, 0, SQLITE_UTF8, 0, replaceFunc }, -#endif - { "reverse", 1, 0, SQLITE_UTF8, 0, reverseFunc }, - { "proper", 1, 0, SQLITE_UTF8, 0, properFunc }, - { "padl", 2, 0, SQLITE_UTF8, 0, padlFunc }, - { "padr", 2, 0, SQLITE_UTF8, 0, padrFunc }, - { "padc", 2, 0, SQLITE_UTF8, 0, padcFunc }, - { "strfilter", 2, 0, SQLITE_UTF8, 0, strfilterFunc }, - - }; - /* Aggregate functions */ - static const struct FuncDefAgg { - char *zName; - signed char nArg; - u8 argType; - u8 needCollSeq; - void (*xStep)(sqlite3_context*,int,sqlite3_value**); - void (*xFinalize)(sqlite3_context*); - } aAggs[] = { - { "stdev", 1, 0, 0, varianceStep, stdevFinalize }, - { "variance", 1, 0, 0, varianceStep, varianceFinalize }, - { "mode", 1, 0, 0, modeStep, modeFinalize }, - { "median", 1, 0, 0, modeStep, medianFinalize }, - { "lower_quartile", 1, 0, 0, modeStep, lower_quartileFinalize }, - { "upper_quartile", 1, 0, 0, modeStep, upper_quartileFinalize }, - }; - int i; - - for(i=0; ineedCollSeq = 1; - } - } -#endif - } - - for(i=0; ineedCollSeq = 1; - } - } -#endif - } - return 0; -} - -#ifdef COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE -int sqlite3_extension_init( - sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi){ - SQLITE_EXTENSION_INIT2(pApi); - RegisterExtensionFunctions(db); - return 0; -} -#endif /* COMPILE_SQLITE_EXTENSIONS_AS_LOADABLE_MODULE */ - -map map_make(cmp_func cmp){ - map r; - r.cmp=cmp; - r.base = 0; - - return r; -} - -void* xcalloc(size_t nmemb, size_t size, char* s){ - void* ret = calloc(nmemb, size); - return ret; -} - -void xfree(void* p){ - free(p); -} - 
-void node_insert(node** n, cmp_func cmp, void *e){
-  int c;
-  node* nn;
-  if(*n==0){
-    nn = (node*)xcalloc(1,sizeof(node), "for node");
-    nn->data = e;
-    nn->count = 1;
-    *n=nn;
-  }else{
-    c=cmp((*n)->data,e);
-    if(0==c){
-      ++((*n)->count);
-      xfree(e);
-    }else if(c>0){
-      /* put it right here */
-      node_insert(&((*n)->l), cmp, e);
-    }else{
-      node_insert(&((*n)->r), cmp, e);
-    }
-  }
-}
-
-void map_insert(map *m, void *e){
-  node_insert(&(m->base), m->cmp, e);
-}
-
-void node_iterate(node *n, map_iterator iter, void* p){
-  if(n){
-    if(n->l)
-      node_iterate(n->l, iter, p);
-    iter(n->data, n->count, p);
-    if(n->r)
-      node_iterate(n->r, iter, p);
-  }
-}
-
-void map_iterate(map *m, map_iterator iter, void* p){
-  node_iterate(m->base, iter, p);
-}
-
-void node_destroy(node *n){
-  if(0!=n){
-    xfree(n->data);
-    if(n->l)
-      node_destroy(n->l);
-    if(n->r)
-      node_destroy(n->r);
-
-    xfree(n);
-  }
-}
-
-void map_destroy(map *m){
-  node_destroy(m->base);
-}
-
-int int_cmp(const void *a, const void *b){
-  int64_t aa = *(int64_t *)(a);
-  int64_t bb = *(int64_t *)(b);
-  /* printf("cmp %d <=> %d\n",aa,bb); */
-  if(aa==bb)
-    return 0;
-  else if(aa<bb)
-    return -1;
-  else
-    return 1;
-}
-
-int double_cmp(const void *a, const void *b){
-  double aa = *(double *)(a);
-  double bb = *(double *)(b);
-  /* printf("cmp %d <=> %d\n",aa,bb); */
-  if(aa==bb)
-    return 0;
-  else if(aa<bb)
-    return -1;
-  else
-    return 1;
-}
-
-void print_elem(void *e, int64_t c, void* p){
-  int ee = *(int*)(e);
-  printf("%d => %lld\n", ee,c);
-}
-
diff --git a/vendor/pyLibrary/vendor/sqlite/libsqlitefunctions.so b/vendor/pyLibrary/vendor/sqlite/libsqlitefunctions.so
deleted file mode 100644
index 6582ca0..0000000
Binary files a/vendor/pyLibrary/vendor/sqlite/libsqlitefunctions.so and /dev/null differ
diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3.def b/vendor/pyLibrary/vendor/sqlite/sqlite3.def
deleted file mode 100644
index 0ab28af..0000000
--- a/vendor/pyLibrary/vendor/sqlite/sqlite3.def
+++ /dev/null
@@ -1,237 +0,0 @@
-EXPORTS
-sqlite3_aggregate_context
-sqlite3_aggregate_count
-sqlite3_auto_extension
-sqlite3_backup_finish
-sqlite3_backup_init
-sqlite3_backup_pagecount
-sqlite3_backup_remaining
-sqlite3_backup_step
-sqlite3_bind_blob
-sqlite3_bind_blob64
-sqlite3_bind_double
-sqlite3_bind_int
-sqlite3_bind_int64
-sqlite3_bind_null
-sqlite3_bind_parameter_count
-sqlite3_bind_parameter_index
-sqlite3_bind_parameter_name
-sqlite3_bind_text
-sqlite3_bind_text16
-sqlite3_bind_text64
-sqlite3_bind_value
-sqlite3_bind_zeroblob
-sqlite3_bind_zeroblob64
-sqlite3_blob_bytes
-sqlite3_blob_close
-sqlite3_blob_open
-sqlite3_blob_read
-sqlite3_blob_reopen
-sqlite3_blob_write
-sqlite3_busy_handler
-sqlite3_busy_timeout
-sqlite3_cancel_auto_extension
-sqlite3_changes
-sqlite3_clear_bindings
-sqlite3_close
-sqlite3_close_v2
-sqlite3_collation_needed
-sqlite3_collation_needed16
-sqlite3_column_blob
-sqlite3_column_bytes
-sqlite3_column_bytes16
-sqlite3_column_count
-sqlite3_column_database_name
-sqlite3_column_database_name16
-sqlite3_column_decltype
-sqlite3_column_decltype16
-sqlite3_column_double
-sqlite3_column_int
-sqlite3_column_int64
-sqlite3_column_name
-sqlite3_column_name16
-sqlite3_column_origin_name
-sqlite3_column_origin_name16
-sqlite3_column_table_name
-sqlite3_column_table_name16
-sqlite3_column_text
-sqlite3_column_text16
-sqlite3_column_type
-sqlite3_column_value
-sqlite3_commit_hook
-sqlite3_compileoption_get
-sqlite3_compileoption_used
-sqlite3_complete
-sqlite3_complete16
-sqlite3_config
-sqlite3_context_db_handle
-sqlite3_create_collation
-sqlite3_create_collation_v2
-sqlite3_create_collation16
-sqlite3_create_function
-sqlite3_create_function_v2
-sqlite3_create_function16
-sqlite3_create_module
-sqlite3_create_module_v2
-sqlite3_data_count
-sqlite3_data_directory
-sqlite3_db_cacheflush
-sqlite3_db_config
-sqlite3_db_filename
-sqlite3_db_handle
-sqlite3_db_mutex
-sqlite3_db_readonly
-sqlite3_db_release_memory
-sqlite3_db_status
-sqlite3_declare_vtab
-sqlite3_enable_load_extension
-sqlite3_enable_shared_cache
-sqlite3_errcode
-sqlite3_errmsg
-sqlite3_errmsg16
-sqlite3_errstr
-sqlite3_exec
-sqlite3_expired
-sqlite3_extended_errcode
-sqlite3_extended_result_codes
-sqlite3_file_control
-sqlite3_finalize
-sqlite3_free
-sqlite3_free_table
-sqlite3_fts5_may_be_corrupt
-sqlite3_get_autocommit
-sqlite3_get_auxdata
-sqlite3_get_table
-sqlite3_global_recover
-sqlite3_initialize
-sqlite3_interrupt
-sqlite3_last_insert_rowid
-sqlite3_libversion
-sqlite3_libversion_number
-sqlite3_limit
-sqlite3_load_extension
-sqlite3_log
-sqlite3_malloc
-sqlite3_malloc64
-sqlite3_memory_alarm
-sqlite3_memory_highwater
-sqlite3_memory_used
-sqlite3_mprintf
-sqlite3_msize
-sqlite3_mutex_alloc
-sqlite3_mutex_enter
-sqlite3_mutex_free
-sqlite3_mutex_leave
-sqlite3_mutex_try
-sqlite3_next_stmt
-sqlite3_open
-sqlite3_open_v2
-sqlite3_open16
-sqlite3_os_end
-sqlite3_os_init
-sqlite3_overload_function
-sqlite3_prepare
-sqlite3_prepare_v2
-sqlite3_prepare16
-sqlite3_prepare16_v2
-sqlite3_profile
-sqlite3_progress_handler
-sqlite3_randomness
-sqlite3_realloc
-sqlite3_realloc64
-sqlite3_release_memory
-sqlite3_reset
-sqlite3_reset_auto_extension
-sqlite3_result_blob
-sqlite3_result_blob64
-sqlite3_result_double
-sqlite3_result_error
-sqlite3_result_error_code
-sqlite3_result_error_nomem
-sqlite3_result_error_toobig
-sqlite3_result_error16
-sqlite3_result_int
-sqlite3_result_int64
-sqlite3_result_null
-sqlite3_result_subtype
-sqlite3_result_text
-sqlite3_result_text16
-sqlite3_result_text16be
-sqlite3_result_text16le
-sqlite3_result_text64
-sqlite3_result_value
-sqlite3_result_zeroblob
-sqlite3_result_zeroblob64
-sqlite3_rollback_hook
-sqlite3_rtree_geometry_callback
-sqlite3_rtree_query_callback
-sqlite3_set_authorizer
-sqlite3_set_auxdata
-sqlite3_shutdown
-sqlite3_sleep
-sqlite3_snprintf
-sqlite3_soft_heap_limit
-sqlite3_soft_heap_limit64
-sqlite3_sourceid
-sqlite3_sql
-sqlite3_status
-sqlite3_status64
-sqlite3_step
-sqlite3_stmt_busy
-sqlite3_stmt_readonly
-sqlite3_stmt_status
-sqlite3_strglob
-sqlite3_stricmp
-sqlite3_strlike
-sqlite3_strnicmp
-sqlite3_system_errno
-sqlite3_table_column_metadata
-sqlite3_temp_directory
-sqlite3_test_control
-sqlite3_thread_cleanup
-sqlite3_threadsafe
-sqlite3_total_changes
-sqlite3_trace
-sqlite3_transfer_bindings
-sqlite3_update_hook
-sqlite3_uri_boolean
-sqlite3_uri_int64
-sqlite3_uri_parameter
-sqlite3_user_data
-sqlite3_value_blob
-sqlite3_value_bytes
-sqlite3_value_bytes16
-sqlite3_value_double
-sqlite3_value_dup
-sqlite3_value_free
-sqlite3_value_int
-sqlite3_value_int64
-sqlite3_value_numeric_type
-sqlite3_value_subtype
-sqlite3_value_text
-sqlite3_value_text16
-sqlite3_value_text16be
-sqlite3_value_text16le
-sqlite3_value_type
-sqlite3_version
-sqlite3_vfs_find
-sqlite3_vfs_register
-sqlite3_vfs_unregister
-sqlite3_vmprintf
-sqlite3_vsnprintf
-sqlite3_vtab_config
-sqlite3_vtab_on_conflict
-sqlite3_wal_autocheckpoint
-sqlite3_wal_checkpoint
-sqlite3_wal_checkpoint_v2
-sqlite3_wal_hook
-sqlite3_win32_is_nt
-sqlite3_win32_mbcs_to_utf8
-sqlite3_win32_mbcs_to_utf8_v2
-sqlite3_win32_set_directory
-sqlite3_win32_sleep
-sqlite3_win32_unicode_to_utf8
-sqlite3_win32_utf8_to_mbcs
-sqlite3_win32_utf8_to_mbcs_v2
-sqlite3_win32_utf8_to_unicode
-sqlite3_win32_write_debug
diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3.dll b/vendor/pyLibrary/vendor/sqlite/sqlite3.dll
deleted file mode 100644
index 9ef02de..0000000
Binary files a/vendor/pyLibrary/vendor/sqlite/sqlite3.dll and /dev/null differ
diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3.h b/vendor/pyLibrary/vendor/sqlite/sqlite3.h
deleted file mode 100644
index 313b5ec..0000000
--- a/vendor/pyLibrary/vendor/sqlite/sqlite3.h
+++ /dev/null
@@ -1,10188 +0,0 @@
-/*
-** 2001 September 15
-**
-** The author disclaims copyright to this source code.  In place of
-** a legal notice, here is a blessing:
-**
-**    May you do good and not evil.
-**    May you find forgiveness for yourself and forgive others.
-**    May you share freely, never taking more than you give.
-**
-*************************************************************************
-** This header file defines the interface that the SQLite library
-** presents to client programs.  If a C-function, structure, datatype,
-** or constant definition does not appear in this file, then it is
-** not a published API of SQLite, is subject to change without
-** notice, and should not be referenced by programs that use SQLite.
-**
-** Some of the definitions that are in this file are marked as
-** "experimental".  Experimental interfaces are normally new
-** features recently added to SQLite.  We do not anticipate changes
-** to experimental interfaces but reserve the right to make minor changes
-** if experience from use "in the wild" suggest such changes are prudent.
-**
-** The official C-language API documentation for SQLite is derived
-** from comments in this file.  This file is the authoritative source
-** on how SQLite interfaces are supposed to operate.
-**
-** The name of this file under configuration management is "sqlite.h.in".
-** The makefile makes some minor changes to this file (such as inserting
-** the version number) and changes its name to "sqlite3.h" as
-** part of the build process.
-*/
-#ifndef _SQLITE3_H_
-#define _SQLITE3_H_
-#include <stdarg.h>     /* Needed for the definition of va_list */
-
-/*
-** Make sure we can call this stuff from C++.
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
-** Provide the ability to override linkage features of the interface.
-*/
-#ifndef SQLITE_EXTERN
-# define SQLITE_EXTERN extern
-#endif
-#ifndef SQLITE_API
-# define SQLITE_API
-#endif
-#ifndef SQLITE_CDECL
-# define SQLITE_CDECL
-#endif
-#ifndef SQLITE_STDCALL
-# define SQLITE_STDCALL
-#endif
-
-/*
-** These no-op macros are used in front of interfaces to mark those
-** interfaces as either deprecated or experimental.  New applications
-** should not use deprecated interfaces - they are supported for backwards
-** compatibility only.  Application writers should be aware that
-** experimental interfaces are subject to change in point releases.
-**
-** These macros used to resolve to various kinds of compiler magic that
-** would generate warning messages when they were used.  But that
-** compiler magic ended up generating such a flurry of bug reports
-** that we have taken it all out and gone back to using simple
-** noop macros.
-*/
-#define SQLITE_DEPRECATED
-#define SQLITE_EXPERIMENTAL
-
-/*
-** Ensure these symbols were not defined by some previous header file.
-*/ -#ifdef SQLITE_VERSION -# undef SQLITE_VERSION -#endif -#ifdef SQLITE_VERSION_NUMBER -# undef SQLITE_VERSION_NUMBER -#endif - -/* -** CAPI3REF: Compile-Time Library Version Numbers -** -** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header -** evaluates to a string literal that is the SQLite version in the -** format "X.Y.Z" where X is the major version number (always 3 for -** SQLite3) and Y is the minor version number and Z is the release number.)^ -** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer -** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same -** numbers used in [SQLITE_VERSION].)^ -** The SQLITE_VERSION_NUMBER for any given release of SQLite will also -** be larger than the release from which it is derived. Either Y will -** be held constant and Z will be incremented or else Y will be incremented -** and Z will be reset to zero. -** -** Since version 3.6.18, SQLite source code has been stored in the -** Fossil configuration management -** system. ^The SQLITE_SOURCE_ID macro evaluates to -** a string which identifies a particular check-in of SQLite -** within its configuration management system. ^The SQLITE_SOURCE_ID -** string contains the date and time of the check-in (UTC) and an SHA1 -** hash of the entire source tree. -** -** See also: [sqlite3_libversion()], -** [sqlite3_libversion_number()], [sqlite3_sourceid()], -** [sqlite_version()] and [sqlite_source_id()]. -*/ -#define SQLITE_VERSION "3.13.0" -#define SQLITE_VERSION_NUMBER 3013000 -#define SQLITE_SOURCE_ID "2016-05-18 10:57:30 fc49f556e48970561d7ab6a2f24fdd7d9eb81ff2" - -/* -** CAPI3REF: Run-Time Library Version Numbers -** KEYWORDS: sqlite3_version, sqlite3_sourceid -** -** These interfaces provide the same information as the [SQLITE_VERSION], -** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros -** but are associated with the library instead of the header file. ^(Cautious -** programmers might include assert() statements in their application to -** verify that values returned by these interfaces match the macros in -** the header, and thus ensure that the application is -** compiled with matching library and header files. -** -**
-** assert( sqlite3_libversion_number()==SQLITE_VERSION_NUMBER );
-** assert( strcmp(sqlite3_sourceid(),SQLITE_SOURCE_ID)==0 );
-** assert( strcmp(sqlite3_libversion(),SQLITE_VERSION)==0 );
-** 
)^ -** -** ^The sqlite3_version[] string constant contains the text of [SQLITE_VERSION] -** macro. ^The sqlite3_libversion() function returns a pointer to the -** to the sqlite3_version[] string constant. The sqlite3_libversion() -** function is provided for use in DLLs since DLL users usually do not have -** direct access to string constants within the DLL. ^The -** sqlite3_libversion_number() function returns an integer equal to -** [SQLITE_VERSION_NUMBER]. ^The sqlite3_sourceid() function returns -** a pointer to a string constant whose value is the same as the -** [SQLITE_SOURCE_ID] C preprocessor macro. -** -** See also: [sqlite_version()] and [sqlite_source_id()]. -*/ -SQLITE_API SQLITE_EXTERN const char sqlite3_version[]; -SQLITE_API const char *SQLITE_STDCALL sqlite3_libversion(void); -SQLITE_API const char *SQLITE_STDCALL sqlite3_sourceid(void); -SQLITE_API int SQLITE_STDCALL sqlite3_libversion_number(void); - -/* -** CAPI3REF: Run-Time Library Compilation Options Diagnostics -** -** ^The sqlite3_compileoption_used() function returns 0 or 1 -** indicating whether the specified option was defined at -** compile time. ^The SQLITE_ prefix may be omitted from the -** option name passed to sqlite3_compileoption_used(). -** -** ^The sqlite3_compileoption_get() function allows iterating -** over the list of options that were defined at compile time by -** returning the N-th compile time option string. ^If N is out of range, -** sqlite3_compileoption_get() returns a NULL pointer. ^The SQLITE_ -** prefix is omitted from any strings returned by -** sqlite3_compileoption_get(). -** -** ^Support for the diagnostic functions sqlite3_compileoption_used() -** and sqlite3_compileoption_get() may be omitted by specifying the -** [SQLITE_OMIT_COMPILEOPTION_DIAGS] option at compile time. -** -** See also: SQL functions [sqlite_compileoption_used()] and -** [sqlite_compileoption_get()] and the [compile_options pragma]. -*/ -#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS -SQLITE_API int SQLITE_STDCALL sqlite3_compileoption_used(const char *zOptName); -SQLITE_API const char *SQLITE_STDCALL sqlite3_compileoption_get(int N); -#endif - -/* -** CAPI3REF: Test To See If The Library Is Threadsafe -** -** ^The sqlite3_threadsafe() function returns zero if and only if -** SQLite was compiled with mutexing code omitted due to the -** [SQLITE_THREADSAFE] compile-time option being set to 0. -** -** SQLite can be compiled with or without mutexes. When -** the [SQLITE_THREADSAFE] C preprocessor macro is 1 or 2, mutexes -** are enabled and SQLite is threadsafe. When the -** [SQLITE_THREADSAFE] macro is 0, -** the mutexes are omitted. Without the mutexes, it is not safe -** to use SQLite concurrently from more than one thread. -** -** Enabling mutexes incurs a measurable performance penalty. -** So if speed is of utmost importance, it makes sense to disable -** the mutexes. But for maximum safety, mutexes should be enabled. -** ^The default behavior is for mutexes to be enabled. -** -** This interface can be used by an application to make sure that the -** version of SQLite that it is linking against was compiled with -** the desired setting of the [SQLITE_THREADSAFE] macro. -** -** This interface only reports on the compile-time mutex setting -** of the [SQLITE_THREADSAFE] flag. 
If SQLite is compiled with -** SQLITE_THREADSAFE=1 or =2 then mutexes are enabled by default but -** can be fully or partially disabled using a call to [sqlite3_config()] -** with the verbs [SQLITE_CONFIG_SINGLETHREAD], [SQLITE_CONFIG_MULTITHREAD], -** or [SQLITE_CONFIG_SERIALIZED]. ^(The return value of the -** sqlite3_threadsafe() function shows only the compile-time setting of -** thread safety, not any run-time changes to that setting made by -** sqlite3_config(). In other words, the return value from sqlite3_threadsafe() -** is unchanged by calls to sqlite3_config().)^ -** -** See the [threading mode] documentation for additional information. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_threadsafe(void); - -/* -** CAPI3REF: Database Connection Handle -** KEYWORDS: {database connection} {database connections} -** -** Each open SQLite database is represented by a pointer to an instance of -** the opaque structure named "sqlite3". It is useful to think of an sqlite3 -** pointer as an object. The [sqlite3_open()], [sqlite3_open16()], and -** [sqlite3_open_v2()] interfaces are its constructors, and [sqlite3_close()] -** and [sqlite3_close_v2()] are its destructors. There are many other -** interfaces (such as -** [sqlite3_prepare_v2()], [sqlite3_create_function()], and -** [sqlite3_busy_timeout()] to name but three) that are methods on an -** sqlite3 object. -*/ -typedef struct sqlite3 sqlite3; - -/* -** CAPI3REF: 64-Bit Integer Types -** KEYWORDS: sqlite_int64 sqlite_uint64 -** -** Because there is no cross-platform way to specify 64-bit integer types -** SQLite includes typedefs for 64-bit signed and unsigned integers. -** -** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions. -** The sqlite_int64 and sqlite_uint64 types are supported for backwards -** compatibility only. -** -** ^The sqlite3_int64 and sqlite_int64 types can store integer values -** between -9223372036854775808 and +9223372036854775807 inclusive. ^The -** sqlite3_uint64 and sqlite_uint64 types can store integer values -** between 0 and +18446744073709551615 inclusive. -*/ -#ifdef SQLITE_INT64_TYPE - typedef SQLITE_INT64_TYPE sqlite_int64; - typedef unsigned SQLITE_INT64_TYPE sqlite_uint64; -#elif defined(_MSC_VER) || defined(__BORLANDC__) - typedef __int64 sqlite_int64; - typedef unsigned __int64 sqlite_uint64; -#else - typedef long long int sqlite_int64; - typedef unsigned long long int sqlite_uint64; -#endif -typedef sqlite_int64 sqlite3_int64; -typedef sqlite_uint64 sqlite3_uint64; - -/* -** If compiling for a processor that lacks floating point support, -** substitute integer for floating-point. -*/ -#ifdef SQLITE_OMIT_FLOATING_POINT -# define double sqlite3_int64 -#endif - -/* -** CAPI3REF: Closing A Database Connection -** DESTRUCTOR: sqlite3 -** -** ^The sqlite3_close() and sqlite3_close_v2() routines are destructors -** for the [sqlite3] object. -** ^Calls to sqlite3_close() and sqlite3_close_v2() return [SQLITE_OK] if -** the [sqlite3] object is successfully destroyed and all associated -** resources are deallocated. -** -** ^If the database connection is associated with unfinalized prepared -** statements or unfinished sqlite3_backup objects then sqlite3_close() -** will leave the database connection open and return [SQLITE_BUSY]. 
-** ^If sqlite3_close_v2() is called with unfinalized prepared statements -** and/or unfinished sqlite3_backups, then the database connection becomes -** an unusable "zombie" which will automatically be deallocated when the -** last prepared statement is finalized or the last sqlite3_backup is -** finished. The sqlite3_close_v2() interface is intended for use with -** host languages that are garbage collected, and where the order in which -** destructors are called is arbitrary. -** -** Applications should [sqlite3_finalize | finalize] all [prepared statements], -** [sqlite3_blob_close | close] all [BLOB handles], and -** [sqlite3_backup_finish | finish] all [sqlite3_backup] objects associated -** with the [sqlite3] object prior to attempting to close the object. ^If -** sqlite3_close_v2() is called on a [database connection] that still has -** outstanding [prepared statements], [BLOB handles], and/or -** [sqlite3_backup] objects then it returns [SQLITE_OK] and the deallocation -** of resources is deferred until all [prepared statements], [BLOB handles], -** and [sqlite3_backup] objects are also destroyed. -** -** ^If an [sqlite3] object is destroyed while a transaction is open, -** the transaction is automatically rolled back. -** -** The C parameter to [sqlite3_close(C)] and [sqlite3_close_v2(C)] -** must be either a NULL -** pointer or an [sqlite3] object pointer obtained -** from [sqlite3_open()], [sqlite3_open16()], or -** [sqlite3_open_v2()], and not previously closed. -** ^Calling sqlite3_close() or sqlite3_close_v2() with a NULL pointer -** argument is a harmless no-op. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_close(sqlite3*); -SQLITE_API int SQLITE_STDCALL sqlite3_close_v2(sqlite3*); - -/* -** The type for a callback function. -** This is legacy and deprecated. It is included for historical -** compatibility and is not documented. -*/ -typedef int (*sqlite3_callback)(void*,int,char**, char**); - -/* -** CAPI3REF: One-Step Query Execution Interface -** METHOD: sqlite3 -** -** The sqlite3_exec() interface is a convenience wrapper around -** [sqlite3_prepare_v2()], [sqlite3_step()], and [sqlite3_finalize()], -** that allows an application to run multiple statements of SQL -** without having to use a lot of C code. -** -** ^The sqlite3_exec() interface runs zero or more UTF-8 encoded, -** semicolon-separate SQL statements passed into its 2nd argument, -** in the context of the [database connection] passed in as its 1st -** argument. ^If the callback function of the 3rd argument to -** sqlite3_exec() is not NULL, then it is invoked for each result row -** coming out of the evaluated SQL statements. ^The 4th argument to -** sqlite3_exec() is relayed through to the 1st argument of each -** callback invocation. ^If the callback pointer to sqlite3_exec() -** is NULL, then no callback is ever invoked and result rows are -** ignored. -** -** ^If an error occurs while evaluating the SQL statements passed into -** sqlite3_exec(), then execution of the current statement stops and -** subsequent statements are skipped. ^If the 5th parameter to sqlite3_exec() -** is not NULL then any error message is written into memory obtained -** from [sqlite3_malloc()] and passed back through the 5th parameter. -** To avoid memory leaks, the application should invoke [sqlite3_free()] -** on error message strings returned through the 5th parameter of -** sqlite3_exec() after the error message string is no longer needed. 
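For orientation, a minimal sqlite3_exec() sketch matching the semantics described above (editorial, not part of the deleted header; the table name t and the dump_row callback are illustrative):

    #include <stdio.h>
    #include "sqlite3.h"

    /* Invoked once per result row; prints each column as name = value. */
    static int dump_row(void *pUnused, int nCol, char **azVal, char **azCol){
      int i;
      for(i=0; i<nCol; i++){
        printf("%s = %s\n", azCol[i], azVal[i] ? azVal[i] : "NULL");
      }
      return 0;                 /* returning non-zero aborts with SQLITE_ABORT */
    }

    static int run_query(sqlite3 *db){
      char *zErrMsg = 0;
      int rc = sqlite3_exec(db, "SELECT * FROM t;", dump_row, 0, &zErrMsg);
      if( rc!=SQLITE_OK ){
        fprintf(stderr, "SQL error: %s\n", zErrMsg ? zErrMsg : "unknown");
        sqlite3_free(zErrMsg);  /* message comes from sqlite3_malloc() */
      }
      return rc;
    }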
-** ^If the 5th parameter to sqlite3_exec() is not NULL and no errors -** occur, then sqlite3_exec() sets the pointer in its 5th parameter to -** NULL before returning. -** -** ^If an sqlite3_exec() callback returns non-zero, the sqlite3_exec() -** routine returns SQLITE_ABORT without invoking the callback again and -** without running any subsequent SQL statements. -** -** ^The 2nd argument to the sqlite3_exec() callback function is the -** number of columns in the result. ^The 3rd argument to the sqlite3_exec() -** callback is an array of pointers to strings obtained as if from -** [sqlite3_column_text()], one for each column. ^If an element of a -** result row is NULL then the corresponding string pointer for the -** sqlite3_exec() callback is a NULL pointer. ^The 4th argument to the -** sqlite3_exec() callback is an array of pointers to strings where each -** entry represents the name of corresponding result column as obtained -** from [sqlite3_column_name()]. -** -** ^If the 2nd parameter to sqlite3_exec() is a NULL pointer, a pointer -** to an empty string, or a pointer that contains only whitespace and/or -** SQL comments, then no SQL statements are evaluated and the database -** is not changed. -** -** Restrictions: -** -**
    -**
  • The application must ensure that the 1st parameter to sqlite3_exec() -** is a valid and open [database connection]. -**
  • The application must not close the [database connection] specified by -** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running. -**
  • The application must not modify the SQL statement text passed into -** the 2nd parameter of sqlite3_exec() while sqlite3_exec() is running. -**
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_exec( - sqlite3*, /* An open database */ - const char *sql, /* SQL to be evaluated */ - int (*callback)(void*,int,char**,char**), /* Callback function */ - void *, /* 1st argument to callback */ - char **errmsg /* Error msg written here */ -); - -/* -** CAPI3REF: Result Codes -** KEYWORDS: {result code definitions} -** -** Many SQLite functions return an integer result code from the set shown -** here in order to indicate success or failure. -** -** New error codes may be added in future versions of SQLite. -** -** See also: [extended result code definitions] -*/ -#define SQLITE_OK 0 /* Successful result */ -/* beginning-of-error-codes */ -#define SQLITE_ERROR 1 /* SQL error or missing database */ -#define SQLITE_INTERNAL 2 /* Internal logic error in SQLite */ -#define SQLITE_PERM 3 /* Access permission denied */ -#define SQLITE_ABORT 4 /* Callback routine requested an abort */ -#define SQLITE_BUSY 5 /* The database file is locked */ -#define SQLITE_LOCKED 6 /* A table in the database is locked */ -#define SQLITE_NOMEM 7 /* A malloc() failed */ -#define SQLITE_READONLY 8 /* Attempt to write a readonly database */ -#define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/ -#define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */ -#define SQLITE_CORRUPT 11 /* The database disk image is malformed */ -#define SQLITE_NOTFOUND 12 /* Unknown opcode in sqlite3_file_control() */ -#define SQLITE_FULL 13 /* Insertion failed because database is full */ -#define SQLITE_CANTOPEN 14 /* Unable to open the database file */ -#define SQLITE_PROTOCOL 15 /* Database lock protocol error */ -#define SQLITE_EMPTY 16 /* Database is empty */ -#define SQLITE_SCHEMA 17 /* The database schema changed */ -#define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */ -#define SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */ -#define SQLITE_MISMATCH 20 /* Data type mismatch */ -#define SQLITE_MISUSE 21 /* Library used incorrectly */ -#define SQLITE_NOLFS 22 /* Uses OS features not supported on host */ -#define SQLITE_AUTH 23 /* Authorization denied */ -#define SQLITE_FORMAT 24 /* Auxiliary database format error */ -#define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ -#define SQLITE_NOTADB 26 /* File opened that is not a database file */ -#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ -#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ -#define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ -#define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ -/* end-of-error-codes */ - -/* -** CAPI3REF: Extended Result Codes -** KEYWORDS: {extended result code definitions} -** -** In its default configuration, SQLite API routines return one of 30 integer -** [result codes]. However, experience has shown that many of -** these result codes are too coarse-grained. They do not provide as -** much information about problems as programmers might like. In an effort to -** address this, newer versions of SQLite (version 3.3.8 and later) include -** support for additional result codes that provide more detailed information -** about errors. These [extended result codes] are enabled or disabled -** on a per database connection basis using the -** [sqlite3_extended_result_codes()] API. Or, the extended code for -** the most recent error can be obtained using -** [sqlite3_extended_errcode()]. 
-*/ -#define SQLITE_IOERR_READ (SQLITE_IOERR | (1<<8)) -#define SQLITE_IOERR_SHORT_READ (SQLITE_IOERR | (2<<8)) -#define SQLITE_IOERR_WRITE (SQLITE_IOERR | (3<<8)) -#define SQLITE_IOERR_FSYNC (SQLITE_IOERR | (4<<8)) -#define SQLITE_IOERR_DIR_FSYNC (SQLITE_IOERR | (5<<8)) -#define SQLITE_IOERR_TRUNCATE (SQLITE_IOERR | (6<<8)) -#define SQLITE_IOERR_FSTAT (SQLITE_IOERR | (7<<8)) -#define SQLITE_IOERR_UNLOCK (SQLITE_IOERR | (8<<8)) -#define SQLITE_IOERR_RDLOCK (SQLITE_IOERR | (9<<8)) -#define SQLITE_IOERR_DELETE (SQLITE_IOERR | (10<<8)) -#define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) -#define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) -#define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) -#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) -#define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) -#define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) -#define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) -#define SQLITE_IOERR_SHMOPEN (SQLITE_IOERR | (18<<8)) -#define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8)) -#define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8)) -#define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) -#define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) -#define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) -#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) -#define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8)) -#define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8)) -#define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8)) -#define SQLITE_IOERR_AUTH (SQLITE_IOERR | (28<<8)) -#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) -#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) -#define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) -#define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) -#define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) -#define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8)) -#define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8)) -#define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8)) -#define SQLITE_READONLY_RECOVERY (SQLITE_READONLY | (1<<8)) -#define SQLITE_READONLY_CANTLOCK (SQLITE_READONLY | (2<<8)) -#define SQLITE_READONLY_ROLLBACK (SQLITE_READONLY | (3<<8)) -#define SQLITE_READONLY_DBMOVED (SQLITE_READONLY | (4<<8)) -#define SQLITE_ABORT_ROLLBACK (SQLITE_ABORT | (2<<8)) -#define SQLITE_CONSTRAINT_CHECK (SQLITE_CONSTRAINT | (1<<8)) -#define SQLITE_CONSTRAINT_COMMITHOOK (SQLITE_CONSTRAINT | (2<<8)) -#define SQLITE_CONSTRAINT_FOREIGNKEY (SQLITE_CONSTRAINT | (3<<8)) -#define SQLITE_CONSTRAINT_FUNCTION (SQLITE_CONSTRAINT | (4<<8)) -#define SQLITE_CONSTRAINT_NOTNULL (SQLITE_CONSTRAINT | (5<<8)) -#define SQLITE_CONSTRAINT_PRIMARYKEY (SQLITE_CONSTRAINT | (6<<8)) -#define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) -#define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) -#define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) -#define SQLITE_CONSTRAINT_ROWID (SQLITE_CONSTRAINT |(10<<8)) -#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) -#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) -#define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8)) -#define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8)) - -/* -** CAPI3REF: Flags For File Open Operations -** -** These bit values are intended for use in the -** 3rd parameter to the [sqlite3_open_v2()] interface and -** in the 4th parameter to the [sqlite3_vfs.xOpen] method. 
-*/ -#define SQLITE_OPEN_READONLY 0x00000001 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_READWRITE 0x00000002 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_CREATE 0x00000004 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_DELETEONCLOSE 0x00000008 /* VFS only */ -#define SQLITE_OPEN_EXCLUSIVE 0x00000010 /* VFS only */ -#define SQLITE_OPEN_AUTOPROXY 0x00000020 /* VFS only */ -#define SQLITE_OPEN_URI 0x00000040 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_MEMORY 0x00000080 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_MAIN_DB 0x00000100 /* VFS only */ -#define SQLITE_OPEN_TEMP_DB 0x00000200 /* VFS only */ -#define SQLITE_OPEN_TRANSIENT_DB 0x00000400 /* VFS only */ -#define SQLITE_OPEN_MAIN_JOURNAL 0x00000800 /* VFS only */ -#define SQLITE_OPEN_TEMP_JOURNAL 0x00001000 /* VFS only */ -#define SQLITE_OPEN_SUBJOURNAL 0x00002000 /* VFS only */ -#define SQLITE_OPEN_MASTER_JOURNAL 0x00004000 /* VFS only */ -#define SQLITE_OPEN_NOMUTEX 0x00008000 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_FULLMUTEX 0x00010000 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_SHAREDCACHE 0x00020000 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_PRIVATECACHE 0x00040000 /* Ok for sqlite3_open_v2() */ -#define SQLITE_OPEN_WAL 0x00080000 /* VFS only */ - -/* Reserved: 0x00F00000 */ - -/* -** CAPI3REF: Device Characteristics -** -** The xDeviceCharacteristics method of the [sqlite3_io_methods] -** object returns an integer which is a vector of these -** bit values expressing I/O characteristics of the mass storage -** device that holds the file that the [sqlite3_io_methods] -** refers to. -** -** The SQLITE_IOCAP_ATOMIC property means that all writes of -** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values -** mean that writes of blocks that are nnn bytes in size and -** are aligned to an address which is an integer multiple of -** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means -** that when data is appended to a file, the data is appended -** first then the size of the file is extended, never the other -** way around. The SQLITE_IOCAP_SEQUENTIAL property means that -** information is written to disk in the same order as calls -** to xWrite(). The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that -** after reboot following a crash or power loss, the only bytes in a -** file that were written at the application level might have changed -** and that adjacent bytes, even bytes within the same sector are -** guaranteed to be unchanged. The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN -** flag indicate that a file cannot be deleted when open. The -** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on -** read-only media and cannot be changed even by processes with -** elevated privileges. 
-*/ -#define SQLITE_IOCAP_ATOMIC 0x00000001 -#define SQLITE_IOCAP_ATOMIC512 0x00000002 -#define SQLITE_IOCAP_ATOMIC1K 0x00000004 -#define SQLITE_IOCAP_ATOMIC2K 0x00000008 -#define SQLITE_IOCAP_ATOMIC4K 0x00000010 -#define SQLITE_IOCAP_ATOMIC8K 0x00000020 -#define SQLITE_IOCAP_ATOMIC16K 0x00000040 -#define SQLITE_IOCAP_ATOMIC32K 0x00000080 -#define SQLITE_IOCAP_ATOMIC64K 0x00000100 -#define SQLITE_IOCAP_SAFE_APPEND 0x00000200 -#define SQLITE_IOCAP_SEQUENTIAL 0x00000400 -#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 -#define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 -#define SQLITE_IOCAP_IMMUTABLE 0x00002000 - -/* -** CAPI3REF: File Locking Levels -** -** SQLite uses one of these integer values as the second -** argument to calls it makes to the xLock() and xUnlock() methods -** of an [sqlite3_io_methods] object. -*/ -#define SQLITE_LOCK_NONE 0 -#define SQLITE_LOCK_SHARED 1 -#define SQLITE_LOCK_RESERVED 2 -#define SQLITE_LOCK_PENDING 3 -#define SQLITE_LOCK_EXCLUSIVE 4 - -/* -** CAPI3REF: Synchronization Type Flags -** -** When SQLite invokes the xSync() method of an -** [sqlite3_io_methods] object it uses a combination of -** these integer values as the second argument. -** -** When the SQLITE_SYNC_DATAONLY flag is used, it means that the -** sync operation only needs to flush data to mass storage. Inode -** information need not be flushed. If the lower four bits of the flag -** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics. -** If the lower four bits equal SQLITE_SYNC_FULL, that means -** to use Mac OS X style fullsync instead of fsync(). -** -** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags -** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL -** settings. The [synchronous pragma] determines when calls to the -** xSync VFS method occur and applies uniformly across all platforms. -** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how -** energetic or rigorous or forceful the sync operations are and -** only make a difference on Mac OSX for the default SQLite code. -** (Third-party VFS implementations might also make the distinction -** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the -** operating systems natively supported by SQLite, only Mac OSX -** cares about the difference.) -*/ -#define SQLITE_SYNC_NORMAL 0x00002 -#define SQLITE_SYNC_FULL 0x00003 -#define SQLITE_SYNC_DATAONLY 0x00010 - -/* -** CAPI3REF: OS Interface Open File Handle -** -** An [sqlite3_file] object represents an open file in the -** [sqlite3_vfs | OS interface layer]. Individual OS interface -** implementations will -** want to subclass this object by appending additional fields -** for their own use. The pMethods entry is a pointer to an -** [sqlite3_io_methods] object that defines methods for performing -** I/O operations on the open file. -*/ -typedef struct sqlite3_file sqlite3_file; -struct sqlite3_file { - const struct sqlite3_io_methods *pMethods; /* Methods for an open file */ -}; - -/* -** CAPI3REF: OS Interface File Virtual Methods Object -** -** Every file opened by the [sqlite3_vfs.xOpen] method populates an -** [sqlite3_file] object (or, more commonly, a subclass of the -** [sqlite3_file] object) with a pointer to an instance of this object. -** This object defines the methods used to perform various operations -** against the open file represented by the [sqlite3_file] object. 
-** -** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element -** to a non-NULL pointer, then the sqlite3_io_methods.xClose method -** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed. The -** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen] -** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element -** to NULL. -** -** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or -** [SQLITE_SYNC_FULL]. The first choice is the normal fsync(). -** The second choice is a Mac OS X style fullsync. The [SQLITE_SYNC_DATAONLY] -** flag may be ORed in to indicate that only the data of the file -** and not its inode needs to be synced. -** -** The integer values to xLock() and xUnlock() are one of -**
    -**
  • [SQLITE_LOCK_NONE], -**
  • [SQLITE_LOCK_SHARED], -**
  • [SQLITE_LOCK_RESERVED], -**
  • [SQLITE_LOCK_PENDING], or -**
  • [SQLITE_LOCK_EXCLUSIVE]. -**
-** xLock() increases the lock. xUnlock() decreases the lock. -** The xCheckReservedLock() method checks whether any database connection, -** either in this process or in some other process, is holding a RESERVED, -** PENDING, or EXCLUSIVE lock on the file. It returns true -** if such a lock exists and false otherwise. -** -** The xFileControl() method is a generic interface that allows custom -** VFS implementations to directly control an open file using the -** [sqlite3_file_control()] interface. The second "op" argument is an -** integer opcode. The third argument is a generic pointer intended to -** point to a structure that may contain arguments or space in which to -** write return values. Potential uses for xFileControl() might be -** functions to enable blocking locks with timeouts, to change the -** locking strategy (for example to use dot-file locks), to inquire -** about the status of a lock, or to break stale locks. The SQLite -** core reserves all opcodes less than 100 for its own use. -** A [file control opcodes | list of opcodes] less than 100 is available. -** Applications that define a custom xFileControl method should use opcodes -** greater than 100 to avoid conflicts. VFS implementations should -** return [SQLITE_NOTFOUND] for file control opcodes that they do not -** recognize. -** -** The xSectorSize() method returns the sector size of the -** device that underlies the file. The sector size is the -** minimum write that can be performed without disturbing -** other bytes in the file. The xDeviceCharacteristics() -** method returns a bit vector describing behaviors of the -** underlying device: -** -**
    -**
  • [SQLITE_IOCAP_ATOMIC] -**
  • [SQLITE_IOCAP_ATOMIC512] -**
  • [SQLITE_IOCAP_ATOMIC1K] -**
  • [SQLITE_IOCAP_ATOMIC2K] -**
  • [SQLITE_IOCAP_ATOMIC4K] -**
  • [SQLITE_IOCAP_ATOMIC8K] -**
  • [SQLITE_IOCAP_ATOMIC16K] -**
  • [SQLITE_IOCAP_ATOMIC32K] -**
  • [SQLITE_IOCAP_ATOMIC64K] -**
  • [SQLITE_IOCAP_SAFE_APPEND] -**
  • [SQLITE_IOCAP_SEQUENTIAL] -**
-** -** The SQLITE_IOCAP_ATOMIC property means that all writes of -** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values -** mean that writes of blocks that are nnn bytes in size and -** are aligned to an address which is an integer multiple of -** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means -** that when data is appended to a file, the data is appended -** first then the size of the file is extended, never the other -** way around. The SQLITE_IOCAP_SEQUENTIAL property means that -** information is written to disk in the same order as calls -** to xWrite(). -** -** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill -** in the unread portions of the buffer with zeros. A VFS that -** fails to zero-fill short reads might seem to work. However, -** failure to zero-fill short reads will eventually lead to -** database corruption. -*/ -typedef struct sqlite3_io_methods sqlite3_io_methods; -struct sqlite3_io_methods { - int iVersion; - int (*xClose)(sqlite3_file*); - int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); - int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst); - int (*xTruncate)(sqlite3_file*, sqlite3_int64 size); - int (*xSync)(sqlite3_file*, int flags); - int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize); - int (*xLock)(sqlite3_file*, int); - int (*xUnlock)(sqlite3_file*, int); - int (*xCheckReservedLock)(sqlite3_file*, int *pResOut); - int (*xFileControl)(sqlite3_file*, int op, void *pArg); - int (*xSectorSize)(sqlite3_file*); - int (*xDeviceCharacteristics)(sqlite3_file*); - /* Methods above are valid for version 1 */ - int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); - int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); - void (*xShmBarrier)(sqlite3_file*); - int (*xShmUnmap)(sqlite3_file*, int deleteFlag); - /* Methods above are valid for version 2 */ - int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); - int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); - /* Methods above are valid for version 3 */ - /* Additional methods may be added in future releases */ -}; - -/* -** CAPI3REF: Standard File Control Opcodes -** KEYWORDS: {file control opcodes} {file control opcode} -** -** These integer constants are opcodes for the xFileControl method -** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()] -** interface. -** -**
    -**
  • [[SQLITE_FCNTL_LOCKSTATE]] -** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging. This -** opcode causes the xFileControl method to write the current state of -** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED], -** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE]) -** into an integer that the pArg argument points to. This capability -** is used during testing and is only available when the SQLITE_TEST -** compile-time option is used. -** -**
  • [[SQLITE_FCNTL_SIZE_HINT]] -** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS -** layer a hint of how large the database file will grow to be during the -** current transaction. This hint is not guaranteed to be accurate but it -** is often close. The underlying VFS might choose to preallocate database -** file space based on this hint in order to help writes to the database -** file run faster. -** -**
  • [[SQLITE_FCNTL_CHUNK_SIZE]] -** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS -** extends and truncates the database file in chunks of a size specified -** by the user. The fourth argument to [sqlite3_file_control()] should -** point to an integer (type int) containing the new chunk-size to use -** for the nominated database. Allocating database file space in large -** chunks (say 1MB at a time), may reduce file-system fragmentation and -** improve performance on some systems. -** -**
  • [[SQLITE_FCNTL_FILE_POINTER]] -** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer -** to the [sqlite3_file] object associated with a particular database -** connection. See also [SQLITE_FCNTL_JOURNAL_POINTER]. -** -**
  • [[SQLITE_FCNTL_JOURNAL_POINTER]] -** The [SQLITE_FCNTL_JOURNAL_POINTER] opcode is used to obtain a pointer -** to the [sqlite3_file] object associated with the journal file (either -** the [rollback journal] or the [write-ahead log]) for a particular database -** connection. See also [SQLITE_FCNTL_FILE_POINTER]. -** -**
  • [[SQLITE_FCNTL_SYNC_OMITTED]] -** No longer in use. -** -**
  • [[SQLITE_FCNTL_SYNC]] -** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and -** sent to the VFS immediately before the xSync method is invoked on a -** database file descriptor. Or, if the xSync method is not invoked -** because the user has configured SQLite with -** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place -** of the xSync method. In most cases, the pointer argument passed with -** this file-control is NULL. However, if the database file is being synced -** as part of a multi-database commit, the argument points to a nul-terminated -** string containing the transactions master-journal file name. VFSes that -** do not need this signal should silently ignore this opcode. Applications -** should not call [sqlite3_file_control()] with this opcode as doing so may -** disrupt the operation of the specialized VFSes that do require it. -** -**
  • [[SQLITE_FCNTL_COMMIT_PHASETWO]] -** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite -** and sent to the VFS after a transaction has been committed immediately -** but before the database is unlocked. VFSes that do not need this signal -** should silently ignore this opcode. Applications should not call -** [sqlite3_file_control()] with this opcode as doing so may disrupt the -** operation of the specialized VFSes that do require it. -** -**
  • [[SQLITE_FCNTL_WIN32_AV_RETRY]] -** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic -** retry counts and intervals for certain disk I/O operations for the -** windows [VFS] in order to provide robustness in the presence of -** anti-virus programs. By default, the windows VFS will retry file read, -** file write, and file delete operations up to 10 times, with a delay -** of 25 milliseconds before the first retry and with the delay increasing -** by an additional 25 milliseconds with each subsequent retry. This -** opcode allows these two values (10 retries and 25 milliseconds of delay) -** to be adjusted. The values are changed for all database connections -** within the same process. The argument is a pointer to an array of two -** integers where the first integer i the new retry count and the second -** integer is the delay. If either integer is negative, then the setting -** is not changed but instead the prior value of that setting is written -** into the array entry, allowing the current retry settings to be -** interrogated. The zDbName parameter is ignored. -** -**
  • [[SQLITE_FCNTL_PERSIST_WAL]] -** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the -** persistent [WAL | Write Ahead Log] setting. By default, the auxiliary -** write ahead log and shared memory files used for transaction control -** are automatically deleted when the latest connection to the database -** closes. Setting persistent WAL mode causes those files to persist after -** close. Persisting the files is useful when other processes that do not -** have write permission on the directory containing the database file want -** to read the database file, as the WAL and shared memory files must exist -** in order for the database to be readable. The fourth parameter to -** [sqlite3_file_control()] for this opcode should be a pointer to an integer. -** That integer is 0 to disable persistent WAL mode or 1 to enable persistent -** WAL mode. If the integer is -1, then it is overwritten with the current -** WAL persistence setting. -** -**
  • [[SQLITE_FCNTL_POWERSAFE_OVERWRITE]] -** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the -** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting -** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the -** xDeviceCharacteristics methods. The fourth parameter to -** [sqlite3_file_control()] for this opcode should be a pointer to an integer. -** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage -** mode. If the integer is -1, then it is overwritten with the current -** zero-damage mode setting. -** -**
  • [[SQLITE_FCNTL_OVERWRITE]] -** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening -** a write transaction to indicate that, unless it is rolled back for some -** reason, the entire database file will be overwritten by the current -** transaction. This is used by VACUUM operations. -** -**
  • [[SQLITE_FCNTL_VFSNAME]] -** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of -** all [VFSes] in the VFS stack. The names are of all VFS shims and the -** final bottom-level VFS are written into memory obtained from -** [sqlite3_malloc()] and the result is stored in the char* variable -** that the fourth parameter of [sqlite3_file_control()] points to. -** The caller is responsible for freeing the memory when done. As with -** all file-control actions, there is no guarantee that this will actually -** do anything. Callers should initialize the char* variable to a NULL -** pointer in case this file-control is not implemented. This file-control -** is intended for diagnostic use only. -** -**
  • [[SQLITE_FCNTL_VFS_POINTER]] -** ^The [SQLITE_FCNTL_VFS_POINTER] opcode finds a pointer to the top-level -** [VFSes] currently in use. ^(The argument X in -** sqlite3_file_control(db,SQLITE_FCNTL_VFS_POINTER,X) must be -** of type "[sqlite3_vfs] **". This opcodes will set *X -** to a pointer to the top-level VFS.)^ -** ^When there are multiple VFS shims in the stack, this opcode finds the -** upper-most shim only. -** -**
  • [[SQLITE_FCNTL_PRAGMA]] -** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] -** file control is sent to the open [sqlite3_file] object corresponding -** to the database file to which the pragma statement refers. ^The argument -** to the [SQLITE_FCNTL_PRAGMA] file control is an array of -** pointers to strings (char**) in which the second element of the array -** is the name of the pragma and the third element is the argument to the -** pragma or NULL if the pragma has no argument. ^The handler for an -** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element -** of the char** argument point to a string obtained from [sqlite3_mprintf()] -** or the equivalent and that string will become the result of the pragma or -** the error message if the pragma fails. ^If the -** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal -** [PRAGMA] processing continues. ^If the [SQLITE_FCNTL_PRAGMA] -** file control returns [SQLITE_OK], then the parser assumes that the -** VFS has handled the PRAGMA itself and the parser generates a no-op -** prepared statement if result string is NULL, or that returns a copy -** of the result string if the string is non-NULL. -** ^If the [SQLITE_FCNTL_PRAGMA] file control returns -** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means -** that the VFS encountered an error while handling the [PRAGMA] and the -** compilation of the PRAGMA fails with an error. ^The [SQLITE_FCNTL_PRAGMA] -** file control occurs at the beginning of pragma statement analysis and so -** it is able to override built-in [PRAGMA] statements. -** -**
  • [[SQLITE_FCNTL_BUSYHANDLER]] -** ^The [SQLITE_FCNTL_BUSYHANDLER] -** file-control may be invoked by SQLite on the database file handle -** shortly after it is opened in order to provide a custom VFS with access -** to the connections busy-handler callback. The argument is of type (void **) -** - an array of two (void *) values. The first (void *) actually points -** to a function of type (int (*)(void *)). In order to invoke the connections -** busy-handler, this function should be invoked with the second (void *) in -** the array as the only argument. If it returns non-zero, then the operation -** should be retried. If it returns zero, the custom VFS should abandon the -** current operation. -** -**
  • [[SQLITE_FCNTL_TEMPFILENAME]] -** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control -** to have SQLite generate a -** temporary filename using the same algorithm that is followed to generate -** temporary filenames for TEMP tables and other internal uses. The -** argument should be a char** which will be filled with the filename -** written into memory obtained from [sqlite3_malloc()]. The caller should -** invoke [sqlite3_free()] on the result to avoid a memory leak. -** -**
  • [[SQLITE_FCNTL_MMAP_SIZE]] -** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the -** maximum number of bytes that will be used for memory-mapped I/O. -** The argument is a pointer to a value of type sqlite3_int64 that -** is an advisory maximum number of bytes in the file to memory map. The -** pointer is overwritten with the old value. The limit is not changed if -** the value originally pointed to is negative, and so the current limit -** can be queried by passing in a pointer to a negative number. This -** file-control is used internally to implement [PRAGMA mmap_size]. -** -**
  • [[SQLITE_FCNTL_TRACE]] -** The [SQLITE_FCNTL_TRACE] file control provides advisory information -** to the VFS about what the higher layers of the SQLite stack are doing. -** This file control is used by some VFS activity tracing [shims]. -** The argument is a zero-terminated string. Higher layers in the -** SQLite stack may generate instances of this file control if -** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled. -** -**
  • [[SQLITE_FCNTL_HAS_MOVED]] -** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a -** pointer to an integer and it writes a boolean into that integer depending -** on whether or not the file has been renamed, moved, or deleted since it -** was first opened. -** -**
  • [[SQLITE_FCNTL_WIN32_SET_HANDLE]] -** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This -** opcode causes the xFileControl method to swap the file handle with the one -** pointed to by the pArg argument. This capability is used during testing -** and only needs to be supported when SQLITE_TEST is defined. -** -**
  • [[SQLITE_FCNTL_WAL_BLOCK]] -** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might -** be advantageous to block on the next WAL lock if the lock is not immediately -** available. The WAL subsystem issues this signal during rare -** circumstances in order to fix a problem with priority inversion. -** Applications should not use this file-control. -** -**
  • [[SQLITE_FCNTL_ZIPVFS]] -** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other -** VFS should return SQLITE_NOTFOUND for this opcode. -** -**
  • [[SQLITE_FCNTL_RBU]] -** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by -** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for -** this opcode. -**
-*/ -#define SQLITE_FCNTL_LOCKSTATE 1 -#define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 -#define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 -#define SQLITE_FCNTL_LAST_ERRNO 4 -#define SQLITE_FCNTL_SIZE_HINT 5 -#define SQLITE_FCNTL_CHUNK_SIZE 6 -#define SQLITE_FCNTL_FILE_POINTER 7 -#define SQLITE_FCNTL_SYNC_OMITTED 8 -#define SQLITE_FCNTL_WIN32_AV_RETRY 9 -#define SQLITE_FCNTL_PERSIST_WAL 10 -#define SQLITE_FCNTL_OVERWRITE 11 -#define SQLITE_FCNTL_VFSNAME 12 -#define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 -#define SQLITE_FCNTL_PRAGMA 14 -#define SQLITE_FCNTL_BUSYHANDLER 15 -#define SQLITE_FCNTL_TEMPFILENAME 16 -#define SQLITE_FCNTL_MMAP_SIZE 18 -#define SQLITE_FCNTL_TRACE 19 -#define SQLITE_FCNTL_HAS_MOVED 20 -#define SQLITE_FCNTL_SYNC 21 -#define SQLITE_FCNTL_COMMIT_PHASETWO 22 -#define SQLITE_FCNTL_WIN32_SET_HANDLE 23 -#define SQLITE_FCNTL_WAL_BLOCK 24 -#define SQLITE_FCNTL_ZIPVFS 25 -#define SQLITE_FCNTL_RBU 26 -#define SQLITE_FCNTL_VFS_POINTER 27 -#define SQLITE_FCNTL_JOURNAL_POINTER 28 - -/* deprecated names */ -#define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE -#define SQLITE_SET_LOCKPROXYFILE SQLITE_FCNTL_SET_LOCKPROXYFILE -#define SQLITE_LAST_ERRNO SQLITE_FCNTL_LAST_ERRNO - - -/* -** CAPI3REF: Mutex Handle -** -** The mutex module within SQLite defines [sqlite3_mutex] to be an -** abstract type for a mutex object. The SQLite core never looks -** at the internal representation of an [sqlite3_mutex]. It only -** deals with pointers to the [sqlite3_mutex] object. -** -** Mutexes are created using [sqlite3_mutex_alloc()]. -*/ -typedef struct sqlite3_mutex sqlite3_mutex; - -/* -** CAPI3REF: OS Interface Object -** -** An instance of the sqlite3_vfs object defines the interface between -** the SQLite core and the underlying operating system. The "vfs" -** in the name of the object stands for "virtual file system". See -** the [VFS | VFS documentation] for further information. -** -** The value of the iVersion field is initially 1 but may be larger in -** future versions of SQLite. Additional fields may be appended to this -** object when the iVersion value is increased. Note that the structure -** of the sqlite3_vfs object changes in the transaction between -** SQLite version 3.5.9 and 3.6.0 and yet the iVersion field was not -** modified. -** -** The szOsFile field is the size of the subclassed [sqlite3_file] -** structure used by this VFS. mxPathname is the maximum length of -** a pathname in this VFS. -** -** Registered sqlite3_vfs objects are kept on a linked list formed by -** the pNext pointer. The [sqlite3_vfs_register()] -** and [sqlite3_vfs_unregister()] interfaces manage this list -** in a thread-safe way. The [sqlite3_vfs_find()] interface -** searches the list. Neither the application code nor the VFS -** implementation should use the pNext pointer. -** -** The pNext field is the only field in the sqlite3_vfs -** structure that SQLite will ever modify. SQLite will only access -** or modify this field while holding a particular static mutex. -** The application should never modify anything within the sqlite3_vfs -** object once the object has been registered. -** -** The zName field holds the name of the VFS module. The name must -** be unique across all VFS modules. -** -** [[sqlite3_vfs.xOpen]] -** ^SQLite guarantees that the zFilename parameter to xOpen -** is either a NULL pointer or string obtained -** from xFullPathname() with an optional suffix added. 
-** ^If a suffix is added to the zFilename parameter, it will
-** consist of a single "-" character followed by no more than
-** 11 alphanumeric and/or "-" characters.
-** ^SQLite further guarantees that
-** the string will be valid and unchanged until xClose() is
-** called.  Because of the previous sentence,
-** the [sqlite3_file] can safely store a pointer to the
-** filename if it needs to remember the filename for some reason.
-** If the zFilename parameter to xOpen is a NULL pointer then xOpen
-** must invent its own temporary name for the file.  ^Whenever the
-** zFilename parameter is NULL it will also be the case that the
-** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE].
-**
-** The flags argument to xOpen() includes all bits set in
-** the flags argument to [sqlite3_open_v2()].  Or if [sqlite3_open()]
-** or [sqlite3_open16()] is used, then flags includes at least
-** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE].
-** If xOpen() opens a file read-only then it sets *pOutFlags to
-** include [SQLITE_OPEN_READONLY].  Other bits in *pOutFlags may be set.
-**
-** ^(SQLite will also add one of the following flags to the xOpen()
-** call, depending on the object being opened:
-**
-** <ul>
-** <li>  [SQLITE_OPEN_MAIN_DB]
-** <li>  [SQLITE_OPEN_MAIN_JOURNAL]
-** <li>  [SQLITE_OPEN_TEMP_DB]
-** <li>  [SQLITE_OPEN_TEMP_JOURNAL]
-** <li>  [SQLITE_OPEN_TRANSIENT_DB]
-** <li>  [SQLITE_OPEN_SUBJOURNAL]
-** <li>  [SQLITE_OPEN_MASTER_JOURNAL]
-** <li>  [SQLITE_OPEN_WAL]
-** </ul>
)^ -** -** The file I/O implementation can use the object type flags to -** change the way it deals with files. For example, an application -** that does not care about crash recovery or rollback might make -** the open of a journal file a no-op. Writes to this journal would -** also be no-ops, and any attempt to read the journal would return -** SQLITE_IOERR. Or the implementation might recognize that a database -** file will be doing page-aligned sector reads and writes in a random -** order and set up its I/O subsystem accordingly. -** -** SQLite might also add one of the following flags to the xOpen method: -** -**
-** <ul>
-** <li>  [SQLITE_OPEN_DELETEONCLOSE]
-** <li>  [SQLITE_OPEN_EXCLUSIVE]
-** </ul>
-**
-** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be
-** deleted when it is closed.  ^The [SQLITE_OPEN_DELETEONCLOSE] flag
-** will be set for TEMP databases and their journals, transient
-** databases, and subjournals.
-**
-** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction
-** with the [SQLITE_OPEN_CREATE] flag, which are both directly
-** analogous to the O_EXCL and O_CREAT flags of the POSIX open()
-** API.  The SQLITE_OPEN_EXCLUSIVE flag, when paired with
-** SQLITE_OPEN_CREATE, is used to indicate that the file should always
-** be created, and that it is an error if it already exists.
-** It is not used to indicate the file should be opened
-** for exclusive access.
-**
-** ^At least szOsFile bytes of memory are allocated by SQLite
-** to hold the [sqlite3_file] structure passed as the third
-** argument to xOpen.  The xOpen method does not have to
-** allocate the structure; it should just fill it in.  Note that
-** the xOpen method must set the sqlite3_file.pMethods to either
-** a valid [sqlite3_io_methods] object or to NULL.  xOpen must do
-** this even if the open fails.  SQLite expects that the sqlite3_file.pMethods
-** element will be valid after xOpen returns regardless of the success
-** or failure of the xOpen call.
-**
-** [[sqlite3_vfs.xAccess]]
-** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS]
-** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to
-** test whether a file is readable and writable, or [SQLITE_ACCESS_READ]
-** to test whether a file is at least readable.  The file can be a
-** directory.
-**
-** ^SQLite will always allocate at least mxPathname+1 bytes for the
-** output buffer xFullPathname.  The exact size of the output buffer
-** is also passed as a parameter to both methods.  If the output buffer
-** is not large enough, [SQLITE_CANTOPEN] should be returned.  Since this is
-** handled as a fatal error by SQLite, vfs implementations should endeavor
-** to prevent this by setting mxPathname to a sufficiently large value.
-**
-** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64()
-** interfaces are not strictly a part of the filesystem, but they are
-** included in the VFS structure for completeness.
-** The xRandomness() function attempts to return nBytes bytes
-** of good-quality randomness into zOut.  The return value is
-** the actual number of bytes of randomness obtained.
-** The xSleep() method causes the calling thread to sleep for at
-** least the number of microseconds given.  ^The xCurrentTime()
-** method returns a Julian Day Number for the current date and time as
-** a floating point value.
-** ^The xCurrentTimeInt64() method returns, as an integer, the Julian
-** Day Number multiplied by 86400000 (the number of milliseconds in
-** a 24-hour day).
-** ^SQLite will use the xCurrentTimeInt64() method to get the current
-** date and time if that method is available (if iVersion is 2 or
-** greater and the function pointer is not NULL) and will fall back
-** to xCurrentTime() if xCurrentTimeInt64() is unavailable.
-**
-** ^The xSetSystemCall(), xGetSystemCall(), and xNextSystemCall() interfaces
-** are not used by the SQLite core.  These optional interfaces are provided
-** by some VFSes to facilitate testing of the VFS code.  By overriding
-** system calls with functions under its control, a test program can
-** simulate faults and error conditions that would otherwise be difficult
-** or impossible to induce.
The set of system calls that can be overridden -** varies from one VFS to another, and from one version of the same VFS to the -** next. Applications that use these interfaces must be prepared for any -** or all of these interfaces to be NULL or for their behavior to change -** from one release to the next. Applications must not attempt to access -** any of these methods if the iVersion of the VFS is less than 3. -*/ -typedef struct sqlite3_vfs sqlite3_vfs; -typedef void (*sqlite3_syscall_ptr)(void); -struct sqlite3_vfs { - int iVersion; /* Structure version number (currently 3) */ - int szOsFile; /* Size of subclassed sqlite3_file */ - int mxPathname; /* Maximum file pathname length */ - sqlite3_vfs *pNext; /* Next registered VFS */ - const char *zName; /* Name of this virtual file system */ - void *pAppData; /* Pointer to application-specific data */ - int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, - int flags, int *pOutFlags); - int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); - int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); - int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut); - void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename); - void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg); - void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void); - void (*xDlClose)(sqlite3_vfs*, void*); - int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut); - int (*xSleep)(sqlite3_vfs*, int microseconds); - int (*xCurrentTime)(sqlite3_vfs*, double*); - int (*xGetLastError)(sqlite3_vfs*, int, char *); - /* - ** The methods above are in version 1 of the sqlite_vfs object - ** definition. Those that follow are added in version 2 or later - */ - int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*); - /* - ** The methods above are in versions 1 and 2 of the sqlite_vfs object. - ** Those below are for version 3 and greater. - */ - int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr); - sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName); - const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName); - /* - ** The methods above are in versions 1 through 3 of the sqlite_vfs object. - ** New fields may be appended in future versions. The iVersion - ** value will increment whenever this happens. - */ -}; - -/* -** CAPI3REF: Flags for the xAccess VFS method -** -** These integer constants can be used as the third parameter to -** the xAccess method of an [sqlite3_vfs] object. They determine -** what kind of permissions the xAccess method is looking for. -** With SQLITE_ACCESS_EXISTS, the xAccess method -** simply checks whether the file exists. -** With SQLITE_ACCESS_READWRITE, the xAccess method -** checks whether the named directory is both readable and writable -** (in other words, if files can be added, removed, and renamed within -** the directory). -** The SQLITE_ACCESS_READWRITE constant is currently used only by the -** [temp_store_directory pragma], though this could change in a future -** release of SQLite. -** With SQLITE_ACCESS_READ, the xAccess method -** checks whether the file is readable. The SQLITE_ACCESS_READ constant is -** currently unused, though it might be used in a future release of -** SQLite. 
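-**
-** Example (illustrative sketch; the filename is hypothetical): invoking the
-** xAccess method of the default VFS to test whether a file exists:
-**
-**     sqlite3_vfs *pVfs = sqlite3_vfs_find(0);  /* 0 selects the default VFS */
-**     int bExists = 0;
-**     if( pVfs ){
-**       pVfs->xAccess(pVfs, "/tmp/example.db", SQLITE_ACCESS_EXISTS, &bExists);
-**     }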
-*/ -#define SQLITE_ACCESS_EXISTS 0 -#define SQLITE_ACCESS_READWRITE 1 /* Used by PRAGMA temp_store_directory */ -#define SQLITE_ACCESS_READ 2 /* Unused */ - -/* -** CAPI3REF: Flags for the xShmLock VFS method -** -** These integer constants define the various locking operations -** allowed by the xShmLock method of [sqlite3_io_methods]. The -** following are the only legal combinations of flags to the -** xShmLock method: -** -**
-** <ul>
-** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_SHARED
-** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE
-** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED
-** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE
-** </ul>
-** -** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as -** was given on the corresponding lock. -** -** The xShmLock method can transition between unlocked and SHARED or -** between unlocked and EXCLUSIVE. It cannot transition between SHARED -** and EXCLUSIVE. -*/ -#define SQLITE_SHM_UNLOCK 1 -#define SQLITE_SHM_LOCK 2 -#define SQLITE_SHM_SHARED 4 -#define SQLITE_SHM_EXCLUSIVE 8 - -/* -** CAPI3REF: Maximum xShmLock index -** -** The xShmLock method on [sqlite3_io_methods] may use values -** between 0 and this upper bound as its "offset" argument. -** The SQLite core will never attempt to acquire or release a -** lock outside of this range -*/ -#define SQLITE_SHM_NLOCK 8 - - -/* -** CAPI3REF: Initialize The SQLite Library -** -** ^The sqlite3_initialize() routine initializes the -** SQLite library. ^The sqlite3_shutdown() routine -** deallocates any resources that were allocated by sqlite3_initialize(). -** These routines are designed to aid in process initialization and -** shutdown on embedded systems. Workstation applications using -** SQLite normally do not need to invoke either of these routines. -** -** A call to sqlite3_initialize() is an "effective" call if it is -** the first time sqlite3_initialize() is invoked during the lifetime of -** the process, or if it is the first time sqlite3_initialize() is invoked -** following a call to sqlite3_shutdown(). ^(Only an effective call -** of sqlite3_initialize() does any initialization. All other calls -** are harmless no-ops.)^ -** -** A call to sqlite3_shutdown() is an "effective" call if it is the first -** call to sqlite3_shutdown() since the last sqlite3_initialize(). ^(Only -** an effective call to sqlite3_shutdown() does any deinitialization. -** All other valid calls to sqlite3_shutdown() are harmless no-ops.)^ -** -** The sqlite3_initialize() interface is threadsafe, but sqlite3_shutdown() -** is not. The sqlite3_shutdown() interface must only be called from a -** single thread. All open [database connections] must be closed and all -** other SQLite resources must be deallocated prior to invoking -** sqlite3_shutdown(). -** -** Among other things, ^sqlite3_initialize() will invoke -** sqlite3_os_init(). Similarly, ^sqlite3_shutdown() -** will invoke sqlite3_os_end(). -** -** ^The sqlite3_initialize() routine returns [SQLITE_OK] on success. -** ^If for some reason, sqlite3_initialize() is unable to initialize -** the library (perhaps it is unable to allocate a needed resource such -** as a mutex) it returns an [error code] other than [SQLITE_OK]. -** -** ^The sqlite3_initialize() routine is called internally by many other -** SQLite interfaces so that an application usually does not need to -** invoke sqlite3_initialize() directly. For example, [sqlite3_open()] -** calls sqlite3_initialize() so the SQLite library will be automatically -** initialized when [sqlite3_open()] is called if it has not be initialized -** already. ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT] -** compile-time option, then the automatic calls to sqlite3_initialize() -** are omitted and the application must call sqlite3_initialize() directly -** prior to using any other SQLite interface. For maximum portability, -** it is recommended that applications always invoke sqlite3_initialize() -** directly prior to using any other SQLite interface. Future releases -** of SQLite may require this. 
In other words, the behavior exhibited -** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the -** default behavior in some future release of SQLite. -** -** The sqlite3_os_init() routine does operating-system specific -** initialization of the SQLite library. The sqlite3_os_end() -** routine undoes the effect of sqlite3_os_init(). Typical tasks -** performed by these routines include allocation or deallocation -** of static resources, initialization of global variables, -** setting up a default [sqlite3_vfs] module, or setting up -** a default configuration using [sqlite3_config()]. -** -** The application should never invoke either sqlite3_os_init() -** or sqlite3_os_end() directly. The application should only invoke -** sqlite3_initialize() and sqlite3_shutdown(). The sqlite3_os_init() -** interface is called automatically by sqlite3_initialize() and -** sqlite3_os_end() is called by sqlite3_shutdown(). Appropriate -** implementations for sqlite3_os_init() and sqlite3_os_end() -** are built into SQLite when it is compiled for Unix, Windows, or OS/2. -** When [custom builds | built for other platforms] -** (using the [SQLITE_OS_OTHER=1] compile-time -** option) the application must supply a suitable implementation for -** sqlite3_os_init() and sqlite3_os_end(). An application-supplied -** implementation of sqlite3_os_init() or sqlite3_os_end() -** must return [SQLITE_OK] on success and some other [error code] upon -** failure. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_initialize(void); -SQLITE_API int SQLITE_STDCALL sqlite3_shutdown(void); -SQLITE_API int SQLITE_STDCALL sqlite3_os_init(void); -SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void); - -/* -** CAPI3REF: Configuring The SQLite Library -** -** The sqlite3_config() interface is used to make global configuration -** changes to SQLite in order to tune SQLite to the specific needs of -** the application. The default configuration is recommended for most -** applications and so this routine is usually not necessary. It is -** provided to support rare applications with unusual needs. -** -** The sqlite3_config() interface is not threadsafe. The application -** must ensure that no other SQLite interfaces are invoked by other -** threads while sqlite3_config() is running. -** -** The sqlite3_config() interface -** may only be invoked prior to library initialization using -** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. -** ^If sqlite3_config() is called after [sqlite3_initialize()] and before -** [sqlite3_shutdown()] then it will return SQLITE_MISUSE. -** Note, however, that ^sqlite3_config() can be called as part of the -** implementation of an application-defined [sqlite3_os_init()]. -** -** The first argument to sqlite3_config() is an integer -** [configuration option] that determines -** what property of SQLite is to be configured. Subsequent arguments -** vary depending on the [configuration option] -** in the first argument. -** -** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. -** ^If the option is unknown or SQLite is unable to set the option -** then this routine returns a non-zero [error code]. -*/ -SQLITE_API int SQLITE_CDECL sqlite3_config(int, ...); - -/* -** CAPI3REF: Configure database connections -** METHOD: sqlite3 -** -** The sqlite3_db_config() interface is used to make configuration -** changes to a [database connection]. 
The interface is similar to -** [sqlite3_config()] except that the changes apply to a single -** [database connection] (specified in the first argument). -** -** The second argument to sqlite3_db_config(D,V,...) is the -** [SQLITE_DBCONFIG_LOOKASIDE | configuration verb] - an integer code -** that indicates what aspect of the [database connection] is being configured. -** Subsequent arguments vary depending on the configuration verb. -** -** ^Calls to sqlite3_db_config() return SQLITE_OK if and only if -** the call is considered successful. -*/ -SQLITE_API int SQLITE_CDECL sqlite3_db_config(sqlite3*, int op, ...); - -/* -** CAPI3REF: Memory Allocation Routines -** -** An instance of this object defines the interface between SQLite -** and low-level memory allocation routines. -** -** This object is used in only one place in the SQLite interface. -** A pointer to an instance of this object is the argument to -** [sqlite3_config()] when the configuration option is -** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC]. -** By creating an instance of this object -** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC]) -** during configuration, an application can specify an alternative -** memory allocation subsystem for SQLite to use for all of its -** dynamic memory needs. -** -** Note that SQLite comes with several [built-in memory allocators] -** that are perfectly adequate for the overwhelming majority of applications -** and that this object is only useful to a tiny minority of applications -** with specialized memory allocation requirements. This object is -** also used during testing of SQLite in order to specify an alternative -** memory allocator that simulates memory out-of-memory conditions in -** order to verify that SQLite recovers gracefully from such -** conditions. -** -** The xMalloc, xRealloc, and xFree methods must work like the -** malloc(), realloc() and free() functions from the standard C library. -** ^SQLite guarantees that the second argument to -** xRealloc is always a value returned by a prior call to xRoundup. -** -** xSize should return the allocated size of a memory allocation -** previously obtained from xMalloc or xRealloc. The allocated size -** is always at least as big as the requested size but may be larger. -** -** The xRoundup method returns what would be the allocated size of -** a memory allocation given a particular requested size. Most memory -** allocators round up memory allocations at least to the next multiple -** of 8. Some allocators round up to a larger multiple or to a power of 2. -** Every memory allocation request coming in through [sqlite3_malloc()] -** or [sqlite3_realloc()] first calls xRoundup. If xRoundup returns 0, -** that causes the corresponding memory allocation to fail. -** -** The xInit method initializes the memory allocator. For example, -** it might allocate any require mutexes or initialize internal data -** structures. The xShutdown method is invoked (indirectly) by -** [sqlite3_shutdown()] and should deallocate any resources acquired -** by xInit. The pAppData pointer is used as the only parameter to -** xInit and xShutdown. -** -** SQLite holds the [SQLITE_MUTEX_STATIC_MASTER] mutex when it invokes -** the xInit method, so the xInit method need not be threadsafe. The -** xShutdown method is only called from [sqlite3_shutdown()] so it does -** not need to be threadsafe either. 
For all other methods, SQLite -** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the -** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which -** it is by default) and so the methods are automatically serialized. -** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other -** methods must be threadsafe or else make their own arrangements for -** serialization. -** -** SQLite will never invoke xInit() more than once without an intervening -** call to xShutdown(). -*/ -typedef struct sqlite3_mem_methods sqlite3_mem_methods; -struct sqlite3_mem_methods { - void *(*xMalloc)(int); /* Memory allocation function */ - void (*xFree)(void*); /* Free a prior allocation */ - void *(*xRealloc)(void*,int); /* Resize an allocation */ - int (*xSize)(void*); /* Return the size of an allocation */ - int (*xRoundup)(int); /* Round up request size to allocation size */ - int (*xInit)(void*); /* Initialize the memory allocator */ - void (*xShutdown)(void*); /* Deinitialize the memory allocator */ - void *pAppData; /* Argument to xInit() and xShutdown() */ -}; - -/* -** CAPI3REF: Configuration Options -** KEYWORDS: {configuration option} -** -** These constants are the available integer configuration options that -** can be passed as the first argument to the [sqlite3_config()] interface. -** -** New configuration options may be added in future releases of SQLite. -** Existing configuration options might be discontinued. Applications -** should check the return code from [sqlite3_config()] to make sure that -** the call worked. The [sqlite3_config()] interface will return a -** non-zero [error code] if a discontinued or unsupported configuration option -** is invoked. -** -**
-** [[SQLITE_CONFIG_SINGLETHREAD]]
<dt>SQLITE_CONFIG_SINGLETHREAD</dt>
-** <dd>
There are no arguments to this option. ^This option sets the -** [threading mode] to Single-thread. In other words, it disables -** all mutexing and puts SQLite into a mode where it can only be used -** by a single thread. ^If SQLite is compiled with -** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then -** it is not possible to change the [threading mode] from its default -** value of Single-thread and so [sqlite3_config()] will return -** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD -** configuration option.
-** -** [[SQLITE_CONFIG_MULTITHREAD]]
<dt>SQLITE_CONFIG_MULTITHREAD</dt>
-** <dd>
There are no arguments to this option. ^This option sets the -** [threading mode] to Multi-thread. In other words, it disables -** mutexing on [database connection] and [prepared statement] objects. -** The application is responsible for serializing access to -** [database connections] and [prepared statements]. But other mutexes -** are enabled so that SQLite will be safe to use in a multi-threaded -** environment as long as no two threads attempt to use the same -** [database connection] at the same time. ^If SQLite is compiled with -** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then -** it is not possible to set the Multi-thread [threading mode] and -** [sqlite3_config()] will return [SQLITE_ERROR] if called with the -** SQLITE_CONFIG_MULTITHREAD configuration option.
-** -** [[SQLITE_CONFIG_SERIALIZED]]
<dt>SQLITE_CONFIG_SERIALIZED</dt>
-** <dd>
There are no arguments to this option. ^This option sets the -** [threading mode] to Serialized. In other words, this option enables -** all mutexes including the recursive -** mutexes on [database connection] and [prepared statement] objects. -** In this mode (which is the default when SQLite is compiled with -** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access -** to [database connections] and [prepared statements] so that the -** application is free to use the same [database connection] or the -** same [prepared statement] in different threads at the same time. -** ^If SQLite is compiled with -** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then -** it is not possible to set the Serialized [threading mode] and -** [sqlite3_config()] will return [SQLITE_ERROR] if called with the -** SQLITE_CONFIG_SERIALIZED configuration option.
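-**
-** Example (illustrative sketch): selecting the Serialized threading mode
-** before the library is initialized; the call fails if the threading mode
-** is fixed at compile-time with SQLITE_THREADSAFE=0:
-**
-**     if( sqlite3_config(SQLITE_CONFIG_SERIALIZED)!=SQLITE_OK ){
-**       /* threading mode could not be changed */
-**     }
-**     sqlite3_initialize();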
-** -** [[SQLITE_CONFIG_MALLOC]]
<dt>SQLITE_CONFIG_MALLOC</dt>
-** <dd>
^(The SQLITE_CONFIG_MALLOC option takes a single argument which is -** a pointer to an instance of the [sqlite3_mem_methods] structure. -** The argument specifies -** alternative low-level memory allocation routines to be used in place of -** the memory allocation routines built into SQLite.)^ ^SQLite makes -** its own private copy of the content of the [sqlite3_mem_methods] structure -** before the [sqlite3_config()] call returns.
-** -** [[SQLITE_CONFIG_GETMALLOC]]
<dt>SQLITE_CONFIG_GETMALLOC</dt>
-** <dd>
^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which
-** is a pointer to an instance of the [sqlite3_mem_methods] structure.
-** The [sqlite3_mem_methods]
-** structure is filled with the currently defined memory allocation routines.)^
-** This option can be used to overload the default memory allocation
-** routines with a wrapper that simulates memory allocation failure or
-** tracks memory usage, for example.
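-**
-** Example (illustrative sketch of the overload pattern; the wrapper shown
-** merely counts calls): save the current allocator with
-** SQLITE_CONFIG_GETMALLOC, then install a wrapper with SQLITE_CONFIG_MALLOC
-** before [sqlite3_initialize()]:
-**
-**     static sqlite3_mem_methods defaultMem;
-**     static int nMallocCall = 0;
-**     static void *countingMalloc(int nByte){
-**       nMallocCall++;                      /* count every allocation */
-**       return defaultMem.xMalloc(nByte);   /* defer to the saved allocator */
-**     }
-**     int installCountingAllocator(void){
-**       sqlite3_mem_methods wrapped;
-**       int rc = sqlite3_config(SQLITE_CONFIG_GETMALLOC, &defaultMem);
-**       if( rc!=SQLITE_OK ) return rc;
-**       wrapped = defaultMem;
-**       wrapped.xMalloc = countingMalloc;
-**       return sqlite3_config(SQLITE_CONFIG_MALLOC, &wrapped);
-**     }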
-** -** [[SQLITE_CONFIG_MEMSTATUS]]
<dt>SQLITE_CONFIG_MEMSTATUS</dt>
-** <dd>
^The SQLITE_CONFIG_MEMSTATUS option takes a single argument of type int,
-** interpreted as a boolean, which enables or disables the collection of
-** memory allocation statistics.  ^(When memory allocation statistics are
-** disabled, the following SQLite interfaces become non-operational:
-** <ul>
-** <li> [sqlite3_memory_used()]
-** <li> [sqlite3_memory_highwater()]
-** <li> [sqlite3_soft_heap_limit64()]
-** <li> [sqlite3_status64()]
-** </ul>
)^ -** ^Memory allocation statistics are enabled by default unless SQLite is -** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory -** allocation statistics are disabled by default. -**
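-**
-** Example (illustrative sketch): disabling statistics tracking to avoid
-** the mutex that protects the counters; [sqlite3_memory_used()] stops
-** reporting meaningful values afterwards:
-**
-**     sqlite3_config(SQLITE_CONFIG_MEMSTATUS, 0);  /* 0: disable, 1: enable */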
-** -** [[SQLITE_CONFIG_SCRATCH]]
<dt>SQLITE_CONFIG_SCRATCH</dt>
-** <dd>
^The SQLITE_CONFIG_SCRATCH option specifies a static memory buffer
-** that SQLite can use for scratch memory.  ^(There are three arguments
-** to SQLITE_CONFIG_SCRATCH:  A pointer to an 8-byte
-** aligned memory buffer from which the scratch allocations will be
-** drawn, the size of each scratch allocation (sz),
-** and the maximum number of scratch allocations (N).)^
-** The first argument must be a pointer to an 8-byte aligned buffer
-** of at least sz*N bytes of memory.
-** ^SQLite will not use more than one scratch buffer per thread.
-** ^SQLite will never request a scratch buffer that is more than 6
-** times the database page size.
-** ^If SQLite needs additional
-** scratch memory beyond what is provided by this configuration option, then
-** [sqlite3_malloc()] will be used to obtain the memory needed.

-** ^When the application provides any amount of scratch memory using -** SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary large -** [sqlite3_malloc|heap allocations]. -** This can help [Robson proof|prevent memory allocation failures] due to heap -** fragmentation in low-memory embedded systems. -**
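-**
-** Example (illustrative sketch; sizes are arbitrary): a scratch pool of
-** N=2 slots of sz=6*4096 bytes each, following the six-times-page-size
-** guideline above for 4096-byte pages:
-**
-**     static sqlite3_int64 aScratch[(6*4096*2)/8];  /* 8-byte aligned */
-**     sqlite3_config(SQLITE_CONFIG_SCRATCH, aScratch, 6*4096, 2);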

-** -** [[SQLITE_CONFIG_PAGECACHE]]
<dt>SQLITE_CONFIG_PAGECACHE</dt>
-** <dd>
^The SQLITE_CONFIG_PAGECACHE option specifies a memory pool
-** that SQLite can use for the database page cache with the default page
-** cache implementation.
-** This configuration option is a no-op if an application-defined page
-** cache implementation is loaded using [SQLITE_CONFIG_PCACHE2].
-** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to
-** 8-byte aligned memory (pMem), the size of each page cache line (sz),
-** and the number of cache lines (N).
-** The sz argument should be the size of the largest database page
-** (a power of two between 512 and 65536) plus some extra bytes for each
-** page header.  ^The number of extra bytes needed by the page header
-** can be determined using [SQLITE_CONFIG_PCACHE_HDRSZ].
-** ^It is harmless, apart from the wasted memory,
-** for the sz parameter to be larger than necessary.  The pMem
-** argument must be either a NULL pointer or a pointer to an 8-byte
-** aligned block of memory of at least sz*N bytes, otherwise
-** subsequent behavior is undefined.
-** ^When pMem is not NULL, SQLite will strive to use the memory provided
-** to satisfy page cache needs, falling back to [sqlite3_malloc()] if
-** a page cache line is larger than sz bytes or if all of the pMem buffer
-** is exhausted.
-** ^If pMem is NULL and N is non-zero, then each database connection
-** does an initial bulk allocation for page cache memory
-** from [sqlite3_malloc()] sufficient for N cache lines if N is positive or
-** of -1024*N bytes if N is negative.  ^If additional
-** page cache memory is needed beyond what is provided by the initial
-** allocation, then SQLite goes to [sqlite3_malloc()] separately for each
-** additional cache line.
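-**
-** Example (illustrative sketch; assumes 4096-byte pages, 100 cache lines,
-** and <stdlib.h> for malloc()): query the per-page overhead first, then
-** hand the buffer to SQLite before initialization:
-**
-**     int szHdr = 0;
-**     sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &szHdr);
-**     int szLine = 4096 + szHdr;                /* page size plus header */
-**     void *pBuf = malloc((size_t)szLine*100);  /* alignment suffices on
-**                                               ** common platforms */
-**     sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, szLine, 100);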
-** -** [[SQLITE_CONFIG_HEAP]]
<dt>SQLITE_CONFIG_HEAP</dt>
-** <dd>
^The SQLITE_CONFIG_HEAP option specifies a static memory buffer
-** that SQLite will use for all of its dynamic memory allocation needs
-** beyond those provided for by [SQLITE_CONFIG_SCRATCH] and
-** [SQLITE_CONFIG_PAGECACHE].
-** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled
-** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns
-** [SQLITE_ERROR] if invoked otherwise.
-** ^There are three arguments to SQLITE_CONFIG_HEAP:
-** An 8-byte aligned pointer to the memory,
-** the number of bytes in the memory buffer, and the minimum allocation size.
-** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts
-** to using its default memory allocator (the system malloc() implementation),
-** undoing any prior invocation of [SQLITE_CONFIG_MALLOC].  ^If the
-** memory pointer is not NULL then the alternative memory
-** allocator is engaged to handle all of SQLite's memory allocation needs.
-** The first pointer (the memory pointer) must be aligned to an 8-byte
-** boundary or subsequent behavior of SQLite will be undefined.
-** The minimum allocation size is capped at 2**12.  Reasonable values
-** for the minimum allocation size are 2**5 through 2**8.
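-**
-** Example (illustrative sketch; only meaningful in builds compiled with
-** SQLITE_ENABLE_MEMSYS5): a 16 MiB private heap with a 64-byte minimum
-** allocation size:
-**
-**     static sqlite3_int64 aHeap[(16*1024*1024)/8];  /* 8-byte aligned */
-**     sqlite3_config(SQLITE_CONFIG_HEAP, aHeap, (int)sizeof(aHeap), 64);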
-** -** [[SQLITE_CONFIG_MUTEX]]
<dt>SQLITE_CONFIG_MUTEX</dt>
-** <dd>
^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a
-** pointer to an instance of the [sqlite3_mutex_methods] structure.
-** The argument specifies alternative low-level mutex routines to be used
-** in place of the mutex routines built into SQLite.)^  ^SQLite makes a copy of
-** the content of the [sqlite3_mutex_methods] structure before the call to
-** [sqlite3_config()] returns.  ^If SQLite is compiled with
-** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
-** the entire mutexing subsystem is omitted from the build and hence calls to
-** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will
-** return [SQLITE_ERROR].
-** -** [[SQLITE_CONFIG_GETMUTEX]]
<dt>SQLITE_CONFIG_GETMUTEX</dt>
-** <dd>
^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which -** is a pointer to an instance of the [sqlite3_mutex_methods] structure. The -** [sqlite3_mutex_methods] -** structure is filled with the currently defined mutex routines.)^ -** This option can be used to overload the default mutex allocation -** routines with a wrapper used to track mutex usage for performance -** profiling or testing, for example. ^If SQLite is compiled with -** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then -** the entire mutexing subsystem is omitted from the build and hence calls to -** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will -** return [SQLITE_ERROR].
-** -** [[SQLITE_CONFIG_LOOKASIDE]]
<dt>SQLITE_CONFIG_LOOKASIDE</dt>
-** <dd>
^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine -** the default size of lookaside memory on each [database connection]. -** The first argument is the -** size of each lookaside buffer slot and the second is the number of -** slots allocated to each database connection.)^ ^(SQLITE_CONFIG_LOOKASIDE -** sets the default lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE] -** option to [sqlite3_db_config()] can be used to change the lookaside -** configuration on individual connections.)^
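-**
-** Example (illustrative sketch; slot size and count are arbitrary, and an
-** open connection "db" is assumed for the second call): a process-wide
-** default, then a per-connection override via [sqlite3_db_config()]:
-**
-**     sqlite3_config(SQLITE_CONFIG_LOOKASIDE, 128, 512);      /* sz, cnt */
-**     sqlite3_db_config(db, SQLITE_DBCONFIG_LOOKASIDE, NULL, 1024, 64);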
-** -** [[SQLITE_CONFIG_PCACHE2]]
<dt>SQLITE_CONFIG_PCACHE2</dt>
-** <dd>
^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is -** a pointer to an [sqlite3_pcache_methods2] object. This object specifies -** the interface to a custom page cache implementation.)^ -** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.
-** -** [[SQLITE_CONFIG_GETPCACHE2]]
<dt>SQLITE_CONFIG_GETPCACHE2</dt>
-** <dd>
^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which
-** is a pointer to an [sqlite3_pcache_methods2] object.  SQLite copies the
-** current page cache implementation into that object.)^
-** -** [[SQLITE_CONFIG_LOG]]
<dt>SQLITE_CONFIG_LOG</dt>
-** <dd>
The SQLITE_CONFIG_LOG option is used to configure the SQLite
-** global [error log].
-** ^(The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a
-** function with a call signature of void(*)(void*,int,const char*),
-** and a pointer to void.)^  ^If the function pointer is not NULL, it is
-** invoked by [sqlite3_log()] to process each logging event.  ^If the
-** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op.
-** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is
-** passed through as the first parameter to the application-defined logger
-** function whenever that function is invoked.  ^The second parameter to
-** the logger function is a copy of the first parameter to the corresponding
-** [sqlite3_log()] call and is intended to be a [result code] or an
-** [extended result code].  ^The third parameter passed to the logger is
-** the log message after formatting via [sqlite3_snprintf()].
-** The SQLite logging interface is not reentrant; the logger function
-** supplied by the application must not invoke any SQLite interface.
-** In a multi-threaded application, the application-defined logger
-** function must be threadsafe.
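-**
-** Example (illustrative sketch; writes to stderr and assumes <stdio.h>):
-**
-**     static void logCallback(void *pArg, int iErrCode, const char *zMsg){
-**       fprintf(stderr, "sqlite (%d): %s\n", iErrCode, zMsg);
-**     }
-**     /* before sqlite3_initialize(): */
-**     sqlite3_config(SQLITE_CONFIG_LOG, logCallback, (void*)0);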
-** -** [[SQLITE_CONFIG_URI]]
<dt>SQLITE_CONFIG_URI</dt>
-** <dd>
^(The SQLITE_CONFIG_URI option takes a single argument of type int. -** If non-zero, then URI handling is globally enabled. If the parameter is zero, -** then URI handling is globally disabled.)^ ^If URI handling is globally -** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()], -** [sqlite3_open16()] or -** specified as part of [ATTACH] commands are interpreted as URIs, regardless -** of whether or not the [SQLITE_OPEN_URI] flag is set when the database -** connection is opened. ^If it is globally disabled, filenames are -** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the -** database connection is opened. ^(By default, URI handling is globally -** disabled. The default value may be changed by compiling with the -** [SQLITE_USE_URI] symbol defined.)^ -** -** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]]
<dt>SQLITE_CONFIG_COVERING_INDEX_SCAN</dt>
-** <dd>
^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer
-** argument which is interpreted as a boolean in order to enable or disable
-** the use of covering indices for full table scans in the query optimizer.
-** ^The default setting is determined
-** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on"
-** if that compile-time option is omitted.
-** The ability to disable the use of covering indices for full table scans
-** is provided because some incorrectly coded legacy applications might
-** malfunction when the optimization is enabled.  Providing the ability to
-** disable the optimization allows the older, buggy application code to work
-** without change even with newer versions of SQLite.
-**
-** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]]
<dt>SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE</dt>
-** <dd>
These options are obsolete and should not be used by new code. -** They are retained for backwards compatibility but are now no-ops. -**
-** -** [[SQLITE_CONFIG_SQLLOG]] -**
<dt>SQLITE_CONFIG_SQLLOG</dt>
-** <dd>
This option is only available if SQLite is compiled with the
-** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined.  The first argument should
-** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int).
-** The second should be of type (void*).  The callback is invoked by the library
-** in three separate circumstances, identified by the value passed as the
-** fourth parameter.  If the fourth parameter is 0, then the database connection
-** passed as the second argument has just been opened.  The third argument
-** points to a buffer containing the name of the main database file.  If the
-** fourth parameter is 1, then the SQL statement that the third parameter
-** points to has just been executed.  Or, if the fourth parameter is 2, then
-** the connection being passed as the second parameter is being closed.  The
-** third parameter is passed NULL in this case.  An example of using this
-** configuration option can be seen in the "test_sqllog.c" source file in
-** the canonical SQLite source tree.
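-**
-** Example (illustrative sketch; only meaningful in builds compiled with
-** SQLITE_ENABLE_SQLLOG, and assumes <stdio.h>):
-**
-**     static void xSqllog(void *pArg, sqlite3 *db, const char *zStr, int op){
-**       if( op==0 )      printf("opened: %s\n", zStr);
-**       else if( op==1 ) printf("ran: %s\n", zStr);
-**       else             printf("closing a connection\n");  /* zStr is NULL */
-**     }
-**     sqlite3_config(SQLITE_CONFIG_SQLLOG, xSqllog, (void*)0);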
-** -** [[SQLITE_CONFIG_MMAP_SIZE]] -**
<dt>SQLITE_CONFIG_MMAP_SIZE</dt>
-** <dd>
^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values -** that are the default mmap size limit (the default setting for -** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. -** ^The default setting can be overridden by each database connection using -** either the [PRAGMA mmap_size] command, or by using the -** [SQLITE_FCNTL_MMAP_SIZE] file control. ^(The maximum allowed mmap size -** will be silently truncated if necessary so that it does not exceed the -** compile-time maximum mmap size set by the -** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^ -** ^If either argument to this option is negative, then that argument is -** changed to its compile-time default. -** -** [[SQLITE_CONFIG_WIN32_HEAPSIZE]] -**
<dt>SQLITE_CONFIG_WIN32_HEAPSIZE</dt>
-** <dd>
^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is -** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro -** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value -** that specifies the maximum size of the created heap. -** -** [[SQLITE_CONFIG_PCACHE_HDRSZ]] -**
<dt>SQLITE_CONFIG_PCACHE_HDRSZ</dt>
-** <dd>
^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which -** is a pointer to an integer and writes into that integer the number of extra -** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE]. -** The amount of extra space required can change depending on the compiler, -** target platform, and SQLite version. -** -** [[SQLITE_CONFIG_PMASZ]] -**
<dt>SQLITE_CONFIG_PMASZ</dt>
-** <dd>
^The SQLITE_CONFIG_PMASZ option takes a single parameter which -** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded -** sorter to that integer. The default minimum PMA Size is set by the -** [SQLITE_SORTER_PMASZ] compile-time option. New threads are launched -** to help with sort operations when multithreaded sorting -** is enabled (using the [PRAGMA threads] command) and the amount of content -** to be sorted exceeds the page size times the minimum of the -** [PRAGMA cache_size] setting and this value. -** -** [[SQLITE_CONFIG_STMTJRNL_SPILL]] -**
<dt>SQLITE_CONFIG_STMTJRNL_SPILL</dt>
-** <dd>
^The SQLITE_CONFIG_STMTJRNL_SPILL option takes a single parameter which -** becomes the [statement journal] spill-to-disk threshold. -** [Statement journals] are held in memory until their size (in bytes) -** exceeds this threshold, at which point they are written to disk. -** Or if the threshold is -1, statement journals are always held -** exclusively in memory. -** Since many statement journals never become large, setting the spill -** threshold to a value such as 64KiB can greatly reduce the amount of -** I/O required to support statement rollback. -** The default value for this setting is controlled by the -** [SQLITE_STMTJRNL_SPILL] compile-time option. -**
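-**
-** Example (illustrative sketch): a typical lifecycle, with all global
-** configuration applied before initialization as required above:
-**
-**     sqlite3_config(SQLITE_CONFIG_SERIALIZED);
-**     sqlite3_config(SQLITE_CONFIG_MEMSTATUS, 1);
-**     sqlite3_initialize();
-**     /* ... open connections and run SQL ... */
-**     sqlite3_shutdown();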
-*/ -#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ -#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ -#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ -#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_SCRATCH 6 /* void*, int sz, int N */ -#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ -#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ -#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ -#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ -#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ -/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ -#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ -#define SQLITE_CONFIG_PCACHE 14 /* no-op */ -#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ -#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ -#define SQLITE_CONFIG_URI 17 /* int */ -#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ -#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ -#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ -#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ -#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ -#define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ -#define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ -#define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ -#define SQLITE_CONFIG_STMTJRNL_SPILL 26 /* int nByte */ - -/* -** CAPI3REF: Database Connection Configuration Options -** -** These constants are the available integer configuration options that -** can be passed as the second argument to the [sqlite3_db_config()] interface. -** -** New configuration options may be added in future releases of SQLite. -** Existing configuration options might be discontinued. Applications -** should check the return code from [sqlite3_db_config()] to make sure that -** the call worked. ^The [sqlite3_db_config()] interface will return a -** non-zero [error code] if a discontinued or unsupported configuration option -** is invoked. -** -**
-** <dl>
-** <dt>SQLITE_DBCONFIG_LOOKASIDE</dt>
-** <dd>
^This option takes three additional arguments that determine the
-** [lookaside memory allocator] configuration for the [database connection].
-** ^The first argument (the third parameter to [sqlite3_db_config()]) is a
-** pointer to a memory buffer to use for lookaside memory.
-** ^The first argument after the SQLITE_DBCONFIG_LOOKASIDE verb
-** may be NULL in which case SQLite will allocate the
-** lookaside buffer itself using [sqlite3_malloc()].  ^The second argument is the
-** size of each lookaside buffer slot.  ^The third argument is the number of
-** slots.  The size of the buffer in the first argument must be greater than
-** or equal to the product of the second and third arguments.  The buffer
-** must be aligned to an 8-byte boundary.  ^If the second argument to
-** SQLITE_DBCONFIG_LOOKASIDE is not a multiple of 8, it is internally
-** rounded down to the next smaller multiple of 8.  ^(The lookaside memory
-** configuration for a database connection can only be changed when that
-** connection is not currently using lookaside memory, or in other words
-** when the "current value" returned by
-** [sqlite3_db_status](D,[SQLITE_DBSTATUS_LOOKASIDE_USED],...) is zero.
-** Any attempt to change the lookaside memory configuration when lookaside
-** memory is in use leaves the configuration unchanged and returns
-** [SQLITE_BUSY].)^
-** </dd>
-**
-** <dt>SQLITE_DBCONFIG_ENABLE_FKEY</dt>
-** <dd>
^This option is used to enable or disable the enforcement of -** [foreign key constraints]. There should be two additional arguments. -** The first argument is an integer which is 0 to disable FK enforcement, -** positive to enable FK enforcement or negative to leave FK enforcement -** unchanged. The second parameter is a pointer to an integer into which -** is written 0 or 1 to indicate whether FK enforcement is off or on -** following this call. The second parameter may be a NULL pointer, in -** which case the FK enforcement setting is not reported back.
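-**
-** Example (illustrative sketch; assumes an open connection "db"): enabling
-** foreign-key enforcement and reading back the resulting state:
-**
-**     int bFkOn = -1;
-**     sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FKEY, 1, &bFkOn);
-**     /* bFkOn is now 1 */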
-** </dd>
-**
-** <dt>SQLITE_DBCONFIG_ENABLE_TRIGGER</dt>
-** <dd>
^This option is used to enable or disable [CREATE TRIGGER | triggers]. -** There should be two additional arguments. -** The first argument is an integer which is 0 to disable triggers, -** positive to enable triggers or negative to leave the setting unchanged. -** The second parameter is a pointer to an integer into which -** is written 0 or 1 to indicate whether triggers are disabled or enabled -** following this call. The second parameter may be a NULL pointer, in -** which case the trigger setting is not reported back.
-** </dd>
-**
-** <dt>SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER</dt>
-** <dd>
^This option is used to enable or disable the two-argument -** version of the [fts3_tokenizer()] function which is part of the -** [FTS3] full-text search engine extension. -** There should be two additional arguments. -** The first argument is an integer which is 0 to disable fts3_tokenizer() or -** positive to enable fts3_tokenizer() or negative to leave the setting -** unchanged. -** The second parameter is a pointer to an integer into which -** is written 0 or 1 to indicate whether fts3_tokenizer is disabled or enabled -** following this call. The second parameter may be a NULL pointer, in -** which case the new setting is not reported back.
-** </dd>
-**
-** <dt>SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION</dt>
-** <dd>
^This option is used to enable or disable the [sqlite3_load_extension()]
-** interface independently of the [load_extension()] SQL function.
-** The [sqlite3_enable_load_extension()] API enables or disables both the
-** C-API [sqlite3_load_extension()] and the SQL function [load_extension()].
-** There should be two additional arguments.
-** When the first argument to this interface is 1, then only the C-API is
-** enabled and the SQL function remains disabled.  If the first argument to
-** this interface is 0, then both the C-API and the SQL function are disabled.
-** If the first argument is -1, then no changes are made to the state of either
-** the C-API or the SQL function.
-** The second parameter is a pointer to an integer into which
-** is written 0 or 1 to indicate whether the [sqlite3_load_extension()]
-** interface is disabled or enabled following this call.  The second parameter
-** may be a NULL pointer, in which case the new setting is not reported back.
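-**
-** Example (illustrative sketch; assumes an open connection "db"): enabling
-** extension loading for the C-API only, leaving the SQL load_extension()
-** function disabled:
-**
-**     sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION, 1, (int*)0);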
-** -**
-*/ -#define SQLITE_DBCONFIG_LOOKASIDE 1001 /* void* int int */ -#define SQLITE_DBCONFIG_ENABLE_FKEY 1002 /* int int* */ -#define SQLITE_DBCONFIG_ENABLE_TRIGGER 1003 /* int int* */ -#define SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER 1004 /* int int* */ -#define SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION 1005 /* int int* */ - - -/* -** CAPI3REF: Enable Or Disable Extended Result Codes -** METHOD: sqlite3 -** -** ^The sqlite3_extended_result_codes() routine enables or disables the -** [extended result codes] feature of SQLite. ^The extended result -** codes are disabled by default for historical compatibility. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_extended_result_codes(sqlite3*, int onoff); - -/* -** CAPI3REF: Last Insert Rowid -** METHOD: sqlite3 -** -** ^Each entry in most SQLite tables (except for [WITHOUT ROWID] tables) -** has a unique 64-bit signed -** integer key called the [ROWID | "rowid"]. ^The rowid is always available -** as an undeclared column named ROWID, OID, or _ROWID_ as long as those -** names are not also used by explicitly declared columns. ^If -** the table has a column of type [INTEGER PRIMARY KEY] then that column -** is another alias for the rowid. -** -** ^The sqlite3_last_insert_rowid(D) interface returns the [rowid] of the -** most recent successful [INSERT] into a rowid table or [virtual table] -** on database connection D. -** ^Inserts into [WITHOUT ROWID] tables are not recorded. -** ^If no successful [INSERT]s into rowid tables -** have ever occurred on the database connection D, -** then sqlite3_last_insert_rowid(D) returns zero. -** -** ^(If an [INSERT] occurs within a trigger or within a [virtual table] -** method, then this routine will return the [rowid] of the inserted -** row as long as the trigger or virtual table method is running. -** But once the trigger or virtual table method ends, the value returned -** by this routine reverts to what it was before the trigger or virtual -** table method began.)^ -** -** ^An [INSERT] that fails due to a constraint violation is not a -** successful [INSERT] and does not change the value returned by this -** routine. ^Thus INSERT OR FAIL, INSERT OR IGNORE, INSERT OR ROLLBACK, -** and INSERT OR ABORT make no changes to the return value of this -** routine when their insertion fails. ^(When INSERT OR REPLACE -** encounters a constraint violation, it does not fail. The -** INSERT continues to completion after deleting rows that caused -** the constraint problem so INSERT OR REPLACE will always change -** the return value of this interface.)^ -** -** ^For the purposes of this routine, an [INSERT] is considered to -** be successful even if it is subsequently rolled back. -** -** This function is accessible to SQL statements via the -** [last_insert_rowid() SQL function]. -** -** If a separate thread performs a new [INSERT] on the same -** database connection while the [sqlite3_last_insert_rowid()] -** function is running and thus changes the last insert [rowid], -** then the value returned by [sqlite3_last_insert_rowid()] is -** unpredictable and might not equal either the old or the new -** last insert [rowid]. -*/ -SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_last_insert_rowid(sqlite3*); - -/* -** CAPI3REF: Count The Number Of Rows Modified -** METHOD: sqlite3 -** -** ^This function returns the number of rows modified, inserted or -** deleted by the most recently completed INSERT, UPDATE or DELETE -** statement on the database connection specified by the only parameter. 
-** ^Executing any other type of SQL statement does not modify the value -** returned by this function. -** -** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are -** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], -** [foreign key actions] or [REPLACE] constraint resolution are not counted. -** -** Changes to a view that are intercepted by -** [INSTEAD OF trigger | INSTEAD OF triggers] are not counted. ^The value -** returned by sqlite3_changes() immediately after an INSERT, UPDATE or -** DELETE statement run on a view is always zero. Only changes made to real -** tables are counted. -** -** Things are more complicated if the sqlite3_changes() function is -** executed while a trigger program is running. This may happen if the -** program uses the [changes() SQL function], or if some other callback -** function invokes sqlite3_changes() directly. Essentially: -** -**
-** <ul>
-** <li> ^(Before entering a trigger program the value returned by
-** sqlite3_changes() function is saved.  After the trigger program
-** has finished, the original value is restored.)^
-**
-** <li> ^(Within a trigger program each INSERT, UPDATE and DELETE
-** statement sets the value returned by sqlite3_changes()
-** upon completion as normal.  Of course, this value will not include
-** any changes performed by sub-triggers, as the sqlite3_changes()
-** value will be saved and restored after each sub-trigger has run.)^
-** </ul>
-** -** ^This means that if the changes() SQL function (or similar) is used -** by the first INSERT, UPDATE or DELETE statement within a trigger, it -** returns the value as set when the calling statement began executing. -** ^If it is used by the second or subsequent such statement within a trigger -** program, the value returned reflects the number of rows modified by the -** previous INSERT, UPDATE or DELETE statement within the same trigger. -** -** See also the [sqlite3_total_changes()] interface, the -** [count_changes pragma], and the [changes() SQL function]. -** -** If a separate thread makes changes on the same database connection -** while [sqlite3_changes()] is running then the value returned -** is unpredictable and not meaningful. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_changes(sqlite3*); - -/* -** CAPI3REF: Total Number Of Rows Modified -** METHOD: sqlite3 -** -** ^This function returns the total number of rows inserted, modified or -** deleted by all [INSERT], [UPDATE] or [DELETE] statements completed -** since the database connection was opened, including those executed as -** part of trigger programs. ^Executing any other type of SQL statement -** does not affect the value returned by sqlite3_total_changes(). -** -** ^Changes made as part of [foreign key actions] are included in the -** count, but those made as part of REPLACE constraint resolution are -** not. ^Changes to a view that are intercepted by INSTEAD OF triggers -** are not counted. -** -** See also the [sqlite3_changes()] interface, the -** [count_changes pragma], and the [total_changes() SQL function]. -** -** If a separate thread makes changes on the same database connection -** while [sqlite3_total_changes()] is running then the value -** returned is unpredictable and not meaningful. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_total_changes(sqlite3*); - -/* -** CAPI3REF: Interrupt A Long-Running Query -** METHOD: sqlite3 -** -** ^This function causes any pending database operation to abort and -** return at its earliest opportunity. This routine is typically -** called in response to a user action such as pressing "Cancel" -** or Ctrl-C where the user wants a long query operation to halt -** immediately. -** -** ^It is safe to call this routine from a thread different from the -** thread that is currently running the database operation. But it -** is not safe to call this routine with a [database connection] that -** is closed or might close before sqlite3_interrupt() returns. -** -** ^If an SQL operation is very nearly finished at the time when -** sqlite3_interrupt() is called, then it might not have an opportunity -** to be interrupted and might continue to completion. -** -** ^An SQL operation that is interrupted will return [SQLITE_INTERRUPT]. -** ^If the interrupted SQL operation is an INSERT, UPDATE, or DELETE -** that is inside an explicit transaction, then the entire transaction -** will be rolled back automatically. -** -** ^The sqlite3_interrupt(D) call is in effect until all currently running -** SQL statements on [database connection] D complete. ^Any new SQL statements -** that are started after the sqlite3_interrupt() call and before the -** running statements reaches zero are interrupted as if they had been -** running prior to the sqlite3_interrupt() call. ^New SQL statements -** that are started after the running statement count reaches zero are -** not effected by the sqlite3_interrupt(). 
-** ^A call to sqlite3_interrupt(D) that occurs when there are no running -** SQL statements is a no-op and has no effect on SQL statements -** that are started after the sqlite3_interrupt() call returns. -** -** If the database connection closes while [sqlite3_interrupt()] -** is running then bad things will likely happen. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_interrupt(sqlite3*); - -/* -** CAPI3REF: Determine If An SQL Statement Is Complete -** -** These routines are useful during command-line input to determine if the -** currently entered text seems to form a complete SQL statement or -** if additional input is needed before sending the text into -** SQLite for parsing. ^These routines return 1 if the input string -** appears to be a complete SQL statement. ^A statement is judged to be -** complete if it ends with a semicolon token and is not a prefix of a -** well-formed CREATE TRIGGER statement. ^Semicolons that are embedded within -** string literals or quoted identifier names or comments are not -** independent tokens (they are part of the token in which they are -** embedded) and thus do not count as a statement terminator. ^Whitespace -** and comments that follow the final semicolon are ignored. -** -** ^These routines return 0 if the statement is incomplete. ^If a -** memory allocation fails, then SQLITE_NOMEM is returned. -** -** ^These routines do not parse the SQL statements thus -** will not detect syntactically incorrect SQL. -** -** ^(If SQLite has not been initialized using [sqlite3_initialize()] prior -** to invoking sqlite3_complete16() then sqlite3_initialize() is invoked -** automatically by sqlite3_complete16(). If that initialization fails, -** then the return value from sqlite3_complete16() will be non-zero -** regardless of whether or not the input SQL is complete.)^ -** -** The input to [sqlite3_complete()] must be a zero-terminated -** UTF-8 string. -** -** The input to [sqlite3_complete16()] must be a zero-terminated -** UTF-16 string in native byte order. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_complete(const char *sql); -SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *sql); - -/* -** CAPI3REF: Register A Callback To Handle SQLITE_BUSY Errors -** KEYWORDS: {busy-handler callback} {busy handler} -** METHOD: sqlite3 -** -** ^The sqlite3_busy_handler(D,X,P) routine sets a callback function X -** that might be invoked with argument P whenever -** an attempt is made to access a database table associated with -** [database connection] D when another thread -** or process has the table locked. -** The sqlite3_busy_handler() interface is used to implement -** [sqlite3_busy_timeout()] and [PRAGMA busy_timeout]. -** -** ^If the busy callback is NULL, then [SQLITE_BUSY] -** is returned immediately upon encountering the lock. ^If the busy callback -** is not NULL, then the callback might be invoked with two arguments. -** -** ^The first argument to the busy handler is a copy of the void* pointer which -** is the third argument to sqlite3_busy_handler(). ^The second argument to -** the busy handler callback is the number of times that the busy handler has -** been invoked previously for the same locking event. ^If the -** busy callback returns 0, then no additional attempts are made to -** access the database and [SQLITE_BUSY] is returned -** to the application. -** ^If the callback returns non-zero, then another attempt -** is made to access the database and the cycle repeats. 
-**
-** The presence of a busy handler does not guarantee that it will be invoked
-** when there is lock contention. ^If SQLite determines that invoking the busy
-** handler could result in a deadlock, it will go ahead and return [SQLITE_BUSY]
-** to the application instead of invoking the
-** busy handler.
-** Consider a scenario where one process is holding a read lock that
-** it is trying to promote to a reserved lock and
-** a second process is holding a reserved lock that it is trying
-** to promote to an exclusive lock. The first process cannot proceed
-** because it is blocked by the second and the second process cannot
-** proceed because it is blocked by the first. If both processes
-** invoke the busy handlers, neither will make any progress. Therefore,
-** SQLite returns [SQLITE_BUSY] for the first process, hoping that this
-** will induce the first process to release its read lock and allow
-** the second process to proceed.
-**
-** ^The default busy callback is NULL.
-**
-** ^(There can only be a single busy handler defined for each
-** [database connection]. Setting a new busy handler clears any
-** previously set handler.)^ ^Note that calling [sqlite3_busy_timeout()]
-** or evaluating [PRAGMA busy_timeout=N] will change the
-** busy handler and thus clear any previously set busy handler.
-**
-** The busy callback should not take any actions which modify the
-** database connection that invoked the busy handler. In other words,
-** the busy handler is not reentrant. Any such actions
-** result in undefined behavior.
-**
-** A busy handler must not close the database connection
-** or [prepared statement] that invoked the busy handler.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_busy_handler(sqlite3*, int(*)(void*,int), void*);
-
-/*
-** CAPI3REF: Set A Busy Timeout
-** METHOD: sqlite3
-**
-** ^This routine sets a [sqlite3_busy_handler | busy handler] that sleeps
-** for a specified amount of time when a table is locked. ^The handler
-** will sleep multiple times until at least "ms" milliseconds of sleeping
-** have accumulated. ^After at least "ms" milliseconds of sleeping,
-** the handler returns 0 which causes [sqlite3_step()] to return
-** [SQLITE_BUSY].
-**
-** ^Calling this routine with an argument less than or equal to zero
-** turns off all busy handlers.
-**
-** ^(There can only be a single busy handler for a particular
-** [database connection] at any given moment. If another busy handler
-** was defined (using [sqlite3_busy_handler()]) prior to calling
-** this routine, that other busy handler is cleared.)^
-**
-** See also: [PRAGMA busy_timeout]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_busy_timeout(sqlite3*, int ms);
-
-/*
-** CAPI3REF: Convenience Routines For Running Queries
-** METHOD: sqlite3
-**
-** This is a legacy interface that is preserved for backwards compatibility.
-** Use of this interface is not recommended.
-**
-** Definition: A result table is a memory data structure created by the
-** [sqlite3_get_table()] interface. A result table records the
-** complete query results from one or more queries.
-**
-** The table conceptually has a number of rows and columns. But
-** these numbers are not part of the result table itself. These
-** numbers are obtained separately. Let N be the number of rows
-** and M be the number of columns.
-**
-** A result table is an array of pointers to zero-terminated UTF-8 strings.
-** There are (N+1)*M elements in the array. The first M pointers point
-** to zero-terminated strings that contain the names of the columns.
-** The remaining entries all point to query results. NULL values result -** in NULL pointers. All other values are in their UTF-8 zero-terminated -** string representation as returned by [sqlite3_column_text()]. -** -** A result table might consist of one or more memory allocations. -** It is not safe to pass a result table directly to [sqlite3_free()]. -** A result table should be deallocated using [sqlite3_free_table()]. -** -** ^(As an example of the result table format, suppose a query result -** is as follows: -** -**
-**        Name        | Age
-**        -----------------------
-**        Alice       | 43
-**        Bob         | 28
-**        Cindy       | 21
-** 
-**
-** There are two columns (M==2) and three rows (N==3). Thus the
-** result table has 8 entries. Suppose the result table is stored
-** in an array named azResult. Then azResult holds this content:
-**
-** 
-**        azResult[0] = "Name";
-**        azResult[1] = "Age";
-**        azResult[2] = "Alice";
-**        azResult[3] = "43";
-**        azResult[4] = "Bob";
-**        azResult[5] = "28";
-**        azResult[6] = "Cindy";
-**        azResult[7] = "21";
-** 
)^ -** -** ^The sqlite3_get_table() function evaluates one or more -** semicolon-separated SQL statements in the zero-terminated UTF-8 -** string of its 2nd parameter and returns a result table to the -** pointer given in its 3rd parameter. -** -** After the application has finished with the result from sqlite3_get_table(), -** it must pass the result table pointer to sqlite3_free_table() in order to -** release the memory that was malloced. Because of the way the -** [sqlite3_malloc()] happens within sqlite3_get_table(), the calling -** function must not try to call [sqlite3_free()] directly. Only -** [sqlite3_free_table()] is able to release the memory properly and safely. -** -** The sqlite3_get_table() interface is implemented as a wrapper around -** [sqlite3_exec()]. The sqlite3_get_table() routine does not have access -** to any internal data structures of SQLite. It uses only the public -** interface defined here. As a consequence, errors that occur in the -** wrapper layer outside of the internal [sqlite3_exec()] call are not -** reflected in subsequent calls to [sqlite3_errcode()] or -** [sqlite3_errmsg()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_get_table( - sqlite3 *db, /* An open database */ - const char *zSql, /* SQL to be evaluated */ - char ***pazResult, /* Results of the query */ - int *pnRow, /* Number of result rows written here */ - int *pnColumn, /* Number of result columns written here */ - char **pzErrmsg /* Error msg written here */ -); -SQLITE_API void SQLITE_STDCALL sqlite3_free_table(char **result); - -/* -** CAPI3REF: Formatted String Printing Functions -** -** These routines are work-alikes of the "printf()" family of functions -** from the standard C library. -** These routines understand most of the common K&R formatting options, -** plus some additional non-standard formats, detailed below. -** Note that some of the more obscure formatting options from recent -** C-library standards are omitted from this implementation. -** -** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their -** results into memory obtained from [sqlite3_malloc()]. -** The strings returned by these two routines should be -** released by [sqlite3_free()]. ^Both routines return a -** NULL pointer if [sqlite3_malloc()] is unable to allocate enough -** memory to hold the resulting string. -** -** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from -** the standard C library. The result is written into the -** buffer supplied as the second parameter whose size is given by -** the first parameter. Note that the order of the -** first two parameters is reversed from snprintf().)^ This is an -** historical accident that cannot be fixed without breaking -** backwards compatibility. ^(Note also that sqlite3_snprintf() -** returns a pointer to its buffer instead of the number of -** characters actually written into the buffer.)^ We admit that -** the number of characters written would be a more useful return -** value but we cannot change the implementation of sqlite3_snprintf() -** now without breaking compatibility. -** -** ^As long as the buffer size is greater than zero, sqlite3_snprintf() -** guarantees that the buffer is always zero-terminated. ^The first -** parameter "n" is the total size of the buffer, including space for -** the zero terminator. So the longest string that can be completely -** written will be n-1 characters. -** -** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf(). 
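A two-line sketch of the reversed argument order just described; the format string and value are illustrative.

```c
#include <sqlite3.h>

void demo(void){
  char zBuf[32];
  /* Size first, then buffer -- the reverse of C's snprintf().
  ** The result is always zero-terminated while sizeof(zBuf)>0,
  ** and the return value is zBuf itself, not a character count. */
  sqlite3_snprintf(sizeof(zBuf), zBuf, "rowid=%d", 42);
}
```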
-**
-** These routines all implement some additional formatting
-** options that are useful for constructing SQL statements.
-** All of the usual printf() formatting options apply. In addition,
-** there are "%q", "%Q", "%w" and "%z" options.
-**
-** ^(The %q option works like %s in that it substitutes a nul-terminated
-** string from the argument list. But %q also doubles every '\'' character.
-** %q is designed for use inside a string literal.)^ By doubling each '\''
-** character it escapes that character and allows it to be inserted into
-** the string.
-**
-** For example, assume the string variable zText contains text as follows:
-**
-** 
-**  char *zText = "It's a happy day!";
-** 
-** -** One can use this text in an SQL statement as follows: -** -**
-**  char *zSQL = sqlite3_mprintf("INSERT INTO table1 VALUES('%q')", zText);
-**  sqlite3_exec(db, zSQL, 0, 0, 0);
-**  sqlite3_free(zSQL);
-** 
-** -** Because the %q format string is used, the '\'' character in zText -** is escaped and the SQL generated is as follows: -** -**
-**  INSERT INTO table1 VALUES('It''s a happy day!')
-** 
-** -** This is correct. Had we used %s instead of %q, the generated SQL -** would have looked like this: -** -**
-**  INSERT INTO table1 VALUES('It's a happy day!');
-** 
-** -** This second example is an SQL syntax error. As a general rule you should -** always use %q instead of %s when inserting text into a string literal. -** -** ^(The %Q option works like %q except it also adds single quotes around -** the outside of the total string. Additionally, if the parameter in the -** argument list is a NULL pointer, %Q substitutes the text "NULL" (without -** single quotes).)^ So, for example, one could say: -** -**
-**  char *zSQL = sqlite3_mprintf("INSERT INTO table1 VALUES(%Q)", zText);
-**  sqlite3_exec(db, zSQL, 0, 0, 0);
-**  sqlite3_free(zSQL);
-** 
-**
-** The code above will render a correct SQL statement in the zSQL
-** variable even if the zText variable is a NULL pointer.
-**
-** ^(The "%w" formatting option is like "%q" except that it expects to
-** be contained within double-quotes instead of single quotes, and it
-** escapes the double-quote character instead of the single-quote
-** character.)^ The "%w" formatting option is intended for safely inserting
-** table and column names into a constructed SQL statement.
-**
-** ^(The "%z" formatting option works like "%s" but with the
-** addition that after the string has been read and copied into
-** the result, [sqlite3_free()] is called on the input string.)^
-*/
-SQLITE_API char *SQLITE_CDECL sqlite3_mprintf(const char*,...);
-SQLITE_API char *SQLITE_STDCALL sqlite3_vmprintf(const char*, va_list);
-SQLITE_API char *SQLITE_CDECL sqlite3_snprintf(int,char*,const char*, ...);
-SQLITE_API char *SQLITE_STDCALL sqlite3_vsnprintf(int,char*,const char*, va_list);
-
-/*
-** CAPI3REF: Memory Allocation Subsystem
-**
-** The SQLite core uses these three routines for all of its own
-** internal memory allocation needs. "Core" in the previous sentence
-** does not include operating-system specific VFS implementation. The
-** Windows VFS uses native malloc() and free() for some operations.
-**
-** ^The sqlite3_malloc() routine returns a pointer to a block
-** of memory at least N bytes in length, where N is the parameter.
-** ^If sqlite3_malloc() is unable to obtain sufficient free
-** memory, it returns a NULL pointer. ^If the parameter N to
-** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns
-** a NULL pointer.
-**
-** ^The sqlite3_malloc64(N) routine works just like
-** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead
-** of a signed 32-bit integer.
-**
-** ^Calling sqlite3_free() with a pointer previously returned
-** by sqlite3_malloc() or sqlite3_realloc() releases that memory so
-** that it might be reused. ^The sqlite3_free() routine is
-** a no-op if it is called with a NULL pointer. Passing a NULL pointer
-** to sqlite3_free() is harmless. After being freed, memory
-** should neither be read nor written. Even reading previously freed
-** memory might result in a segmentation fault or other severe error.
-** Memory corruption, a segmentation fault, or other severe error
-** might result if sqlite3_free() is called with a non-NULL pointer that
-** was not obtained from sqlite3_malloc() or sqlite3_realloc().
-**
-** ^The sqlite3_realloc(X,N) interface attempts to resize a
-** prior memory allocation X to be at least N bytes.
-** ^If the X parameter to sqlite3_realloc(X,N)
-** is a NULL pointer then its behavior is identical to calling
-** sqlite3_malloc(N).
-** ^If the N parameter to sqlite3_realloc(X,N) is zero or
-** negative then the behavior is exactly the same as calling
-** sqlite3_free(X).
-** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation
-** of at least N bytes in size or NULL if insufficient memory is available.
-** ^If M is the size of the prior allocation, then min(N,M) bytes
-** of the prior allocation are copied into the beginning of buffer returned
-** by sqlite3_realloc(X,N) and the prior allocation is freed.
-** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the
-** prior allocation is not freed.
-**
-** ^The sqlite3_realloc64(X,N) interface works the same as
-** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead
-** of a 32-bit signed integer.
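The NULL-return rule above (the prior allocation survives a failed resize) is easy to get wrong; a small sketch, with an invented helper name:

```c
#include <sqlite3.h>
#include <string.h>

/* Append nSrc bytes to a buffer currently holding nOld bytes.  A NULL
** z input behaves like sqlite3_malloc(), per the documentation above. */
static char *append_bytes(char *z, int nOld, const char *zSrc, int nSrc){
  char *zNew = sqlite3_realloc(z, nOld+nSrc);
  if( zNew==0 ){
    sqlite3_free(z);   /* on failure the old allocation was NOT freed */
    return 0;
  }
  memcpy(zNew+nOld, zSrc, nSrc);
  return zNew;         /* caller eventually releases with sqlite3_free() */
}
```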
-**
-** ^If X is a memory allocation previously obtained from sqlite3_malloc(),
-** sqlite3_malloc64(), sqlite3_realloc(), or sqlite3_realloc64(), then
-** sqlite3_msize(X) returns the size of that memory allocation in bytes.
-** ^The value returned by sqlite3_msize(X) might be larger than the number
-** of bytes requested when X was allocated. ^If X is a NULL pointer then
-** sqlite3_msize(X) returns zero. If X points to something that is not
-** the beginning of memory allocation, or if it points to a formerly
-** valid memory allocation that has now been freed, then the behavior
-** of sqlite3_msize(X) is undefined and possibly harmful.
-**
-** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(),
-** sqlite3_malloc64(), and sqlite3_realloc64()
-** is always aligned to at least an 8 byte boundary, or to a
-** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time
-** option is used.
-**
-** In SQLite version 3.5.0 and 3.5.1, it was possible to define
-** the SQLITE_OMIT_MEMORY_ALLOCATION which would cause the built-in
-** implementation of these routines to be omitted. That capability
-** is no longer provided. Only built-in memory allocators can be used.
-**
-** Prior to SQLite version 3.7.10, the Windows OS interface layer called
-** the system malloc() and free() directly when converting
-** filenames between the UTF-8 encoding used by SQLite
-** and whatever filename encoding is used by the particular Windows
-** installation. Memory allocation errors were detected, but
-** they were reported back as [SQLITE_CANTOPEN] or
-** [SQLITE_IOERR] rather than [SQLITE_NOMEM].
-**
-** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()]
-** must be either NULL or else pointers obtained from a prior
-** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have
-** not yet been released.
-**
-** The application must not read or write any part of
-** a block of memory after it has been released using
-** [sqlite3_free()] or [sqlite3_realloc()].
-*/
-SQLITE_API void *SQLITE_STDCALL sqlite3_malloc(int);
-SQLITE_API void *SQLITE_STDCALL sqlite3_malloc64(sqlite3_uint64);
-SQLITE_API void *SQLITE_STDCALL sqlite3_realloc(void*, int);
-SQLITE_API void *SQLITE_STDCALL sqlite3_realloc64(void*, sqlite3_uint64);
-SQLITE_API void SQLITE_STDCALL sqlite3_free(void*);
-SQLITE_API sqlite3_uint64 SQLITE_STDCALL sqlite3_msize(void*);
-
-/*
-** CAPI3REF: Memory Allocator Statistics
-**
-** SQLite provides these two interfaces for reporting on the status
-** of the [sqlite3_malloc()], [sqlite3_free()], and [sqlite3_realloc()]
-** routines, which form the built-in memory allocation subsystem.
-**
-** ^The [sqlite3_memory_used()] routine returns the number of bytes
-** of memory currently outstanding (malloced but not freed).
-** ^The [sqlite3_memory_highwater()] routine returns the maximum
-** value of [sqlite3_memory_used()] since the high-water mark
-** was last reset. ^The values returned by [sqlite3_memory_used()] and
-** [sqlite3_memory_highwater()] include any overhead
-** added by SQLite in its implementation of [sqlite3_malloc()],
-** but not overhead added by any underlying system library
-** routines that [sqlite3_malloc()] may call.
-**
-** ^The memory high-water mark is reset to the current value of
-** [sqlite3_memory_used()] if and only if the parameter to
-** [sqlite3_memory_highwater()] is true. ^The value returned
-** by [sqlite3_memory_highwater(1)] is the high-water mark
-** prior to the reset.
-*/
-SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_used(void);
-SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_highwater(int resetFlag);
-
-/*
-** CAPI3REF: Pseudo-Random Number Generator
-**
-** SQLite contains a high-quality pseudo-random number generator (PRNG) used to
-** select random [ROWID | ROWIDs] when inserting new records into a table that
-** already uses the largest possible [ROWID]. The PRNG is also used for
-** the built-in random() and randomblob() SQL functions. This interface allows
-** applications to access the same PRNG for other purposes.
-**
-** ^A call to this routine stores N bytes of randomness into buffer P.
-** ^The P parameter can be a NULL pointer.
-**
-** ^If this routine has not been previously called or if the previous
-** call had N less than one or a NULL pointer for P, then the PRNG is
-** seeded using randomness obtained from the xRandomness method of
-** the default [sqlite3_vfs] object.
-** ^If the previous call to this routine had an N of 1 or more and a
-** non-NULL P then the pseudo-randomness is generated
-** internally and without recourse to the [sqlite3_vfs] xRandomness
-** method.
-*/
-SQLITE_API void SQLITE_STDCALL sqlite3_randomness(int N, void *P);
-
-/*
-** CAPI3REF: Compile-Time Authorization Callbacks
-** METHOD: sqlite3
-**
-** ^This routine registers an authorizer callback with a particular
-** [database connection], supplied in the first argument.
-** ^The authorizer callback is invoked as SQL statements are being compiled
-** by [sqlite3_prepare()] or its variants [sqlite3_prepare_v2()],
-** [sqlite3_prepare16()] and [sqlite3_prepare16_v2()]. ^At various
-** points during the compilation process, as logic is being created
-** to perform various actions, the authorizer callback is invoked to
-** see if those actions are allowed. ^The authorizer callback should
-** return [SQLITE_OK] to allow the action, [SQLITE_IGNORE] to disallow the
-** specific action but allow the SQL statement to continue to be
-** compiled, or [SQLITE_DENY] to cause the entire SQL statement to be
-** rejected with an error. ^If the authorizer callback returns
-** any value other than [SQLITE_IGNORE], [SQLITE_OK], or [SQLITE_DENY]
-** then the [sqlite3_prepare_v2()] or equivalent call that triggered
-** the authorizer will fail with an error message.
-**
-** When the callback returns [SQLITE_OK], that means the operation
-** requested is ok. ^When the callback returns [SQLITE_DENY], the
-** [sqlite3_prepare_v2()] or equivalent call that triggered the
-** authorizer will fail with an error message explaining that
-** access is denied.
-**
-** ^The first parameter to the authorizer callback is a copy of the third
-** parameter to the sqlite3_set_authorizer() interface. ^The second parameter
-** to the callback is an integer [SQLITE_COPY | action code] that specifies
-** the particular action to be authorized. ^The third through sixth parameters
-** to the callback are zero-terminated strings that contain additional
-** details about the action to be authorized.
-**
-** ^If the action code is [SQLITE_READ]
-** and the callback returns [SQLITE_IGNORE] then the
-** [prepared statement] is constructed to substitute
-** a NULL value in place of the table column that would have
-** been read if [SQLITE_OK] had been returned. The [SQLITE_IGNORE]
-** return can be used to deny an untrusted user access to individual
-** columns of a table.
-** ^If the action code is [SQLITE_DELETE] and the callback returns
-** [SQLITE_IGNORE] then the [DELETE] operation proceeds but the
-** [truncate optimization] is disabled and all rows are deleted individually.
-**
-** An authorizer is used when [sqlite3_prepare | preparing]
-** SQL statements from an untrusted source, to ensure that the SQL statements
-** do not try to access data they are not allowed to see, or that they do not
-** try to execute malicious statements that damage the database. For
-** example, an application may allow a user to enter arbitrary
-** SQL queries for evaluation by a database. But the application does
-** not want the user to be able to make arbitrary changes to the
-** database. An authorizer could then be put in place while the
-** user-entered SQL is being [sqlite3_prepare | prepared] that
-** disallows everything except [SELECT] statements.
-**
-** Applications that need to process SQL from untrusted sources
-** might also consider lowering resource limits using [sqlite3_limit()]
-** and limiting database size using the [max_page_count] [PRAGMA]
-** in addition to using an authorizer.
-**
-** ^(Only a single authorizer can be in place on a database connection
-** at a time. Each call to sqlite3_set_authorizer overrides the
-** previous call.)^ ^Disable the authorizer by installing a NULL callback.
-** The authorizer is disabled by default.
-**
-** The authorizer callback must not do anything that will modify
-** the database connection that invoked the authorizer callback.
-** Note that, for the purposes of the preceding sentence, running
-** [sqlite3_prepare_v2()] and [sqlite3_step()] both count as
-** modifying the database connection.
-**
-** ^When [sqlite3_prepare_v2()] is used to prepare a statement, the
-** statement might be re-prepared during [sqlite3_step()] due to a
-** schema change. Hence, the application should ensure that the
-** correct authorizer callback remains in place during the [sqlite3_step()].
-**
-** ^Note that the authorizer callback is invoked only during
-** [sqlite3_prepare()] or its variants. Authorization is not
-** performed during statement evaluation in [sqlite3_step()], unless,
-** as stated in the previous paragraph, sqlite3_step() invokes
-** sqlite3_prepare_v2() to reprepare a statement after a schema change.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_set_authorizer(
-  sqlite3*,
-  int (*xAuth)(void*,int,const char*,const char*,const char*,const char*),
-  void *pUserData
-);
-
-/*
-** CAPI3REF: Authorizer Return Codes
-**
-** The [sqlite3_set_authorizer | authorizer callback function] must
-** return either [SQLITE_OK] or one of these two constants in order
-** to signal SQLite whether or not the action is permitted. See the
-** [sqlite3_set_authorizer | authorizer documentation] for additional
-** information.
-**
-** Note that SQLITE_IGNORE is also used as a [conflict resolution mode]
-** returned from the [sqlite3_vtab_on_conflict()] interface.
-*/
-#define SQLITE_DENY   1   /* Abort the SQL statement with an error */
-#define SQLITE_IGNORE 2   /* Don't allow access, but don't generate an error */
-
-/*
-** CAPI3REF: Authorizer Action Codes
-**
-** The [sqlite3_set_authorizer()] interface registers a callback function
-** that is invoked to authorize certain SQL statement actions. The
-** second parameter to the callback is an integer code that specifies
-** what action is being authorized. These are the integer action codes that
-** the authorizer callback may be passed.
-** -** These action code values signify what kind of operation is to be -** authorized. The 3rd and 4th parameters to the authorization -** callback function will be parameters or NULL depending on which of these -** codes is used as the second parameter. ^(The 5th parameter to the -** authorizer callback is the name of the database ("main", "temp", -** etc.) if applicable.)^ ^The 6th parameter to the authorizer callback -** is the name of the inner-most trigger or view that is responsible for -** the access attempt or NULL if this access attempt is directly from -** top-level SQL code. -*/ -/******************************************* 3rd ************ 4th ***********/ -#define SQLITE_CREATE_INDEX 1 /* Index Name Table Name */ -#define SQLITE_CREATE_TABLE 2 /* Table Name NULL */ -#define SQLITE_CREATE_TEMP_INDEX 3 /* Index Name Table Name */ -#define SQLITE_CREATE_TEMP_TABLE 4 /* Table Name NULL */ -#define SQLITE_CREATE_TEMP_TRIGGER 5 /* Trigger Name Table Name */ -#define SQLITE_CREATE_TEMP_VIEW 6 /* View Name NULL */ -#define SQLITE_CREATE_TRIGGER 7 /* Trigger Name Table Name */ -#define SQLITE_CREATE_VIEW 8 /* View Name NULL */ -#define SQLITE_DELETE 9 /* Table Name NULL */ -#define SQLITE_DROP_INDEX 10 /* Index Name Table Name */ -#define SQLITE_DROP_TABLE 11 /* Table Name NULL */ -#define SQLITE_DROP_TEMP_INDEX 12 /* Index Name Table Name */ -#define SQLITE_DROP_TEMP_TABLE 13 /* Table Name NULL */ -#define SQLITE_DROP_TEMP_TRIGGER 14 /* Trigger Name Table Name */ -#define SQLITE_DROP_TEMP_VIEW 15 /* View Name NULL */ -#define SQLITE_DROP_TRIGGER 16 /* Trigger Name Table Name */ -#define SQLITE_DROP_VIEW 17 /* View Name NULL */ -#define SQLITE_INSERT 18 /* Table Name NULL */ -#define SQLITE_PRAGMA 19 /* Pragma Name 1st arg or NULL */ -#define SQLITE_READ 20 /* Table Name Column Name */ -#define SQLITE_SELECT 21 /* NULL NULL */ -#define SQLITE_TRANSACTION 22 /* Operation NULL */ -#define SQLITE_UPDATE 23 /* Table Name Column Name */ -#define SQLITE_ATTACH 24 /* Filename NULL */ -#define SQLITE_DETACH 25 /* Database Name NULL */ -#define SQLITE_ALTER_TABLE 26 /* Database Name Table Name */ -#define SQLITE_REINDEX 27 /* Index Name NULL */ -#define SQLITE_ANALYZE 28 /* Table Name NULL */ -#define SQLITE_CREATE_VTABLE 29 /* Table Name Module Name */ -#define SQLITE_DROP_VTABLE 30 /* Table Name Module Name */ -#define SQLITE_FUNCTION 31 /* NULL Function Name */ -#define SQLITE_SAVEPOINT 32 /* Operation Savepoint Name */ -#define SQLITE_COPY 0 /* No longer used */ -#define SQLITE_RECURSIVE 33 /* NULL NULL */ - -/* -** CAPI3REF: Tracing And Profiling Functions -** METHOD: sqlite3 -** -** These routines register callback functions that can be used for -** tracing and profiling the execution of SQL statements. -** -** ^The callback function registered by sqlite3_trace() is invoked at -** various times when an SQL statement is being run by [sqlite3_step()]. -** ^The sqlite3_trace() callback is invoked with a UTF-8 rendering of the -** SQL statement text as the statement first begins executing. -** ^(Additional sqlite3_trace() callbacks might occur -** as each triggered subprogram is entered. The callbacks for triggers -** contain a UTF-8 SQL comment that identifies the trigger.)^ -** -** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit -** the length of [bound parameter] expansion in the output of sqlite3_trace(). -** -** ^The callback function registered by sqlite3_profile() is invoked -** as each SQL statement finishes. 
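Returning to the authorizer interface above: as a sketch of the read-only scenario it describes, a callback that admits only queries might look like the following. Treating SQLITE_FUNCTION as allowed is a judgment call of this example, and the function name is invented.

```c
#include <sqlite3.h>

/* Allow SELECT statements, column reads, and function calls;
** reject every other action code with an error. */
static int read_only_auth(void *pArg, int op, const char *z1,
                          const char *z2, const char *zDb,
                          const char *zTrigger){
  (void)pArg; (void)z1; (void)z2; (void)zDb; (void)zTrigger;
  switch( op ){
    case SQLITE_SELECT:
    case SQLITE_READ:
    case SQLITE_FUNCTION:
      return SQLITE_OK;
    default:
      return SQLITE_DENY;
  }
}

/* Installed with:  sqlite3_set_authorizer(db, read_only_auth, 0); */
```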
^The profile callback is passed
-** the original statement text and an estimate of the wall-clock time
-** that the statement took to run. ^The profile callback
-** time is in units of nanoseconds, however the current implementation
-** is only capable of millisecond resolution so the six least significant
-** digits in the time are meaningless. Future versions of SQLite
-** might provide greater resolution on the profiler callback. The
-** sqlite3_profile() function is considered experimental and is
-** subject to change in future versions of SQLite.
-*/
-SQLITE_API void *SQLITE_STDCALL sqlite3_trace(sqlite3*, void(*xTrace)(void*,const char*), void*);
-SQLITE_API SQLITE_EXPERIMENTAL void *SQLITE_STDCALL sqlite3_profile(sqlite3*,
-   void(*xProfile)(void*,const char*,sqlite3_uint64), void*);
-
-/*
-** CAPI3REF: Query Progress Callbacks
-** METHOD: sqlite3
-**
-** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback
-** function X to be invoked periodically during long running calls to
-** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for
-** database connection D. An example use for this
-** interface is to keep a GUI updated during a large query.
-**
-** ^The parameter P is passed through as the only parameter to the
-** callback function X. ^The parameter N is the approximate number of
-** [virtual machine instructions] that are evaluated between successive
-** invocations of the callback X. ^If N is less than one then the progress
-** handler is disabled.
-**
-** ^Only a single progress handler may be defined at one time per
-** [database connection]; setting a new progress handler cancels the
-** old one. ^Setting parameter X to NULL disables the progress handler.
-** ^The progress handler is also disabled by setting N to a value less
-** than 1.
-**
-** ^If the progress callback returns non-zero, the operation is
-** interrupted. This feature can be used to implement a
-** "Cancel" button on a GUI progress dialog box.
-**
-** The progress handler callback must not do anything that will modify
-** the database connection that invoked the progress handler.
-** Note that, for the purposes of the preceding sentence, running
-** [sqlite3_prepare_v2()] and [sqlite3_step()] both count as
-** modifying the database connection.
-**
-*/
-SQLITE_API void SQLITE_STDCALL sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*);
-
-/*
-** CAPI3REF: Opening A New Database Connection
-** CONSTRUCTOR: sqlite3
-**
-** ^These routines open an SQLite database file as specified by the
-** filename argument. ^The filename argument is interpreted as UTF-8 for
-** sqlite3_open() and sqlite3_open_v2() and as UTF-16 in the native byte
-** order for sqlite3_open16(). ^(A [database connection] handle is usually
-** returned in *ppDb, even if an error occurs. The only exception is that
-** if SQLite is unable to allocate memory to hold the [sqlite3] object,
-** a NULL will be written into *ppDb instead of a pointer to the [sqlite3]
-** object.)^ ^(If the database is opened (and/or created) successfully, then
-** [SQLITE_OK] is returned. Otherwise an [error code] is returned.)^ ^The
-** [sqlite3_errmsg()] or [sqlite3_errmsg16()] routines can be used to obtain
-** an English language description of the error following a failure of any
-** of the sqlite3_open() routines.
-**
-** ^The default encoding will be UTF-8 for databases created using
-** sqlite3_open() or sqlite3_open_v2(). ^The default encoding for databases
-** created using sqlite3_open16() will be UTF-16 in the native byte order.
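Before moving on to opening connections, the "Cancel" button pattern for the progress handler above reduces to a few lines; the global flag and the check interval of 1000 instructions are illustrative assumptions.

```c
#include <sqlite3.h>

static volatile int g_cancel = 0;   /* set by a hypothetical Cancel button */

/* Returning non-zero interrupts the running statement. */
static int progress_cb(void *pArg){
  (void)pArg;
  return g_cancel;
}

/* Check roughly every 1000 virtual machine instructions:
**   sqlite3_progress_handler(db, 1000, progress_cb, 0);
*/
```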
-** -** Whether or not an error occurs when it is opened, resources -** associated with the [database connection] handle should be released by -** passing it to [sqlite3_close()] when it is no longer required. -** -** The sqlite3_open_v2() interface works like sqlite3_open() -** except that it accepts two additional parameters for additional control -** over the new database connection. ^(The flags parameter to -** sqlite3_open_v2() can take one of -** the following three values, optionally combined with the -** [SQLITE_OPEN_NOMUTEX], [SQLITE_OPEN_FULLMUTEX], [SQLITE_OPEN_SHAREDCACHE], -** [SQLITE_OPEN_PRIVATECACHE], and/or [SQLITE_OPEN_URI] flags:)^ -** -**
-** ^(
[SQLITE_OPEN_READONLY]
-**
The database is opened in read-only mode. If the database does not -** already exist, an error is returned.
)^ -** -** ^(
[SQLITE_OPEN_READWRITE]
-**
The database is opened for reading and writing if possible, or reading -** only if the file is write protected by the operating system. In either -** case the database must already exist, otherwise an error is returned.
)^ -** -** ^(
[SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]
-**
The database is opened for reading and writing, and is created if -** it does not already exist. This is the behavior that is always used for -** sqlite3_open() and sqlite3_open16().
)^ -**
-** -** If the 3rd parameter to sqlite3_open_v2() is not one of the -** combinations shown above optionally combined with other -** [SQLITE_OPEN_READONLY | SQLITE_OPEN_* bits] -** then the behavior is undefined. -** -** ^If the [SQLITE_OPEN_NOMUTEX] flag is set, then the database connection -** opens in the multi-thread [threading mode] as long as the single-thread -** mode has not been set at compile-time or start-time. ^If the -** [SQLITE_OPEN_FULLMUTEX] flag is set then the database connection opens -** in the serialized [threading mode] unless single-thread was -** previously selected at compile-time or start-time. -** ^The [SQLITE_OPEN_SHAREDCACHE] flag causes the database connection to be -** eligible to use [shared cache mode], regardless of whether or not shared -** cache is enabled using [sqlite3_enable_shared_cache()]. ^The -** [SQLITE_OPEN_PRIVATECACHE] flag causes the database connection to not -** participate in [shared cache mode] even if it is enabled. -** -** ^The fourth parameter to sqlite3_open_v2() is the name of the -** [sqlite3_vfs] object that defines the operating system interface that -** the new database connection should use. ^If the fourth parameter is -** a NULL pointer then the default [sqlite3_vfs] object is used. -** -** ^If the filename is ":memory:", then a private, temporary in-memory database -** is created for the connection. ^This in-memory database will vanish when -** the database connection is closed. Future versions of SQLite might -** make use of additional special filenames that begin with the ":" character. -** It is recommended that when a database filename actually does begin with -** a ":" character you should prefix the filename with a pathname such as -** "./" to avoid ambiguity. -** -** ^If the filename is an empty string, then a private, temporary -** on-disk database will be created. ^This private database will be -** automatically deleted as soon as the database connection is closed. -** -** [[URI filenames in sqlite3_open()]]

URI Filenames

-** -** ^If [URI filename] interpretation is enabled, and the filename argument -** begins with "file:", then the filename is interpreted as a URI. ^URI -** filename interpretation is enabled if the [SQLITE_OPEN_URI] flag is -** set in the fourth argument to sqlite3_open_v2(), or if it has -** been enabled globally using the [SQLITE_CONFIG_URI] option with the -** [sqlite3_config()] method or by the [SQLITE_USE_URI] compile-time option. -** As of SQLite version 3.7.7, URI filename interpretation is turned off -** by default, but future releases of SQLite might enable URI filename -** interpretation by default. See "[URI filenames]" for additional -** information. -** -** URI filenames are parsed according to RFC 3986. ^If the URI contains an -** authority, then it must be either an empty string or the string -** "localhost". ^If the authority is not an empty string or "localhost", an -** error is returned to the caller. ^The fragment component of a URI, if -** present, is ignored. -** -** ^SQLite uses the path component of the URI as the name of the disk file -** which contains the database. ^If the path begins with a '/' character, -** then it is interpreted as an absolute path. ^If the path does not begin -** with a '/' (meaning that the authority section is omitted from the URI) -** then the path is interpreted as a relative path. -** ^(On windows, the first component of an absolute path -** is a drive specification (e.g. "C:").)^ -** -** [[core URI query parameters]] -** The query component of a URI may contain parameters that are interpreted -** either by SQLite itself, or by a [VFS | custom VFS implementation]. -** SQLite and its built-in [VFSes] interpret the -** following query parameters: -** -**
    -**
  • vfs: ^The "vfs" parameter may be used to specify the name of -** a VFS object that provides the operating system interface that should -** be used to access the database file on disk. ^If this option is set to -** an empty string the default VFS object is used. ^Specifying an unknown -** VFS is an error. ^If sqlite3_open_v2() is used and the vfs option is -** present, then the VFS specified by the option takes precedence over -** the value passed as the fourth parameter to sqlite3_open_v2(). -** -**
  • mode: ^(The mode parameter may be set to either "ro", "rw", -** "rwc", or "memory". Attempting to set it to any other value is -** an error)^. -** ^If "ro" is specified, then the database is opened for read-only -** access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the -** third argument to sqlite3_open_v2(). ^If the mode option is set to -** "rw", then the database is opened for read-write (but not create) -** access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had -** been set. ^Value "rwc" is equivalent to setting both -** SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE. ^If the mode option is -** set to "memory" then a pure [in-memory database] that never reads -** or writes from disk is used. ^It is an error to specify a value for -** the mode parameter that is less restrictive than that specified by -** the flags passed in the third parameter to sqlite3_open_v2(). -** -**
  • cache: ^The cache parameter may be set to either "shared" or -** "private". ^Setting it to "shared" is equivalent to setting the -** SQLITE_OPEN_SHAREDCACHE bit in the flags argument passed to -** sqlite3_open_v2(). ^Setting the cache parameter to "private" is -** equivalent to setting the SQLITE_OPEN_PRIVATECACHE bit. -** ^If sqlite3_open_v2() is used and the "cache" parameter is present in -** a URI filename, its value overrides any behavior requested by setting -** SQLITE_OPEN_PRIVATECACHE or SQLITE_OPEN_SHAREDCACHE flag. -** -**
  • psow: ^The psow parameter indicates whether or not the -** [powersafe overwrite] property does or does not apply to the -** storage media on which the database file resides. -** -**
  • nolock: ^The nolock parameter is a boolean query parameter -** which if set disables file locking in rollback journal modes. This -** is useful for accessing a database on a filesystem that does not -** support locking. Caution: Database corruption might result if two -** or more processes write to the same database and any one of those -** processes uses nolock=1. -** -**
  • immutable: ^The immutable parameter is a boolean query -** parameter that indicates that the database file is stored on -** read-only media. ^When immutable is set, SQLite assumes that the -** database file cannot be changed, even by a process with higher -** privilege, and so the database is opened read-only and all locking -** and change detection is disabled. Caution: Setting the immutable -** property on a database file that does in fact change can result -** in incorrect query results and/or [SQLITE_CORRUPT] errors. -** See also: [SQLITE_IOCAP_IMMUTABLE]. -** -**
-** -** ^Specifying an unknown parameter in the query component of a URI is not an -** error. Future versions of SQLite might understand additional query -** parameters. See "[query parameters with special meaning to SQLite]" for -** additional information. -** -** [[URI filename examples]]

URI filename examples

-** -** -**
Example URI filenames and their results: -**
file:data.db -** Open the file "data.db" in the current directory. -**
file:/home/fred/data.db
-** file:///home/fred/data.db
-** file://localhost/home/fred/data.db
-** Open the database file "/home/fred/data.db". -**
file://darkstar/home/fred/data.db -** An error. "darkstar" is not a recognized authority. -**
-** file:///C:/Documents%20and%20Settings/fred/Desktop/data.db -** Windows only: Open the file "data.db" on fred's desktop on drive -** C:. Note that the %20 escaping in this example is not strictly -** necessary - space characters can be used literally -** in URI filenames. -**
file:data.db?mode=ro&cache=private -** Open file "data.db" in the current directory for read-only access. -** Regardless of whether or not shared-cache mode is enabled by -** default, use a private cache. -**
file:/home/fred/data.db?vfs=unix-dotfile -** Open file "/home/fred/data.db". Use the special VFS "unix-dotfile" -** that uses dot-files in place of posix advisory locking. -**
file:data.db?mode=readonly -** An error. "readonly" is not a valid option for the "mode" parameter. -**
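Tying the flags and URI query parameters together, a read-only open via a URI filename might look like the following sketch; the filename and the error handling are illustrative, not prescribed by the interface.

```c
#include <sqlite3.h>
#include <stdio.h>

int open_readonly(sqlite3 **ppDb){
  int rc = sqlite3_open_v2(
      "file:data.db?cache=private",              /* URI filename */
      ppDb,
      SQLITE_OPEN_READONLY | SQLITE_OPEN_URI,    /* enable URI parsing */
      0);                                        /* default VFS */
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "open failed: %s\n", sqlite3_errmsg(*ppDb));
    sqlite3_close(*ppDb);   /* a handle is usually returned even on error */
    *ppDb = 0;
  }
  return rc;
}
```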
-**
-** ^URI hexadecimal escape sequences (%HH) are supported within the path and
-** query components of a URI. A hexadecimal escape sequence consists of a
-** percent sign - "%" - followed by exactly two hexadecimal digits
-** specifying an octet value. ^Before the path or query components of a
-** URI filename are interpreted, they are encoded using UTF-8 and all
-** hexadecimal escape sequences replaced by a single byte containing the
-** corresponding octet. If this process generates an invalid UTF-8 encoding,
-** the results are undefined.
-**
-** Note to Windows users: The encoding used for the filename argument
-** of sqlite3_open() and sqlite3_open_v2() must be UTF-8, not whatever
-** codepage is currently defined. Filenames containing international
-** characters must be converted to UTF-8 prior to passing them into
-** sqlite3_open() or sqlite3_open_v2().
-**
-** Note to Windows Runtime users: The temporary directory must be set
-** prior to calling sqlite3_open() or sqlite3_open_v2(). Otherwise, various
-** features that require the use of temporary files may fail.
-**
-** See also: [sqlite3_temp_directory]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_open(
-  const char *filename,   /* Database filename (UTF-8) */
-  sqlite3 **ppDb          /* OUT: SQLite db handle */
-);
-SQLITE_API int SQLITE_STDCALL sqlite3_open16(
-  const void *filename,   /* Database filename (UTF-16) */
-  sqlite3 **ppDb          /* OUT: SQLite db handle */
-);
-SQLITE_API int SQLITE_STDCALL sqlite3_open_v2(
-  const char *filename,   /* Database filename (UTF-8) */
-  sqlite3 **ppDb,         /* OUT: SQLite db handle */
-  int flags,              /* Flags */
-  const char *zVfs        /* Name of VFS module to use */
-);
-
-/*
-** CAPI3REF: Obtain Values For URI Parameters
-**
-** These are utility routines, useful to VFS implementations, that check
-** to see if a database file was a URI that contained a specific query
-** parameter, and if so obtains the value of that query parameter.
-**
-** If F is the database filename pointer passed into the xOpen() method of
-** a VFS implementation when the flags parameter to xOpen() has one or
-** more of the [SQLITE_OPEN_URI] or [SQLITE_OPEN_MAIN_DB] bits set and
-** P is the name of the query parameter, then
-** sqlite3_uri_parameter(F,P) returns the value of the P
-** parameter if it exists or a NULL pointer if P does not appear as a
-** query parameter on F. If P is a query parameter of F that
-** has no explicit value, then sqlite3_uri_parameter(F,P) returns
-** a pointer to an empty string.
-**
-** The sqlite3_uri_boolean(F,P,B) routine assumes that P is a boolean
-** parameter and returns true (1) or false (0) according to the value
-** of P. The sqlite3_uri_boolean(F,P,B) routine returns true (1) if the
-** value of query parameter P is one of "yes", "true", or "on" in any
-** case or if the value begins with a non-zero number. The
-** sqlite3_uri_boolean(F,P,B) routine returns false (0) if the value of
-** query parameter P is one of "no", "false", or "off" in any case or
-** if the value begins with a numeric zero. If P is not a query
-** parameter on F or if the value of P does not match any of the
-** above, then sqlite3_uri_boolean(F,P,B) returns (B!=0).
-**
-** The sqlite3_uri_int64(F,P,D) routine converts the value of P into a
-** 64-bit signed integer and returns that integer, or D if P does not
-** exist. If the value of P is something other than an integer, then
-** zero is returned.
-**
-** If F is a NULL pointer, then sqlite3_uri_parameter(F,P) returns NULL and
-** sqlite3_uri_boolean(F,P,B) returns B.
If F is not a NULL pointer and -** is not a database file pathname pointer that SQLite passed into the xOpen -** VFS method, then the behavior of this routine is undefined and probably -** undesirable. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_uri_parameter(const char *zFilename, const char *zParam); -SQLITE_API int SQLITE_STDCALL sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault); -SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_uri_int64(const char*, const char*, sqlite3_int64); - - -/* -** CAPI3REF: Error Codes And Messages -** METHOD: sqlite3 -** -** ^If the most recent sqlite3_* API call associated with -** [database connection] D failed, then the sqlite3_errcode(D) interface -** returns the numeric [result code] or [extended result code] for that -** API call. -** If the most recent API call was successful, -** then the return value from sqlite3_errcode() is undefined. -** ^The sqlite3_extended_errcode() -** interface is the same except that it always returns the -** [extended result code] even when extended result codes are -** disabled. -** -** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language -** text that describes the error, as either UTF-8 or UTF-16 respectively. -** ^(Memory to hold the error message string is managed internally. -** The application does not need to worry about freeing the result. -** However, the error string might be overwritten or deallocated by -** subsequent calls to other SQLite interface functions.)^ -** -** ^The sqlite3_errstr() interface returns the English-language text -** that describes the [result code], as UTF-8. -** ^(Memory to hold the error message string is managed internally -** and must not be freed by the application)^. -** -** When the serialized [threading mode] is in use, it might be the -** case that a second error occurs on a separate thread in between -** the time of the first error and the call to these interfaces. -** When that happens, the second error will be reported since these -** interfaces always report the most recent result. To avoid -** this, each thread can obtain exclusive use of the [database connection] D -** by invoking [sqlite3_mutex_enter]([sqlite3_db_mutex](D)) before beginning -** to use D and invoking [sqlite3_mutex_leave]([sqlite3_db_mutex](D)) after -** all calls to the interfaces listed here are completed. -** -** If an interface fails with SQLITE_MISUSE, that means the interface -** was invoked incorrectly by the application. In that case, the -** error code and message may or may not be set. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_errcode(sqlite3 *db); -SQLITE_API int SQLITE_STDCALL sqlite3_extended_errcode(sqlite3 *db); -SQLITE_API const char *SQLITE_STDCALL sqlite3_errmsg(sqlite3*); -SQLITE_API const void *SQLITE_STDCALL sqlite3_errmsg16(sqlite3*); -SQLITE_API const char *SQLITE_STDCALL sqlite3_errstr(int); - -/* -** CAPI3REF: Prepared Statement Object -** KEYWORDS: {prepared statement} {prepared statements} -** -** An instance of this object represents a single SQL statement that -** has been compiled into binary form and is ready to be evaluated. -** -** Think of each SQL statement as a separate computer program. The -** original SQL text is source code. A prepared statement object -** is the compiled object code. All SQL must be converted into a -** prepared statement before it can be run. -** -** The life-cycle of a prepared statement object usually goes like this: -** -**
    -**
  1. Create the prepared statement object using [sqlite3_prepare_v2()]. -**
  2. Bind values to [parameters] using the sqlite3_bind_*() -** interfaces. -**
  3. Run the SQL by calling [sqlite3_step()] one or more times. -**
  4. Reset the prepared statement using [sqlite3_reset()] then go back -** to step 2. Do this zero or more times. -**
  5. Destroy the object using [sqlite3_finalize()]. -**
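Spelled out in code, the five steps above look roughly like this sketch; the table, column, and helper name are invented for the example, and the reset-and-rebind loop of step 4 is omitted for brevity.

```c
#include <sqlite3.h>
#include <stdio.h>

static int print_age(sqlite3 *db, const char *zName){
  sqlite3_stmt *pStmt = 0;
  int rc = sqlite3_prepare_v2(db,                          /* step 1 */
      "SELECT age FROM people WHERE name=?1", -1, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);   /* step 2 */
  while( (rc = sqlite3_step(pStmt))==SQLITE_ROW ){         /* step 3 */
    printf("%d\n", sqlite3_column_int(pStmt, 0));
  }
  sqlite3_finalize(pStmt);                                 /* step 5 */
  return rc==SQLITE_DONE ? SQLITE_OK : rc;
}
```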
-*/ -typedef struct sqlite3_stmt sqlite3_stmt; - -/* -** CAPI3REF: Run-time Limits -** METHOD: sqlite3 -** -** ^(This interface allows the size of various constructs to be limited -** on a connection by connection basis. The first parameter is the -** [database connection] whose limit is to be set or queried. The -** second parameter is one of the [limit categories] that define a -** class of constructs to be size limited. The third parameter is the -** new limit for that construct.)^ -** -** ^If the new limit is a negative number, the limit is unchanged. -** ^(For each limit category SQLITE_LIMIT_NAME there is a -** [limits | hard upper bound] -** set at compile-time by a C preprocessor macro called -** [limits | SQLITE_MAX_NAME]. -** (The "_LIMIT_" in the name is changed to "_MAX_".))^ -** ^Attempts to increase a limit above its hard upper bound are -** silently truncated to the hard upper bound. -** -** ^Regardless of whether or not the limit was changed, the -** [sqlite3_limit()] interface returns the prior value of the limit. -** ^Hence, to find the current value of a limit without changing it, -** simply invoke this interface with the third parameter set to -1. -** -** Run-time limits are intended for use in applications that manage -** both their own internal database and also databases that are controlled -** by untrusted external sources. An example application might be a -** web browser that has its own databases for storing history and -** separate databases controlled by JavaScript applications downloaded -** off the Internet. The internal databases can be given the -** large, default limits. Databases managed by external sources can -** be given much smaller limits designed to prevent a denial of service -** attack. Developers might also want to use the [sqlite3_set_authorizer()] -** interface to further control untrusted SQL. The size of the database -** created by an untrusted script can be contained using the -** [max_page_count] [PRAGMA]. -** -** New run-time limit categories may be added in future releases. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_limit(sqlite3*, int id, int newVal); - -/* -** CAPI3REF: Run-Time Limit Categories -** KEYWORDS: {limit category} {*limit categories} -** -** These constants define various performance limits -** that can be lowered at run-time using [sqlite3_limit()]. -** The synopsis of the meanings of the various limits is shown below. -** Additional information is available at [limits | Limits in SQLite]. -** -**
-** [[SQLITE_LIMIT_LENGTH]] ^(
SQLITE_LIMIT_LENGTH
-**
The maximum size of any string or BLOB or table row, in bytes.
)^ -** -** [[SQLITE_LIMIT_SQL_LENGTH]] ^(
SQLITE_LIMIT_SQL_LENGTH
-**
The maximum length of an SQL statement, in bytes.
)^ -** -** [[SQLITE_LIMIT_COLUMN]] ^(
SQLITE_LIMIT_COLUMN
-**
The maximum number of columns in a table definition or in the -** result set of a [SELECT] or the maximum number of columns in an index -** or in an ORDER BY or GROUP BY clause.
)^ -** -** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(
SQLITE_LIMIT_EXPR_DEPTH
-**
The maximum depth of the parse tree on any expression.
)^ -** -** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(
SQLITE_LIMIT_COMPOUND_SELECT
-**
The maximum number of terms in a compound SELECT statement.
)^ -** -** [[SQLITE_LIMIT_VDBE_OP]] ^(
SQLITE_LIMIT_VDBE_OP
-**
The maximum number of instructions in a virtual machine program -** used to implement an SQL statement. This limit is not currently -** enforced, though that might be added in some future release of -** SQLite.
)^ -** -** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(
SQLITE_LIMIT_FUNCTION_ARG
-**
The maximum number of arguments on a function.
)^ -** -** [[SQLITE_LIMIT_ATTACHED]] ^(
SQLITE_LIMIT_ATTACHED
-**
The maximum number of [ATTACH | attached databases].)^
-** -** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]] -** ^(
SQLITE_LIMIT_LIKE_PATTERN_LENGTH
-**
The maximum length of the pattern argument to the [LIKE] or -** [GLOB] operators.
)^ -** -** [[SQLITE_LIMIT_VARIABLE_NUMBER]] -** ^(
SQLITE_LIMIT_VARIABLE_NUMBER
-**
The maximum index number of any [parameter] in an SQL statement.)^ -** -** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(
SQLITE_LIMIT_TRIGGER_DEPTH
-**
The maximum depth of recursion for triggers.
)^ -** -** [[SQLITE_LIMIT_WORKER_THREADS]] ^(
SQLITE_LIMIT_WORKER_THREADS
-**
The maximum number of auxiliary worker threads that a single -** [prepared statement] may start.
)^ -**
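Following the untrusted-source advice above, an application might clamp a few of these categories before evaluating external SQL; the particular values in this sketch are illustrative assumptions, not recommendations from the header.

```c
#include <sqlite3.h>

static void harden(sqlite3 *db){
  sqlite3_limit(db, SQLITE_LIMIT_LENGTH,     100000);  /* strings/BLOBs/rows */
  sqlite3_limit(db, SQLITE_LIMIT_SQL_LENGTH,  10000);  /* statement text */
  sqlite3_limit(db, SQLITE_LIMIT_ATTACHED,        0);  /* no ATTACH */
  /* Passing -1 queries a limit without changing it. */
  (void)sqlite3_limit(db, SQLITE_LIMIT_VARIABLE_NUMBER, -1);
}
```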
-*/ -#define SQLITE_LIMIT_LENGTH 0 -#define SQLITE_LIMIT_SQL_LENGTH 1 -#define SQLITE_LIMIT_COLUMN 2 -#define SQLITE_LIMIT_EXPR_DEPTH 3 -#define SQLITE_LIMIT_COMPOUND_SELECT 4 -#define SQLITE_LIMIT_VDBE_OP 5 -#define SQLITE_LIMIT_FUNCTION_ARG 6 -#define SQLITE_LIMIT_ATTACHED 7 -#define SQLITE_LIMIT_LIKE_PATTERN_LENGTH 8 -#define SQLITE_LIMIT_VARIABLE_NUMBER 9 -#define SQLITE_LIMIT_TRIGGER_DEPTH 10 -#define SQLITE_LIMIT_WORKER_THREADS 11 - -/* -** CAPI3REF: Compiling An SQL Statement -** KEYWORDS: {SQL statement compiler} -** METHOD: sqlite3 -** CONSTRUCTOR: sqlite3_stmt -** -** To execute an SQL query, it must first be compiled into a byte-code -** program using one of these routines. -** -** The first argument, "db", is a [database connection] obtained from a -** prior successful call to [sqlite3_open()], [sqlite3_open_v2()] or -** [sqlite3_open16()]. The database connection must not have been closed. -** -** The second argument, "zSql", is the statement to be compiled, encoded -** as either UTF-8 or UTF-16. The sqlite3_prepare() and sqlite3_prepare_v2() -** interfaces use UTF-8, and sqlite3_prepare16() and sqlite3_prepare16_v2() -** use UTF-16. -** -** ^If the nByte argument is negative, then zSql is read up to the -** first zero terminator. ^If nByte is positive, then it is the -** number of bytes read from zSql. ^If nByte is zero, then no prepared -** statement is generated. -** If the caller knows that the supplied string is nul-terminated, then -** there is a small performance advantage to passing an nByte parameter that -** is the number of bytes in the input string including -** the nul-terminator. -** -** ^If pzTail is not NULL then *pzTail is made to point to the first byte -** past the end of the first SQL statement in zSql. These routines only -** compile the first statement in zSql, so *pzTail is left pointing to -** what remains uncompiled. -** -** ^*ppStmt is left pointing to a compiled [prepared statement] that can be -** executed using [sqlite3_step()]. ^If there is an error, *ppStmt is set -** to NULL. ^If the input text contains no SQL (if the input is an empty -** string or a comment) then *ppStmt is set to NULL. -** The calling procedure is responsible for deleting the compiled -** SQL statement using [sqlite3_finalize()] after it has finished with it. -** ppStmt may not be NULL. -** -** ^On success, the sqlite3_prepare() family of routines return [SQLITE_OK]; -** otherwise an [error code] is returned. -** -** The sqlite3_prepare_v2() and sqlite3_prepare16_v2() interfaces are -** recommended for all new programs. The two older interfaces are retained -** for backwards compatibility, but their use is discouraged. -** ^In the "v2" interfaces, the prepared statement -** that is returned (the [sqlite3_stmt] object) contains a copy of the -** original SQL text. This causes the [sqlite3_step()] interface to -** behave differently in three ways: -** -**
-**
-** 1. ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it
-**    always used to do, [sqlite3_step()] will automatically recompile the SQL
-**    statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY]
-**    retries will occur before sqlite3_step() gives up and returns an error.
-**
-** 2. ^When an error occurs, [sqlite3_step()] will return one of the detailed
-**    [error codes] or [extended error codes]. ^The legacy behavior was that
-**    [sqlite3_step()] would only return a generic [SQLITE_ERROR] result code
-**    and the application would have to make a second call to [sqlite3_reset()]
-**    in order to find the underlying cause of the problem. With the "v2" prepare
-**    interfaces, the underlying reason for the error is returned immediately.
-**
-** 3. ^If the specific value bound to [parameter | host parameter] in the
-**    WHERE clause might influence the choice of query plan for a statement,
-**    then the statement will be automatically recompiled, as if there had been
-**    a schema change, on the first [sqlite3_step()] call following any change
-**    to the [sqlite3_bind_text | bindings] of that [parameter].
-**    ^The specific value of WHERE-clause [parameter] might influence the
-**    choice of query plan if the parameter is the left-hand side of a [LIKE]
-**    or [GLOB] operator or if the parameter is compared to an indexed column
-**    and the [SQLITE_ENABLE_STAT3] compile-time option is enabled.
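A minimal sketch of the compile/evaluate/destroy cycle described above, using the recommended "v2" interface; the open connection `db` and the table `t1` are assumptions for illustration only:

```c
#include <stdio.h>
#include <sqlite3.h>

/* Prepare with the "v2" interface so sqlite3_step() reports detailed
** error codes directly and recompiles automatically on schema change. */
static int print_names(sqlite3 *db){
  sqlite3_stmt *pStmt = 0;
  int rc = sqlite3_prepare_v2(db, "SELECT name FROM t1;", -1, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  while( (rc = sqlite3_step(pStmt))==SQLITE_ROW ){
    printf("%s\n", (const char*)sqlite3_column_text(pStmt, 0));
  }
  sqlite3_finalize(pStmt);             /* caller must always finalize */
  return rc==SQLITE_DONE ? SQLITE_OK : rc;
}
```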
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_prepare( - sqlite3 *db, /* Database handle */ - const char *zSql, /* SQL statement, UTF-8 encoded */ - int nByte, /* Maximum length of zSql in bytes. */ - sqlite3_stmt **ppStmt, /* OUT: Statement handle */ - const char **pzTail /* OUT: Pointer to unused portion of zSql */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_prepare_v2( - sqlite3 *db, /* Database handle */ - const char *zSql, /* SQL statement, UTF-8 encoded */ - int nByte, /* Maximum length of zSql in bytes. */ - sqlite3_stmt **ppStmt, /* OUT: Statement handle */ - const char **pzTail /* OUT: Pointer to unused portion of zSql */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_prepare16( - sqlite3 *db, /* Database handle */ - const void *zSql, /* SQL statement, UTF-16 encoded */ - int nByte, /* Maximum length of zSql in bytes. */ - sqlite3_stmt **ppStmt, /* OUT: Statement handle */ - const void **pzTail /* OUT: Pointer to unused portion of zSql */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( - sqlite3 *db, /* Database handle */ - const void *zSql, /* SQL statement, UTF-16 encoded */ - int nByte, /* Maximum length of zSql in bytes. */ - sqlite3_stmt **ppStmt, /* OUT: Statement handle */ - const void **pzTail /* OUT: Pointer to unused portion of zSql */ -); - -/* -** CAPI3REF: Retrieving Statement SQL -** METHOD: sqlite3_stmt -** -** ^This interface can be used to retrieve a saved copy of the original -** SQL text used to create a [prepared statement] if that statement was -** compiled using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()]. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_sql(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Determine If An SQL Statement Writes The Database -** METHOD: sqlite3_stmt -** -** ^The sqlite3_stmt_readonly(X) interface returns true (non-zero) if -** and only if the [prepared statement] X makes no direct changes to -** the content of the database file. -** -** Note that [application-defined SQL functions] or -** [virtual tables] might change the database indirectly as a side effect. -** ^(For example, if an application defines a function "eval()" that -** calls [sqlite3_exec()], then the following SQL statement would -** change the database file through side-effects: -** -**
-**    SELECT eval('DELETE FROM t1') FROM t2;
-** 
-** -** But because the [SELECT] statement does not change the database file -** directly, sqlite3_stmt_readonly() would still return true.)^ -** -** ^Transaction control statements such as [BEGIN], [COMMIT], [ROLLBACK], -** [SAVEPOINT], and [RELEASE] cause sqlite3_stmt_readonly() to return true, -** since the statements themselves do not actually modify the database but -** rather they control the timing of when other statements modify the -** database. ^The [ATTACH] and [DETACH] statements also cause -** sqlite3_stmt_readonly() to return true since, while those statements -** change the configuration of a database connection, they do not make -** changes to the content of the database files on disk. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_stmt_readonly(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Determine If A Prepared Statement Has Been Reset -** METHOD: sqlite3_stmt -** -** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the -** [prepared statement] S has been stepped at least once using -** [sqlite3_step(S)] but has neither run to completion (returned -** [SQLITE_DONE] from [sqlite3_step(S)]) nor -** been reset using [sqlite3_reset(S)]. ^The sqlite3_stmt_busy(S) -** interface returns false if S is a NULL pointer. If S is not a -** NULL pointer and is not a pointer to a valid [prepared statement] -** object, then the behavior is undefined and probably undesirable. -** -** This interface can be used in combination [sqlite3_next_stmt()] -** to locate all prepared statements associated with a database -** connection that are in need of being reset. This can be used, -** for example, in diagnostic routines to search for prepared -** statements that are holding a transaction open. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_stmt_busy(sqlite3_stmt*); - -/* -** CAPI3REF: Dynamically Typed Value Object -** KEYWORDS: {protected sqlite3_value} {unprotected sqlite3_value} -** -** SQLite uses the sqlite3_value object to represent all values -** that can be stored in a database table. SQLite uses dynamic typing -** for the values it stores. ^Values stored in sqlite3_value objects -** can be integers, floating point values, strings, BLOBs, or NULL. -** -** An sqlite3_value object may be either "protected" or "unprotected". -** Some interfaces require a protected sqlite3_value. Other interfaces -** will accept either a protected or an unprotected sqlite3_value. -** Every interface that accepts sqlite3_value arguments specifies -** whether or not it requires a protected sqlite3_value. The -** [sqlite3_value_dup()] interface can be used to construct a new -** protected sqlite3_value from an unprotected sqlite3_value. -** -** The terms "protected" and "unprotected" refer to whether or not -** a mutex is held. An internal mutex is held for a protected -** sqlite3_value object but no mutex is held for an unprotected -** sqlite3_value object. If SQLite is compiled to be single-threaded -** (with [SQLITE_THREADSAFE=0] and with [sqlite3_threadsafe()] returning 0) -** or if SQLite is run in one of reduced mutex modes -** [SQLITE_CONFIG_SINGLETHREAD] or [SQLITE_CONFIG_MULTITHREAD] -** then there is no distinction between protected and unprotected -** sqlite3_value objects and they can be used interchangeably. However, -** for maximum code portability it is recommended that applications -** still make the distinction between protected and unprotected -** sqlite3_value objects even when not strictly required. 
-** -** ^The sqlite3_value objects that are passed as parameters into the -** implementation of [application-defined SQL functions] are protected. -** ^The sqlite3_value object returned by -** [sqlite3_column_value()] is unprotected. -** Unprotected sqlite3_value objects may only be used with -** [sqlite3_result_value()] and [sqlite3_bind_value()]. -** The [sqlite3_value_blob | sqlite3_value_type()] family of -** interfaces require protected sqlite3_value objects. -*/ -typedef struct Mem sqlite3_value; - -/* -** CAPI3REF: SQL Function Context Object -** -** The context in which an SQL function executes is stored in an -** sqlite3_context object. ^A pointer to an sqlite3_context object -** is always first parameter to [application-defined SQL functions]. -** The application-defined SQL function implementation will pass this -** pointer through into calls to [sqlite3_result_int | sqlite3_result()], -** [sqlite3_aggregate_context()], [sqlite3_user_data()], -** [sqlite3_context_db_handle()], [sqlite3_get_auxdata()], -** and/or [sqlite3_set_auxdata()]. -*/ -typedef struct sqlite3_context sqlite3_context; - -/* -** CAPI3REF: Binding Values To Prepared Statements -** KEYWORDS: {host parameter} {host parameters} {host parameter name} -** KEYWORDS: {SQL parameter} {SQL parameters} {parameter binding} -** METHOD: sqlite3_stmt -** -** ^(In the SQL statement text input to [sqlite3_prepare_v2()] and its variants, -** literals may be replaced by a [parameter] that matches one of following -** templates: -** -**
-**
-**   • ?
-**   • ?NNN
-**   • :VVV
-**   • @VVV
-**   • $VVV
-** -** In the templates above, NNN represents an integer literal, -** and VVV represents an alphanumeric identifier.)^ ^The values of these -** parameters (also called "host parameter names" or "SQL parameters") -** can be set using the sqlite3_bind_*() routines defined here. -** -** ^The first argument to the sqlite3_bind_*() routines is always -** a pointer to the [sqlite3_stmt] object returned from -** [sqlite3_prepare_v2()] or its variants. -** -** ^The second argument is the index of the SQL parameter to be set. -** ^The leftmost SQL parameter has an index of 1. ^When the same named -** SQL parameter is used more than once, second and subsequent -** occurrences have the same index as the first occurrence. -** ^The index for named parameters can be looked up using the -** [sqlite3_bind_parameter_index()] API if desired. ^The index -** for "?NNN" parameters is the value of NNN. -** ^The NNN value must be between 1 and the [sqlite3_limit()] -** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999). -** -** ^The third argument is the value to bind to the parameter. -** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() -** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter -** is ignored and the end result is the same as sqlite3_bind_null(). -** -** ^(In those routines that have a fourth argument, its value is the -** number of bytes in the parameter. To be clear: the value is the -** number of bytes in the value, not the number of characters.)^ -** ^If the fourth parameter to sqlite3_bind_text() or sqlite3_bind_text16() -** is negative, then the length of the string is -** the number of bytes up to the first zero terminator. -** If the fourth parameter to sqlite3_bind_blob() is negative, then -** the behavior is undefined. -** If a non-negative fourth parameter is provided to sqlite3_bind_text() -** or sqlite3_bind_text16() or sqlite3_bind_text64() then -** that parameter must be the byte offset -** where the NUL terminator would occur assuming the string were NUL -** terminated. If any NUL characters occur at byte offsets less than -** the value of the fourth parameter then the resulting string value will -** contain embedded NULs. The result of expressions involving strings -** with embedded NULs is undefined. -** -** ^The fifth argument to the BLOB and string binding interfaces -** is a destructor used to dispose of the BLOB or -** string after SQLite has finished with it. ^The destructor is called -** to dispose of the BLOB or string even if the call to bind API fails. -** ^If the fifth argument is -** the special value [SQLITE_STATIC], then SQLite assumes that the -** information is in static, unmanaged space and does not need to be freed. -** ^If the fifth argument has the value [SQLITE_TRANSIENT], then -** SQLite makes its own private copy of the data immediately, before -** the sqlite3_bind_*() routine returns. -** -** ^The sixth argument to sqlite3_bind_text64() must be one of -** [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE] -** to specify the encoding of the text in the third parameter. If -** the sixth argument to sqlite3_bind_text64() is not one of the -** allowed values shown above, or if the text encoding is different -** from the encoding specified by the sixth parameter, then the behavior -** is undefined. -** -** ^The sqlite3_bind_zeroblob() routine binds a BLOB of length N that -** is filled with zeroes. 
^A zeroblob uses a fixed amount of memory -** (just an integer to hold its size) while it is being processed. -** Zeroblobs are intended to serve as placeholders for BLOBs whose -** content is later written using -** [sqlite3_blob_open | incremental BLOB I/O] routines. -** ^A negative value for the zeroblob results in a zero-length BLOB. -** -** ^If any of the sqlite3_bind_*() routines are called with a NULL pointer -** for the [prepared statement] or with a prepared statement for which -** [sqlite3_step()] has been called more recently than [sqlite3_reset()], -** then the call will return [SQLITE_MISUSE]. If any sqlite3_bind_() -** routine is passed a [prepared statement] that has been finalized, the -** result is undefined and probably harmful. -** -** ^Bindings are not cleared by the [sqlite3_reset()] routine. -** ^Unbound parameters are interpreted as NULL. -** -** ^The sqlite3_bind_* routines return [SQLITE_OK] on success or an -** [error code] if anything goes wrong. -** ^[SQLITE_TOOBIG] might be returned if the size of a string or BLOB -** exceeds limits imposed by [sqlite3_limit]([SQLITE_LIMIT_LENGTH]) or -** [SQLITE_MAX_LENGTH]. -** ^[SQLITE_RANGE] is returned if the parameter -** index is out of range. ^[SQLITE_NOMEM] is returned if malloc() fails. -** -** See also: [sqlite3_bind_parameter_count()], -** [sqlite3_bind_parameter_name()], and [sqlite3_bind_parameter_index()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, void(*)(void*)); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_blob64(sqlite3_stmt*, int, const void*, sqlite3_uint64, - void(*)(void*)); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_double(sqlite3_stmt*, int, double); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_int(sqlite3_stmt*, int, int); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_int64(sqlite3_stmt*, int, sqlite3_int64); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_null(sqlite3_stmt*, int); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_text(sqlite3_stmt*,int,const char*,int,void(*)(void*)); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int, void(*)(void*)); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_text64(sqlite3_stmt*, int, const char*, sqlite3_uint64, - void(*)(void*), unsigned char encoding); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n); -SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt*, int, sqlite3_uint64); - -/* -** CAPI3REF: Number Of SQL Parameters -** METHOD: sqlite3_stmt -** -** ^This routine can be used to find the number of [SQL parameters] -** in a [prepared statement]. SQL parameters are tokens of the -** form "?", "?NNN", ":AAA", "$AAA", or "@AAA" that serve as -** placeholders for values that are [sqlite3_bind_blob | bound] -** to the parameters at a later time. -** -** ^(This routine actually returns the index of the largest (rightmost) -** parameter. For all forms except ?NNN, this will correspond to the -** number of unique parameters. If parameters of the ?NNN form are used, -** there may be gaps in the list.)^ -** -** See also: [sqlite3_bind_blob|sqlite3_bind()], -** [sqlite3_bind_parameter_name()], and -** [sqlite3_bind_parameter_index()]. 
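A short sketch of the binding rules above; the statement text and the parameter name `:name` are hypothetical:

```c
#include <sqlite3.h>

/* Bind a UTF-8 string to a named parameter.  SQLITE_TRANSIENT makes
** SQLite take a private copy of the text before the call returns. */
static int insert_name(sqlite3 *db, const char *zName){
  sqlite3_stmt *pStmt = 0;
  int rc = sqlite3_prepare_v2(db,
      "INSERT INTO t1(name) VALUES(:name);", -1, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  int idx = sqlite3_bind_parameter_index(pStmt, ":name");
  sqlite3_bind_text(pStmt, idx, zName, -1, SQLITE_TRANSIENT);
  rc = sqlite3_step(pStmt);
  sqlite3_reset(pStmt);           /* NB: reset does not clear bindings */
  sqlite3_clear_bindings(pStmt);  /* parameters revert to NULL */
  sqlite3_finalize(pStmt);
  return rc==SQLITE_DONE ? SQLITE_OK : rc;
}
```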
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_count(sqlite3_stmt*); - -/* -** CAPI3REF: Name Of A Host Parameter -** METHOD: sqlite3_stmt -** -** ^The sqlite3_bind_parameter_name(P,N) interface returns -** the name of the N-th [SQL parameter] in the [prepared statement] P. -** ^(SQL parameters of the form "?NNN" or ":AAA" or "@AAA" or "$AAA" -** have a name which is the string "?NNN" or ":AAA" or "@AAA" or "$AAA" -** respectively. -** In other words, the initial ":" or "$" or "@" or "?" -** is included as part of the name.)^ -** ^Parameters of the form "?" without a following integer have no name -** and are referred to as "nameless" or "anonymous parameters". -** -** ^The first host parameter has an index of 1, not 0. -** -** ^If the value N is out of range or if the N-th parameter is -** nameless, then NULL is returned. ^The returned string is -** always in UTF-8 encoding even if the named parameter was -** originally specified as UTF-16 in [sqlite3_prepare16()] or -** [sqlite3_prepare16_v2()]. -** -** See also: [sqlite3_bind_blob|sqlite3_bind()], -** [sqlite3_bind_parameter_count()], and -** [sqlite3_bind_parameter_index()]. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_bind_parameter_name(sqlite3_stmt*, int); - -/* -** CAPI3REF: Index Of A Parameter With A Given Name -** METHOD: sqlite3_stmt -** -** ^Return the index of an SQL parameter given its name. ^The -** index value returned is suitable for use as the second -** parameter to [sqlite3_bind_blob|sqlite3_bind()]. ^A zero -** is returned if no matching parameter is found. ^The parameter -** name must be given in UTF-8 even if the original statement -** was prepared from UTF-16 text using [sqlite3_prepare16_v2()]. -** -** See also: [sqlite3_bind_blob|sqlite3_bind()], -** [sqlite3_bind_parameter_count()], and -** [sqlite3_bind_parameter_name()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName); - -/* -** CAPI3REF: Reset All Bindings On A Prepared Statement -** METHOD: sqlite3_stmt -** -** ^Contrary to the intuition of many, [sqlite3_reset()] does not reset -** the [sqlite3_bind_blob | bindings] on a [prepared statement]. -** ^Use this routine to reset all host parameters to NULL. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_clear_bindings(sqlite3_stmt*); - -/* -** CAPI3REF: Number Of Columns In A Result Set -** METHOD: sqlite3_stmt -** -** ^Return the number of columns in the result set returned by the -** [prepared statement]. ^This routine returns 0 if pStmt is an SQL -** statement that does not return data (for example an [UPDATE]). -** -** See also: [sqlite3_data_count()] -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_column_count(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Column Names In A Result Set -** METHOD: sqlite3_stmt -** -** ^These routines return the name assigned to a particular column -** in the result set of a [SELECT] statement. ^The sqlite3_column_name() -** interface returns a pointer to a zero-terminated UTF-8 string -** and sqlite3_column_name16() returns a pointer to a zero-terminated -** UTF-16 string. ^The first parameter is the [prepared statement] -** that implements the [SELECT] statement. ^The second parameter is the -** column number. ^The leftmost column is number 0. 
-** -** ^The returned string pointer is valid until either the [prepared statement] -** is destroyed by [sqlite3_finalize()] or until the statement is automatically -** reprepared by the first call to [sqlite3_step()] for a particular run -** or until the next call to -** sqlite3_column_name() or sqlite3_column_name16() on the same column. -** -** ^If sqlite3_malloc() fails during the processing of either routine -** (for example during a conversion from UTF-8 to UTF-16) then a -** NULL pointer is returned. -** -** ^The name of a result column is the value of the "AS" clause for -** that column, if there is an AS clause. If there is no AS clause -** then the name of the column is unspecified and may change from -** one release of SQLite to the next. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_column_name(sqlite3_stmt*, int N); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_name16(sqlite3_stmt*, int N); - -/* -** CAPI3REF: Source Of Data In A Query Result -** METHOD: sqlite3_stmt -** -** ^These routines provide a means to determine the database, table, and -** table column that is the origin of a particular result column in -** [SELECT] statement. -** ^The name of the database or table or column can be returned as -** either a UTF-8 or UTF-16 string. ^The _database_ routines return -** the database name, the _table_ routines return the table name, and -** the origin_ routines return the column name. -** ^The returned string is valid until the [prepared statement] is destroyed -** using [sqlite3_finalize()] or until the statement is automatically -** reprepared by the first call to [sqlite3_step()] for a particular run -** or until the same information is requested -** again in a different encoding. -** -** ^The names returned are the original un-aliased names of the -** database, table, and column. -** -** ^The first argument to these interfaces is a [prepared statement]. -** ^These functions return information about the Nth result column returned by -** the statement, where N is the second function argument. -** ^The left-most column is column 0 for these routines. -** -** ^If the Nth column returned by the statement is an expression or -** subquery and is not a column value, then all of these functions return -** NULL. ^These routine might also return NULL if a memory allocation error -** occurs. ^Otherwise, they return the name of the attached database, table, -** or column that query result column was extracted from. -** -** ^As with all other SQLite APIs, those whose names end with "16" return -** UTF-16 encoded strings and the other functions return UTF-8. -** -** ^These APIs are only available if the library was compiled with the -** [SQLITE_ENABLE_COLUMN_METADATA] C-preprocessor symbol. -** -** If two or more threads call one or more of these routines against the same -** prepared statement and column at the same time then the results are -** undefined. -** -** If two or more threads call one or more -** [sqlite3_column_database_name | column metadata interfaces] -** for the same [prepared statement] and result column -** at the same time then the results are undefined. 
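To illustrate the column-name interfaces just described, a sketch that assumes `pStmt` is a prepared SELECT:

```c
#include <stdio.h>
#include <sqlite3.h>

static void show_columns(sqlite3_stmt *pStmt){
  int i, n = sqlite3_column_count(pStmt);
  for(i=0; i<n; i++){
    /* The name is the AS clause if present; otherwise unspecified. */
    printf("column %d: %s\n", i, sqlite3_column_name(pStmt, i));
  }
}
```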
-*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_column_database_name(sqlite3_stmt*,int); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_database_name16(sqlite3_stmt*,int); -SQLITE_API const char *SQLITE_STDCALL sqlite3_column_table_name(sqlite3_stmt*,int); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_table_name16(sqlite3_stmt*,int); -SQLITE_API const char *SQLITE_STDCALL sqlite3_column_origin_name(sqlite3_stmt*,int); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_origin_name16(sqlite3_stmt*,int); - -/* -** CAPI3REF: Declared Datatype Of A Query Result -** METHOD: sqlite3_stmt -** -** ^(The first parameter is a [prepared statement]. -** If this statement is a [SELECT] statement and the Nth column of the -** returned result set of that [SELECT] is a table column (not an -** expression or subquery) then the declared type of the table -** column is returned.)^ ^If the Nth column of the result set is an -** expression or subquery, then a NULL pointer is returned. -** ^The returned string is always UTF-8 encoded. -** -** ^(For example, given the database schema: -** -** CREATE TABLE t1(c1 VARIANT); -** -** and the following statement to be compiled: -** -** SELECT c1 + 1, c1 FROM t1; -** -** this routine would return the string "VARIANT" for the second result -** column (i==1), and a NULL pointer for the first result column (i==0).)^ -** -** ^SQLite uses dynamic run-time typing. ^So just because a column -** is declared to contain a particular type does not mean that the -** data stored in that column is of the declared type. SQLite is -** strongly typed, but the typing is dynamic not static. ^Type -** is associated with individual values, not with the containers -** used to hold those values. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_column_decltype(sqlite3_stmt*,int); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_decltype16(sqlite3_stmt*,int); - -/* -** CAPI3REF: Evaluate An SQL Statement -** METHOD: sqlite3_stmt -** -** After a [prepared statement] has been prepared using either -** [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] or one of the legacy -** interfaces [sqlite3_prepare()] or [sqlite3_prepare16()], this function -** must be called one or more times to evaluate the statement. -** -** The details of the behavior of the sqlite3_step() interface depend -** on whether the statement was prepared using the newer "v2" interface -** [sqlite3_prepare_v2()] and [sqlite3_prepare16_v2()] or the older legacy -** interface [sqlite3_prepare()] and [sqlite3_prepare16()]. The use of the -** new "v2" interface is recommended for new applications but the legacy -** interface will continue to be supported. -** -** ^In the legacy interface, the return value will be either [SQLITE_BUSY], -** [SQLITE_DONE], [SQLITE_ROW], [SQLITE_ERROR], or [SQLITE_MISUSE]. -** ^With the "v2" interface, any of the other [result codes] or -** [extended result codes] might be returned as well. -** -** ^[SQLITE_BUSY] means that the database engine was unable to acquire the -** database locks it needs to do its job. ^If the statement is a [COMMIT] -** or occurs outside of an explicit transaction, then you can retry the -** statement. If the statement is not a [COMMIT] and occurs within an -** explicit transaction then you should rollback the transaction before -** continuing. -** -** ^[SQLITE_DONE] means that the statement has finished executing -** successfully. 
sqlite3_step() should not be called again on this virtual -** machine without first calling [sqlite3_reset()] to reset the virtual -** machine back to its initial state. -** -** ^If the SQL statement being executed returns any data, then [SQLITE_ROW] -** is returned each time a new row of data is ready for processing by the -** caller. The values may be accessed using the [column access functions]. -** sqlite3_step() is called again to retrieve the next row of data. -** -** ^[SQLITE_ERROR] means that a run-time error (such as a constraint -** violation) has occurred. sqlite3_step() should not be called again on -** the VM. More information may be found by calling [sqlite3_errmsg()]. -** ^With the legacy interface, a more specific error code (for example, -** [SQLITE_INTERRUPT], [SQLITE_SCHEMA], [SQLITE_CORRUPT], and so forth) -** can be obtained by calling [sqlite3_reset()] on the -** [prepared statement]. ^In the "v2" interface, -** the more specific error code is returned directly by sqlite3_step(). -** -** [SQLITE_MISUSE] means that the this routine was called inappropriately. -** Perhaps it was called on a [prepared statement] that has -** already been [sqlite3_finalize | finalized] or on one that had -** previously returned [SQLITE_ERROR] or [SQLITE_DONE]. Or it could -** be the case that the same database connection is being used by two or -** more threads at the same moment in time. -** -** For all versions of SQLite up to and including 3.6.23.1, a call to -** [sqlite3_reset()] was required after sqlite3_step() returned anything -** other than [SQLITE_ROW] before any subsequent invocation of -** sqlite3_step(). Failure to reset the prepared statement using -** [sqlite3_reset()] would result in an [SQLITE_MISUSE] return from -** sqlite3_step(). But after version 3.6.23.1, sqlite3_step() began -** calling [sqlite3_reset()] automatically in this circumstance rather -** than returning [SQLITE_MISUSE]. This is not considered a compatibility -** break because any application that ever receives an SQLITE_MISUSE error -** is broken by definition. The [SQLITE_OMIT_AUTORESET] compile-time option -** can be used to restore the legacy behavior. -** -** Goofy Interface Alert: In the legacy interface, the sqlite3_step() -** API always returns a generic error code, [SQLITE_ERROR], following any -** error other than [SQLITE_BUSY] and [SQLITE_MISUSE]. You must call -** [sqlite3_reset()] or [sqlite3_finalize()] in order to find one of the -** specific [error codes] that better describes the error. -** We admit that this is a goofy design. The problem has been fixed -** with the "v2" interface. If you prepare all of your SQL statements -** using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] instead -** of the legacy [sqlite3_prepare()] and [sqlite3_prepare16()] interfaces, -** then the more specific [error codes] are returned directly -** by sqlite3_step(). The use of the "v2" interface is recommended. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_step(sqlite3_stmt*); - -/* -** CAPI3REF: Number of columns in a result set -** METHOD: sqlite3_stmt -** -** ^The sqlite3_data_count(P) interface returns the number of columns in the -** current row of the result set of [prepared statement] P. -** ^If prepared statement P does not have results ready to return -** (via calls to the [sqlite3_column_int | sqlite3_column_*()] of -** interfaces) then sqlite3_data_count(P) returns 0. -** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer. 
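A sketch of the retry guidance for [SQLITE_BUSY] above; the prepared COMMIT statement `pCommit` is hypothetical:

```c
#include <sqlite3.h>

/* Retry a COMMIT with a short back-off while the engine reports
** SQLITE_BUSY.  (Mid-transaction DML should roll back instead.) */
static int commit_with_retry(sqlite3_stmt *pCommit){
  int rc, nTry = 0;
  do{
    rc = sqlite3_step(pCommit);
    if( rc==SQLITE_BUSY ) sqlite3_sleep(100);   /* wait 100 ms */
  }while( rc==SQLITE_BUSY && ++nTry<10 );
  sqlite3_reset(pCommit);
  return rc;
}
```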
-** ^The sqlite3_data_count(P) routine returns 0 if the previous call to -** [sqlite3_step](P) returned [SQLITE_DONE]. ^The sqlite3_data_count(P) -** will return non-zero if previous call to [sqlite3_step](P) returned -** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum] -** where it always returns zero since each step of that multi-step -** pragma returns 0 columns of data. -** -** See also: [sqlite3_column_count()] -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Fundamental Datatypes -** KEYWORDS: SQLITE_TEXT -** -** ^(Every value in SQLite has one of five fundamental datatypes: -** -**
-**
-**   • 64-bit signed integer
-**   • 64-bit IEEE floating point number
-**   • string
-**   • BLOB
-**   • NULL
)^ -** -** These constants are codes for each of those types. -** -** Note that the SQLITE_TEXT constant was also used in SQLite version 2 -** for a completely different meaning. Software that links against both -** SQLite version 2 and SQLite version 3 should use SQLITE3_TEXT, not -** SQLITE_TEXT. -*/ -#define SQLITE_INTEGER 1 -#define SQLITE_FLOAT 2 -#define SQLITE_BLOB 4 -#define SQLITE_NULL 5 -#ifdef SQLITE_TEXT -# undef SQLITE_TEXT -#else -# define SQLITE_TEXT 3 -#endif -#define SQLITE3_TEXT 3 - -/* -** CAPI3REF: Result Values From A Query -** KEYWORDS: {column access functions} -** METHOD: sqlite3_stmt -** -** ^These routines return information about a single column of the current -** result row of a query. ^In every case the first argument is a pointer -** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*] -** that was returned from [sqlite3_prepare_v2()] or one of its variants) -** and the second argument is the index of the column for which information -** should be returned. ^The leftmost column of the result set has the index 0. -** ^The number of columns in the result can be determined using -** [sqlite3_column_count()]. -** -** If the SQL statement does not currently point to a valid row, or if the -** column index is out of range, the result is undefined. -** These routines may only be called when the most recent call to -** [sqlite3_step()] has returned [SQLITE_ROW] and neither -** [sqlite3_reset()] nor [sqlite3_finalize()] have been called subsequently. -** If any of these routines are called after [sqlite3_reset()] or -** [sqlite3_finalize()] or after [sqlite3_step()] has returned -** something other than [SQLITE_ROW], the results are undefined. -** If [sqlite3_step()] or [sqlite3_reset()] or [sqlite3_finalize()] -** are called from a different thread while any of these routines -** are pending, then the results are undefined. -** -** ^The sqlite3_column_type() routine returns the -** [SQLITE_INTEGER | datatype code] for the initial data type -** of the result column. ^The returned value is one of [SQLITE_INTEGER], -** [SQLITE_FLOAT], [SQLITE_TEXT], [SQLITE_BLOB], or [SQLITE_NULL]. The value -** returned by sqlite3_column_type() is only meaningful if no type -** conversions have occurred as described below. After a type conversion, -** the value returned by sqlite3_column_type() is undefined. Future -** versions of SQLite may change the behavior of sqlite3_column_type() -** following a type conversion. -** -** ^If the result is a BLOB or UTF-8 string then the sqlite3_column_bytes() -** routine returns the number of bytes in that BLOB or string. -** ^If the result is a UTF-16 string, then sqlite3_column_bytes() converts -** the string to UTF-8 and then returns the number of bytes. -** ^If the result is a numeric value then sqlite3_column_bytes() uses -** [sqlite3_snprintf()] to convert that value to a UTF-8 string and returns -** the number of bytes in that string. -** ^If the result is NULL, then sqlite3_column_bytes() returns zero. -** -** ^If the result is a BLOB or UTF-16 string then the sqlite3_column_bytes16() -** routine returns the number of bytes in that BLOB or string. -** ^If the result is a UTF-8 string, then sqlite3_column_bytes16() converts -** the string to UTF-16 and then returns the number of bytes. -** ^If the result is a numeric value then sqlite3_column_bytes16() uses -** [sqlite3_snprintf()] to convert that value to a UTF-16 string and returns -** the number of bytes in that string. 
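A sketch of branching on the fundamental datatype codes defined above; call sqlite3_column_type() before any type conversion, since a conversion makes its result undefined:

```c
#include <stdio.h>
#include <sqlite3.h>

static void describe(sqlite3_stmt *pStmt, int iCol){
  switch( sqlite3_column_type(pStmt, iCol) ){
    case SQLITE_INTEGER:
      printf("%lld\n", (long long)sqlite3_column_int64(pStmt, iCol)); break;
    case SQLITE_FLOAT:
      printf("%g\n", sqlite3_column_double(pStmt, iCol)); break;
    case SQLITE_TEXT:
      printf("%s\n", (const char*)sqlite3_column_text(pStmt, iCol)); break;
    case SQLITE_BLOB:
      printf("blob (%d bytes)\n", sqlite3_column_bytes(pStmt, iCol)); break;
    default:  /* SQLITE_NULL */
      printf("NULL\n"); break;
  }
}
```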
-** ^If the result is NULL, then sqlite3_column_bytes16() returns zero. -** -** ^The values returned by [sqlite3_column_bytes()] and -** [sqlite3_column_bytes16()] do not include the zero terminators at the end -** of the string. ^For clarity: the values returned by -** [sqlite3_column_bytes()] and [sqlite3_column_bytes16()] are the number of -** bytes in the string, not the number of characters. -** -** ^Strings returned by sqlite3_column_text() and sqlite3_column_text16(), -** even empty strings, are always zero-terminated. ^The return -** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer. -** -** Warning: ^The object returned by [sqlite3_column_value()] is an -** [unprotected sqlite3_value] object. In a multithreaded environment, -** an unprotected sqlite3_value object may only be used safely with -** [sqlite3_bind_value()] and [sqlite3_result_value()]. -** If the [unprotected sqlite3_value] object returned by -** [sqlite3_column_value()] is used in any other way, including calls -** to routines like [sqlite3_value_int()], [sqlite3_value_text()], -** or [sqlite3_value_bytes()], the behavior is not threadsafe. -** -** These routines attempt to convert the value where appropriate. ^For -** example, if the internal representation is FLOAT and a text result -** is requested, [sqlite3_snprintf()] is used internally to perform the -** conversion automatically. ^(The following table details the conversions -** that are applied: -** -**
-**
-**    Internal Type    Requested Type    Conversion
-**    -------------    --------------    ------------------------------
-**    NULL             INTEGER           Result is 0
-**    NULL             FLOAT             Result is 0.0
-**    NULL             TEXT              Result is a NULL pointer
-**    NULL             BLOB              Result is a NULL pointer
-**    INTEGER          FLOAT             Convert from integer to float
-**    INTEGER          TEXT              ASCII rendering of the integer
-**    INTEGER          BLOB              Same as INTEGER->TEXT
-**    FLOAT            INTEGER           [CAST] to INTEGER
-**    FLOAT            TEXT              ASCII rendering of the float
-**    FLOAT            BLOB              [CAST] to BLOB
-**    TEXT             INTEGER           [CAST] to INTEGER
-**    TEXT             FLOAT             [CAST] to REAL
-**    TEXT             BLOB              No change
-**    BLOB             INTEGER           [CAST] to INTEGER
-**    BLOB             FLOAT             [CAST] to REAL
-**    BLOB             TEXT              Add a zero terminator if needed
-**
)^ -** -** Note that when type conversions occur, pointers returned by prior -** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or -** sqlite3_column_text16() may be invalidated. -** Type conversions and pointer invalidations might occur -** in the following cases: -** -**
-**
-**   • The initial content is a BLOB and sqlite3_column_text() or
-**     sqlite3_column_text16() is called.  A zero-terminator might
-**     need to be added to the string.
-**
-**   • The initial content is UTF-8 text and sqlite3_column_bytes16() or
-**     sqlite3_column_text16() is called.  The content must be converted
-**     to UTF-16.
-**
-**   • The initial content is UTF-16 text and sqlite3_column_bytes() or
-**     sqlite3_column_text() is called.  The content must be converted
-**     to UTF-8.
-** -** ^Conversions between UTF-16be and UTF-16le are always done in place and do -** not invalidate a prior pointer, though of course the content of the buffer -** that the prior pointer references will have been modified. Other kinds -** of conversion are done in place when it is possible, but sometimes they -** are not possible and in those cases prior pointers are invalidated. -** -** The safest policy is to invoke these routines -** in one of the following ways: -** -**
-**
-**   • sqlite3_column_text() followed by sqlite3_column_bytes()
-**   • sqlite3_column_blob() followed by sqlite3_column_bytes()
-**   • sqlite3_column_text16() followed by sqlite3_column_bytes16()
-** -** In other words, you should call sqlite3_column_text(), -** sqlite3_column_blob(), or sqlite3_column_text16() first to force the result -** into the desired format, then invoke sqlite3_column_bytes() or -** sqlite3_column_bytes16() to find the size of the result. Do not mix calls -** to sqlite3_column_text() or sqlite3_column_blob() with calls to -** sqlite3_column_bytes16(), and do not mix calls to sqlite3_column_text16() -** with calls to sqlite3_column_bytes(). -** -** ^The pointers returned are valid until a type conversion occurs as -** described above, or until [sqlite3_step()] or [sqlite3_reset()] or -** [sqlite3_finalize()] is called. ^The memory space used to hold strings -** and BLOBs is freed automatically. Do not pass the pointers returned -** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into -** [sqlite3_free()]. -** -** ^(If a memory allocation error occurs during the evaluation of any -** of these routines, a default value is returned. The default value -** is either the integer 0, the floating point number 0.0, or a NULL -** pointer. Subsequent calls to [sqlite3_errcode()] will return -** [SQLITE_NOMEM].)^ -*/ -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_blob(sqlite3_stmt*, int iCol); -SQLITE_API int SQLITE_STDCALL sqlite3_column_bytes(sqlite3_stmt*, int iCol); -SQLITE_API int SQLITE_STDCALL sqlite3_column_bytes16(sqlite3_stmt*, int iCol); -SQLITE_API double SQLITE_STDCALL sqlite3_column_double(sqlite3_stmt*, int iCol); -SQLITE_API int SQLITE_STDCALL sqlite3_column_int(sqlite3_stmt*, int iCol); -SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_column_int64(sqlite3_stmt*, int iCol); -SQLITE_API const unsigned char *SQLITE_STDCALL sqlite3_column_text(sqlite3_stmt*, int iCol); -SQLITE_API const void *SQLITE_STDCALL sqlite3_column_text16(sqlite3_stmt*, int iCol); -SQLITE_API int SQLITE_STDCALL sqlite3_column_type(sqlite3_stmt*, int iCol); -SQLITE_API sqlite3_value *SQLITE_STDCALL sqlite3_column_value(sqlite3_stmt*, int iCol); - -/* -** CAPI3REF: Destroy A Prepared Statement Object -** DESTRUCTOR: sqlite3_stmt -** -** ^The sqlite3_finalize() function is called to delete a [prepared statement]. -** ^If the most recent evaluation of the statement encountered no errors -** or if the statement is never been evaluated, then sqlite3_finalize() returns -** SQLITE_OK. ^If the most recent evaluation of statement S failed, then -** sqlite3_finalize(S) returns the appropriate [error code] or -** [extended error code]. -** -** ^The sqlite3_finalize(S) routine can be called at any point during -** the life cycle of [prepared statement] S: -** before statement S is ever evaluated, after -** one or more calls to [sqlite3_reset()], or after any call -** to [sqlite3_step()] regardless of whether or not the statement has -** completed execution. -** -** ^Invoking sqlite3_finalize() on a NULL pointer is a harmless no-op. -** -** The application must finalize every [prepared statement] in order to avoid -** resource leaks. It is a grievous error for the application to try to use -** a prepared statement after it has been finalized. Any use of a prepared -** statement after it has been finalized can result in undefined and -** undesirable behavior such as segfaults and heap corruption. 
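Putting the recommended call order above together with [sqlite3_finalize()], a sketch over a hypothetical single-column query:

```c
#include <stdio.h>
#include <sqlite3.h>

static int dump_rows(sqlite3_stmt *pStmt){
  int rc;
  while( (rc = sqlite3_step(pStmt))==SQLITE_ROW ){
    /* Force the UTF-8 form first, then ask for its size in bytes. */
    const unsigned char *zTxt = sqlite3_column_text(pStmt, 0);
    int nByte = sqlite3_column_bytes(pStmt, 0);
    if( zTxt ) fwrite(zTxt, 1, (size_t)nByte, stdout);
    fputc('\n', stdout);
  }
  return sqlite3_finalize(pStmt);  /* reports the last evaluation error */
}
```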
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_finalize(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Reset A Prepared Statement Object -** METHOD: sqlite3_stmt -** -** The sqlite3_reset() function is called to reset a [prepared statement] -** object back to its initial state, ready to be re-executed. -** ^Any SQL statement variables that had values bound to them using -** the [sqlite3_bind_blob | sqlite3_bind_*() API] retain their values. -** Use [sqlite3_clear_bindings()] to reset the bindings. -** -** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S -** back to the beginning of its program. -** -** ^If the most recent call to [sqlite3_step(S)] for the -** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE], -** or if [sqlite3_step(S)] has never before been called on S, -** then [sqlite3_reset(S)] returns [SQLITE_OK]. -** -** ^If the most recent call to [sqlite3_step(S)] for the -** [prepared statement] S indicated an error, then -** [sqlite3_reset(S)] returns an appropriate [error code]. -** -** ^The [sqlite3_reset(S)] interface does not change the values -** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_reset(sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Create Or Redefine SQL Functions -** KEYWORDS: {function creation routines} -** KEYWORDS: {application-defined SQL function} -** KEYWORDS: {application-defined SQL functions} -** METHOD: sqlite3 -** -** ^These functions (collectively known as "function creation routines") -** are used to add SQL functions or aggregates or to redefine the behavior -** of existing SQL functions or aggregates. The only differences between -** these routines are the text encoding expected for -** the second parameter (the name of the function being created) -** and the presence or absence of a destructor callback for -** the application data pointer. -** -** ^The first parameter is the [database connection] to which the SQL -** function is to be added. ^If an application uses more than one database -** connection then application-defined SQL functions must be added -** to each database connection separately. -** -** ^The second parameter is the name of the SQL function to be created or -** redefined. ^The length of the name is limited to 255 bytes in a UTF-8 -** representation, exclusive of the zero-terminator. ^Note that the name -** length limit is in UTF-8 bytes, not characters nor UTF-16 bytes. -** ^Any attempt to create a function with a longer name -** will result in [SQLITE_MISUSE] being returned. -** -** ^The third parameter (nArg) -** is the number of arguments that the SQL function or -** aggregate takes. ^If this parameter is -1, then the SQL function or -** aggregate may take any number of arguments between 0 and the limit -** set by [sqlite3_limit]([SQLITE_LIMIT_FUNCTION_ARG]). If the third -** parameter is less than -1 or greater than 127 then the behavior is -** undefined. -** -** ^The fourth parameter, eTextRep, specifies what -** [SQLITE_UTF8 | text encoding] this SQL function prefers for -** its parameters. The application should set this parameter to -** [SQLITE_UTF16LE] if the function implementation invokes -** [sqlite3_value_text16le()] on an input, or [SQLITE_UTF16BE] if the -** implementation invokes [sqlite3_value_text16be()] on an input, or -** [SQLITE_UTF16] if [sqlite3_value_text16()] is used, or [SQLITE_UTF8] -** otherwise. 
^The same SQL function may be registered multiple times using -** different preferred text encodings, with different implementations for -** each encoding. -** ^When multiple implementations of the same function are available, SQLite -** will pick the one that involves the least amount of data conversion. -** -** ^The fourth parameter may optionally be ORed with [SQLITE_DETERMINISTIC] -** to signal that the function will always return the same result given -** the same inputs within a single SQL statement. Most SQL functions are -** deterministic. The built-in [random()] SQL function is an example of a -** function that is not deterministic. The SQLite query planner is able to -** perform additional optimizations on deterministic functions, so use -** of the [SQLITE_DETERMINISTIC] flag is recommended where possible. -** -** ^(The fifth parameter is an arbitrary pointer. The implementation of the -** function can gain access to this pointer using [sqlite3_user_data()].)^ -** -** ^The sixth, seventh and eighth parameters, xFunc, xStep and xFinal, are -** pointers to C-language functions that implement the SQL function or -** aggregate. ^A scalar SQL function requires an implementation of the xFunc -** callback only; NULL pointers must be passed as the xStep and xFinal -** parameters. ^An aggregate SQL function requires an implementation of xStep -** and xFinal and NULL pointer must be passed for xFunc. ^To delete an existing -** SQL function or aggregate, pass NULL pointers for all three function -** callbacks. -** -** ^(If the ninth parameter to sqlite3_create_function_v2() is not NULL, -** then it is destructor for the application data pointer. -** The destructor is invoked when the function is deleted, either by being -** overloaded or when the database connection closes.)^ -** ^The destructor is also invoked if the call to -** sqlite3_create_function_v2() fails. -** ^When the destructor callback of the tenth parameter is invoked, it -** is passed a single argument which is a copy of the application data -** pointer which was the fifth parameter to sqlite3_create_function_v2(). -** -** ^It is permitted to register multiple implementations of the same -** functions with the same name but with either differing numbers of -** arguments or differing preferred text encodings. ^SQLite will use -** the implementation that most closely matches the way in which the -** SQL function is used. ^A function implementation with a non-negative -** nArg parameter is a better match than a function implementation with -** a negative nArg. ^A function where the preferred text encoding -** matches the database encoding is a better -** match than a function where the encoding is different. -** ^A function where the encoding difference is between UTF16le and UTF16be -** is a closer match than a function where the encoding difference is -** between UTF8 and UTF16. -** -** ^Built-in functions may be overloaded by new application-defined functions. -** -** ^An application-defined function is permitted to call other -** SQLite interfaces. However, such calls must not -** close the database connection nor finalize or reset the prepared -** statement in which the function is running. 
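As a sketch of the registration rules above, a hypothetical one-argument deterministic scalar function:

```c
#include <sqlite3.h>

/* half(X) -> X/2.  Marked SQLITE_DETERMINISTIC so the query planner
** may reuse the result for identical inputs within one statement. */
static void halfFunc(sqlite3_context *ctx, int argc, sqlite3_value **argv){
  (void)argc;
  sqlite3_result_double(ctx, 0.5*sqlite3_value_double(argv[0]));
}

static int register_half(sqlite3 *db){
  return sqlite3_create_function_v2(db, "half", 1,
             SQLITE_UTF8|SQLITE_DETERMINISTIC,
             0 /* pApp */, halfFunc, 0 /* xStep */, 0 /* xFinal */,
             0 /* xDestroy */);
}
```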
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_create_function( - sqlite3 *db, - const char *zFunctionName, - int nArg, - int eTextRep, - void *pApp, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*) -); -SQLITE_API int SQLITE_STDCALL sqlite3_create_function16( - sqlite3 *db, - const void *zFunctionName, - int nArg, - int eTextRep, - void *pApp, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*) -); -SQLITE_API int SQLITE_STDCALL sqlite3_create_function_v2( - sqlite3 *db, - const char *zFunctionName, - int nArg, - int eTextRep, - void *pApp, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*), - void(*xDestroy)(void*) -); - -/* -** CAPI3REF: Text Encodings -** -** These constant define integer codes that represent the various -** text encodings supported by SQLite. -*/ -#define SQLITE_UTF8 1 /* IMP: R-37514-35566 */ -#define SQLITE_UTF16LE 2 /* IMP: R-03371-37637 */ -#define SQLITE_UTF16BE 3 /* IMP: R-51971-34154 */ -#define SQLITE_UTF16 4 /* Use native byte order */ -#define SQLITE_ANY 5 /* Deprecated */ -#define SQLITE_UTF16_ALIGNED 8 /* sqlite3_create_collation only */ - -/* -** CAPI3REF: Function Flags -** -** These constants may be ORed together with the -** [SQLITE_UTF8 | preferred text encoding] as the fourth argument -** to [sqlite3_create_function()], [sqlite3_create_function16()], or -** [sqlite3_create_function_v2()]. -*/ -#define SQLITE_DETERMINISTIC 0x800 - -/* -** CAPI3REF: Deprecated Functions -** DEPRECATED -** -** These functions are [deprecated]. In order to maintain -** backwards compatibility with older code, these functions continue -** to be supported. However, new applications should avoid -** the use of these functions. To encourage programmers to avoid -** these functions, we will not explain what they do. -*/ -#ifndef SQLITE_OMIT_DEPRECATED -SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_aggregate_count(sqlite3_context*); -SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_expired(sqlite3_stmt*); -SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_transfer_bindings(sqlite3_stmt*, sqlite3_stmt*); -SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_global_recover(void); -SQLITE_API SQLITE_DEPRECATED void SQLITE_STDCALL sqlite3_thread_cleanup(void); -SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_memory_alarm(void(*)(void*,sqlite3_int64,int), - void*,sqlite3_int64); -#endif - -/* -** CAPI3REF: Obtaining SQL Values -** METHOD: sqlite3_value -** -** The C-language implementation of SQL functions and aggregates uses -** this set of interface routines to access the parameter values on -** the function or aggregate. -** -** The xFunc (for scalar functions) or xStep (for aggregates) parameters -** to [sqlite3_create_function()] and [sqlite3_create_function16()] -** define callbacks that implement the SQL functions and aggregates. -** The 3rd parameter to these callbacks is an array of pointers to -** [protected sqlite3_value] objects. There is one [sqlite3_value] object for -** each parameter to the SQL function. These routines are used to -** extract values from the [sqlite3_value] objects. -** -** These routines work only with [protected sqlite3_value] objects. 
-** Any attempt to use these routines on an [unprotected sqlite3_value] -** object results in undefined behavior. -** -** ^These routines work just like the corresponding [column access functions] -** except that these routines take a single [protected sqlite3_value] object -** pointer instead of a [sqlite3_stmt*] pointer and an integer column number. -** -** ^The sqlite3_value_text16() interface extracts a UTF-16 string -** in the native byte-order of the host machine. ^The -** sqlite3_value_text16be() and sqlite3_value_text16le() interfaces -** extract UTF-16 strings as big-endian and little-endian respectively. -** -** ^(The sqlite3_value_numeric_type() interface attempts to apply -** numeric affinity to the value. This means that an attempt is -** made to convert the value to an integer or floating point. If -** such a conversion is possible without loss of information (in other -** words, if the value is a string that looks like a number) -** then the conversion is performed. Otherwise no conversion occurs. -** The [SQLITE_INTEGER | datatype] after conversion is returned.)^ -** -** Please pay particular attention to the fact that the pointer returned -** from [sqlite3_value_blob()], [sqlite3_value_text()], or -** [sqlite3_value_text16()] can be invalidated by a subsequent call to -** [sqlite3_value_bytes()], [sqlite3_value_bytes16()], [sqlite3_value_text()], -** or [sqlite3_value_text16()]. -** -** These routines must be called from the same thread as -** the SQL function that supplied the [sqlite3_value*] parameters. -*/ -SQLITE_API const void *SQLITE_STDCALL sqlite3_value_blob(sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_value_bytes(sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_value_bytes16(sqlite3_value*); -SQLITE_API double SQLITE_STDCALL sqlite3_value_double(sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_value_int(sqlite3_value*); -SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_value_int64(sqlite3_value*); -SQLITE_API const unsigned char *SQLITE_STDCALL sqlite3_value_text(sqlite3_value*); -SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16(sqlite3_value*); -SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16le(sqlite3_value*); -SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*); -SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*); - -/* -** CAPI3REF: Finding The Subtype Of SQL Values -** METHOD: sqlite3_value -** -** The sqlite3_value_subtype(V) function returns the subtype for -** an [application-defined SQL function] argument V. The subtype -** information can be used to pass a limited amount of context from -** one SQL function to another. Use the [sqlite3_result_subtype()] -** routine to set the subtype for the return value of an SQL function. -** -** SQLite makes no use of subtype itself. It merely passes the subtype -** from the result of one [application-defined SQL function] into the -** input of another. -*/ -SQLITE_API unsigned int SQLITE_STDCALL sqlite3_value_subtype(sqlite3_value*); - -/* -** CAPI3REF: Copy And Free SQL Values -** METHOD: sqlite3_value -** -** ^The sqlite3_value_dup(V) interface makes a copy of the [sqlite3_value] -** object D and returns a pointer to that copy. ^The [sqlite3_value] returned -** is a [protected sqlite3_value] object even if the input is not. -** ^The sqlite3_value_dup(V) interface returns NULL if V is NULL or if a -** memory allocation fails. 
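A sketch of pairing [sqlite3_value_dup()] with [sqlite3_value_free()] to obtain a protected copy of the unprotected object returned by [sqlite3_column_value()]; `pStmt` is assumed to be positioned on a row:

```c
#include <sqlite3.h>

static int first_col_type(sqlite3_stmt *pStmt){
  /* sqlite3_column_value() yields an *unprotected* value; duplicate it
  ** so the sqlite3_value_*() readers may be used safely. */
  sqlite3_value *pVal = sqlite3_value_dup(sqlite3_column_value(pStmt, 0));
  int eType = SQLITE_NULL;
  if( pVal ){
    eType = sqlite3_value_type(pVal);
    sqlite3_value_free(pVal);
  }
  return eType;
}
```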
-** -** ^The sqlite3_value_free(V) interface frees an [sqlite3_value] object -** previously obtained from [sqlite3_value_dup()]. ^If V is a NULL pointer -** then sqlite3_value_free(V) is a harmless no-op. -*/ -SQLITE_API sqlite3_value *SQLITE_STDCALL sqlite3_value_dup(const sqlite3_value*); -SQLITE_API void SQLITE_STDCALL sqlite3_value_free(sqlite3_value*); - -/* -** CAPI3REF: Obtain Aggregate Function Context -** METHOD: sqlite3_context -** -** Implementations of aggregate SQL functions use this -** routine to allocate memory for storing their state. -** -** ^The first time the sqlite3_aggregate_context(C,N) routine is called -** for a particular aggregate function, SQLite -** allocates N of memory, zeroes out that memory, and returns a pointer -** to the new memory. ^On second and subsequent calls to -** sqlite3_aggregate_context() for the same aggregate function instance, -** the same buffer is returned. Sqlite3_aggregate_context() is normally -** called once for each invocation of the xStep callback and then one -** last time when the xFinal callback is invoked. ^(When no rows match -** an aggregate query, the xStep() callback of the aggregate function -** implementation is never called and xFinal() is called exactly once. -** In those cases, sqlite3_aggregate_context() might be called for the -** first time from within xFinal().)^ -** -** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer -** when first called if N is less than or equal to zero or if a memory -** allocate error occurs. -** -** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is -** determined by the N parameter on first successful call. Changing the -** value of N in subsequent call to sqlite3_aggregate_context() within -** the same aggregate function instance will not resize the memory -** allocation.)^ Within the xFinal callback, it is customary to set -** N=0 in calls to sqlite3_aggregate_context(C,N) so that no -** pointless memory allocations occur. -** -** ^SQLite automatically frees the memory allocated by -** sqlite3_aggregate_context() when the aggregate query concludes. -** -** The first parameter must be a copy of the -** [sqlite3_context | SQL function context] that is the first parameter -** to the xStep or xFinal callback routine that implements the aggregate -** function. -** -** This routine must be called from the same thread in which -** the aggregate SQL function is running. -*/ -SQLITE_API void *SQLITE_STDCALL sqlite3_aggregate_context(sqlite3_context*, int nBytes); - -/* -** CAPI3REF: User Data For Functions -** METHOD: sqlite3_context -** -** ^The sqlite3_user_data() interface returns a copy of -** the pointer that was the pUserData parameter (the 5th parameter) -** of the [sqlite3_create_function()] -** and [sqlite3_create_function16()] routines that originally -** registered the application defined function. -** -** This routine must be called from the same thread in which -** the application-defined function is running. -*/ -SQLITE_API void *SQLITE_STDCALL sqlite3_user_data(sqlite3_context*); - -/* -** CAPI3REF: Database Connection For Functions -** METHOD: sqlite3_context -** -** ^The sqlite3_context_db_handle() interface returns a copy of -** the pointer to the [database connection] (the 1st parameter) -** of the [sqlite3_create_function()] -** and [sqlite3_create_function16()] routines that originally -** registered the application defined function. 
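A sketch of the aggregate-state pattern described above, for a hypothetical COUNT-like aggregate:

```c
#include <sqlite3.h>

typedef struct CountCtx { sqlite3_int64 n; } CountCtx;

static void countStep(sqlite3_context *ctx, int argc, sqlite3_value **argv){
  /* The first call allocates and zeroes sizeof(CountCtx); later calls
  ** return the same buffer. */
  CountCtx *p = (CountCtx*)sqlite3_aggregate_context(ctx, sizeof(*p));
  (void)argc; (void)argv;
  if( p ) p->n++;
}

static void countFinal(sqlite3_context *ctx){
  /* N==0: avoid a pointless fresh allocation from within xFinal. */
  CountCtx *p = (CountCtx*)sqlite3_aggregate_context(ctx, 0);
  sqlite3_result_int64(ctx, p ? p->n : 0);
}
```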
-*/ -SQLITE_API sqlite3 *SQLITE_STDCALL sqlite3_context_db_handle(sqlite3_context*); - -/* -** CAPI3REF: Function Auxiliary Data -** METHOD: sqlite3_context -** -** These functions may be used by (non-aggregate) SQL functions to -** associate metadata with argument values. If the same value is passed to -** multiple invocations of the same SQL function during query execution, under -** some circumstances the associated metadata may be preserved. An example -** of where this might be useful is in a regular-expression matching -** function. The compiled version of the regular expression can be stored as -** metadata associated with the pattern string. -** Then as long as the pattern string remains the same, -** the compiled regular expression can be reused on multiple -** invocations of the same function. -** -** ^The sqlite3_get_auxdata() interface returns a pointer to the metadata -** associated by the sqlite3_set_auxdata() function with the Nth argument -** value to the application-defined function. ^If there is no metadata -** associated with the function argument, this sqlite3_get_auxdata() interface -** returns a NULL pointer. -** -** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th -** argument of the application-defined function. ^Subsequent -** calls to sqlite3_get_auxdata(C,N) return P from the most recent -** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or -** NULL if the metadata has been discarded. -** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL, -** SQLite will invoke the destructor function X with parameter P exactly -** once, when the metadata is discarded. -** SQLite is free to discard the metadata at any time, including:
-** <ul>
-** <li> when the corresponding function parameter changes, or
-** <li> when [sqlite3_reset()] or [sqlite3_finalize()] is called for the
-**      SQL statement, or
-** <li> when sqlite3_set_auxdata() is invoked again on the same parameter, or
-** <li> during the original sqlite3_set_auxdata() call when a memory
-**      allocation error occurs.
-** </ul>
)^ -** -** Note the last bullet in particular. The destructor X in -** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the -** sqlite3_set_auxdata() interface even returns. Hence sqlite3_set_auxdata() -** should be called near the end of the function implementation and the -** function implementation should not make any use of P after -** sqlite3_set_auxdata() has been called. -** -** ^(In practice, metadata is preserved between function calls for -** function parameters that are compile-time constants, including literal -** values and [parameters] and expressions composed from the same.)^ -** -** These routines must be called from the same thread in which -** the SQL function is running. -*/ -SQLITE_API void *SQLITE_STDCALL sqlite3_get_auxdata(sqlite3_context*, int N); -SQLITE_API void SQLITE_STDCALL sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*)); - - -/* -** CAPI3REF: Constants Defining Special Destructor Behavior -** -** These are special values for the destructor that is passed in as the -** final argument to routines like [sqlite3_result_blob()]. ^If the destructor -** argument is SQLITE_STATIC, it means that the content pointer is constant -** and will never change. It does not need to be destroyed. ^The -** SQLITE_TRANSIENT value means that the content will likely change in -** the near future and that SQLite should make its own private copy of -** the content before returning. -** -** The typedef is necessary to work around problems in certain -** C++ compilers. -*/ -typedef void (*sqlite3_destructor_type)(void*); -#define SQLITE_STATIC ((sqlite3_destructor_type)0) -#define SQLITE_TRANSIENT ((sqlite3_destructor_type)-1) - -/* -** CAPI3REF: Setting The Result Of An SQL Function -** METHOD: sqlite3_context -** -** These routines are used by the xFunc or xFinal callbacks that -** implement SQL functions and aggregates. See -** [sqlite3_create_function()] and [sqlite3_create_function16()] -** for additional information. -** -** These functions work very much like the [parameter binding] family of -** functions used to bind values to host parameters in prepared statements. -** Refer to the [SQL parameter] documentation for additional information. -** -** ^The sqlite3_result_blob() interface sets the result from -** an application-defined function to be the BLOB whose content is pointed -** to by the second parameter and which is N bytes long where N is the -** third parameter. -** -** ^The sqlite3_result_zeroblob(C,N) and sqlite3_result_zeroblob64(C,N) -** interfaces set the result of the application-defined function to be -** a BLOB containing all zero bytes and N bytes in size. -** -** ^The sqlite3_result_double() interface sets the result from -** an application-defined function to be a floating point value specified -** by its 2nd argument. -** -** ^The sqlite3_result_error() and sqlite3_result_error16() functions -** cause the implemented SQL function to throw an exception. -** ^SQLite uses the string pointed to by the -** 2nd parameter of sqlite3_result_error() or sqlite3_result_error16() -** as the text of an error message. ^SQLite interprets the error -** message string from sqlite3_result_error() as UTF-8. ^SQLite -** interprets the string from sqlite3_result_error16() as UTF-16 in native -** byte order. ^If the third parameter to sqlite3_result_error() -** or sqlite3_result_error16() is negative then SQLite takes as the error -** message all text up through the first zero character. 
-** ^If the third parameter to sqlite3_result_error() or
-** sqlite3_result_error16() is non-negative then SQLite takes that many
-** bytes (not characters) from the 2nd parameter as the error message.
-** ^The sqlite3_result_error() and sqlite3_result_error16()
-** routines make a private copy of the error message text before
-** they return. Hence, the calling function can deallocate or
-** modify the text after they return without harm.
-** ^The sqlite3_result_error_code() function changes the error code
-** returned by SQLite as a result of an error in a function. ^By default,
-** the error code is SQLITE_ERROR. ^A subsequent call to sqlite3_result_error()
-** or sqlite3_result_error16() resets the error code to SQLITE_ERROR.
-**
-** ^The sqlite3_result_error_toobig() interface causes SQLite to throw an
-** error indicating that a string or BLOB is too long to represent.
-**
-** ^The sqlite3_result_error_nomem() interface causes SQLite to throw an
-** error indicating that a memory allocation failed.
-**
-** ^The sqlite3_result_int() interface sets the return value
-** of the application-defined function to be the 32-bit signed integer
-** value given in the 2nd argument.
-** ^The sqlite3_result_int64() interface sets the return value
-** of the application-defined function to be the 64-bit signed integer
-** value given in the 2nd argument.
-**
-** ^The sqlite3_result_null() interface sets the return value
-** of the application-defined function to be NULL.
-**
-** ^The sqlite3_result_text(), sqlite3_result_text16(),
-** sqlite3_result_text16le(), and sqlite3_result_text16be() interfaces
-** set the return value of the application-defined function to be
-** a text string which is represented as UTF-8, UTF-16 native byte order,
-** UTF-16 little endian, or UTF-16 big endian, respectively.
-** ^The sqlite3_result_text64() interface sets the return value of an
-** application-defined function to be a text string in an encoding
-** specified by the fifth (and last) parameter, which must be one
-** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE].
-** ^SQLite takes the text result from the application from
-** the 2nd parameter of the sqlite3_result_text* interfaces.
-** ^If the 3rd parameter to the sqlite3_result_text* interfaces
-** is negative, then SQLite takes result text from the 2nd parameter
-** through the first zero character.
-** ^If the 3rd parameter to the sqlite3_result_text* interfaces
-** is non-negative, then as many bytes (not characters) of the text
-** pointed to by the 2nd parameter are taken as the application-defined
-** function result. If the 3rd parameter is non-negative, then it
-** must be the byte offset into the string where the NUL terminator would
-** appear if the string were NUL terminated. If any NUL characters occur
-** in the string at a byte offset that is less than the value of the 3rd
-** parameter, then the resulting string will contain embedded NULs and the
-** result of expressions operating on strings with embedded NULs is undefined.
-** ^If the 4th parameter to the sqlite3_result_text* interfaces
-** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that
-** function as the destructor on the text or BLOB result when it has
-** finished using that result.
-** ^If the 4th parameter to the sqlite3_result_text* interfaces or to
-** sqlite3_result_blob is the special constant SQLITE_STATIC, then SQLite
-** assumes that the text or BLOB result is in constant space and does not
-** copy the content of the parameter nor call a destructor on the content
-** when it has finished using that result.
-** ^If the 4th parameter to the sqlite3_result_text* interfaces
-** or sqlite3_result_blob is the special constant SQLITE_TRANSIENT
-** then SQLite makes a copy of the result into space obtained
-** from [sqlite3_malloc()] before it returns.
-**
-** ^The sqlite3_result_value() interface sets the result of
-** the application-defined function to be a copy of the
-** [unprotected sqlite3_value] object specified by the 2nd parameter. ^The
-** sqlite3_result_value() interface makes a copy of the [sqlite3_value]
-** so that the [sqlite3_value] specified in the parameter may change or
-** be deallocated after sqlite3_result_value() returns without harm.
-** ^A [protected sqlite3_value] object may always be used where an
-** [unprotected sqlite3_value] object is required, so either
-** kind of [sqlite3_value] object can be used with this interface.
-**
-** If these routines are called from within a different thread
-** than the one containing the application-defined function that received
-** the [sqlite3_context] pointer, the results are undefined.
-*/
-SQLITE_API void SQLITE_STDCALL sqlite3_result_blob(sqlite3_context*, const void*, int, void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_blob64(sqlite3_context*,const void*,
-                           sqlite3_uint64,void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_double(sqlite3_context*, double);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_error(sqlite3_context*, const char*, int);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_error16(sqlite3_context*, const void*, int);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_error_toobig(sqlite3_context*);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_error_nomem(sqlite3_context*);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_error_code(sqlite3_context*, int);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_int(sqlite3_context*, int);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_int64(sqlite3_context*, sqlite3_int64);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_null(sqlite3_context*);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_text(sqlite3_context*, const char*, int, void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_text64(sqlite3_context*, const char*,sqlite3_uint64,
-                           void(*)(void*), unsigned char encoding);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_text16(sqlite3_context*, const void*, int, void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_text16le(sqlite3_context*, const void*, int,void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*));
-SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_value*);
-SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n);
-SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n);
-
-
-/*
-** CAPI3REF: Setting The Subtype Of An SQL Function
-** METHOD: sqlite3_context
-**
-** The sqlite3_result_subtype(C,T) function causes the subtype of
-** the result from the [application-defined SQL function] with
-** [sqlite3_context] C to be the value T.
Only the lower 8 bits -** of the subtype T are preserved in current versions of SQLite; -** higher order bits are discarded. -** The number of subtype bytes preserved by SQLite might increase -** in future releases of SQLite. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_result_subtype(sqlite3_context*,unsigned int); - -/* -** CAPI3REF: Define New Collating Sequences -** METHOD: sqlite3 -** -** ^These functions add, remove, or modify a [collation] associated -** with the [database connection] specified as the first argument. -** -** ^The name of the collation is a UTF-8 string -** for sqlite3_create_collation() and sqlite3_create_collation_v2() -** and a UTF-16 string in native byte order for sqlite3_create_collation16(). -** ^Collation names that compare equal according to [sqlite3_strnicmp()] are -** considered to be the same name. -** -** ^(The third argument (eTextRep) must be one of the constants: -**
-** <ul>
-** <li> [SQLITE_UTF8],
-** <li> [SQLITE_UTF16LE],
-** <li> [SQLITE_UTF16BE],
-** <li> [SQLITE_UTF16], or
-** <li> [SQLITE_UTF16_ALIGNED].
-** </ul>
)^ -** ^The eTextRep argument determines the encoding of strings passed -** to the collating function callback, xCallback. -** ^The [SQLITE_UTF16] and [SQLITE_UTF16_ALIGNED] values for eTextRep -** force strings to be UTF16 with native byte order. -** ^The [SQLITE_UTF16_ALIGNED] value for eTextRep forces strings to begin -** on an even byte address. -** -** ^The fourth argument, pArg, is an application data pointer that is passed -** through as the first argument to the collating function callback. -** -** ^The fifth argument, xCallback, is a pointer to the collating function. -** ^Multiple collating functions can be registered using the same name but -** with different eTextRep parameters and SQLite will use whichever -** function requires the least amount of data transformation. -** ^If the xCallback argument is NULL then the collating function is -** deleted. ^When all collating functions having the same name are deleted, -** that collation is no longer usable. -** -** ^The collating function callback is invoked with a copy of the pArg -** application data pointer and with two strings in the encoding specified -** by the eTextRep argument. The collating function must return an -** integer that is negative, zero, or positive -** if the first string is less than, equal to, or greater than the second, -** respectively. A collating function must always return the same answer -** given the same inputs. If two or more collating functions are registered -** to the same collation name (using different eTextRep values) then all -** must give an equivalent answer when invoked with equivalent strings. -** The collating function must obey the following properties for all -** strings A, B, and C: -** -**
-** <ol>
-** <li> If A==B then B==A.
-** <li> If A==B and B==C then A==C.
-** <li> If A<B then B>A.
-** <li> If A<B and B<C then A<C.
-** </ol>
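-**
-** For example, a case-insensitive collating function obeying rules 1
-** through 4 above might look like the following sketch (the name
-** nocaseCmp is hypothetical; ties are broken by length):
-**
-**    static int nocaseCmp(void *pArg, int n1, const void *z1,
-**                         int n2, const void *z2){
-**      int n = n1<n2 ? n1 : n2;
-**      int rc = sqlite3_strnicmp((const char*)z1, (const char*)z2, n);
-**      return rc!=0 ? rc : n1-n2;
-**    }
-**    /* sqlite3_create_collation(db, "mynocase", SQLITE_UTF8, 0, nocaseCmp); */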
-** -** If a collating function fails any of the above constraints and that -** collating function is registered and used, then the behavior of SQLite -** is undefined. -** -** ^The sqlite3_create_collation_v2() works like sqlite3_create_collation() -** with the addition that the xDestroy callback is invoked on pArg when -** the collating function is deleted. -** ^Collating functions are deleted when they are overridden by later -** calls to the collation creation functions or when the -** [database connection] is closed using [sqlite3_close()]. -** -** ^The xDestroy callback is not called if the -** sqlite3_create_collation_v2() function fails. Applications that invoke -** sqlite3_create_collation_v2() with a non-NULL xDestroy argument should -** check the return code and dispose of the application data pointer -** themselves rather than expecting SQLite to deal with it for them. -** This is different from every other SQLite interface. The inconsistency -** is unfortunate but cannot be changed without breaking backwards -** compatibility. -** -** See also: [sqlite3_collation_needed()] and [sqlite3_collation_needed16()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation( - sqlite3*, - const char *zName, - int eTextRep, - void *pArg, - int(*xCompare)(void*,int,const void*,int,const void*) -); -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation_v2( - sqlite3*, - const char *zName, - int eTextRep, - void *pArg, - int(*xCompare)(void*,int,const void*,int,const void*), - void(*xDestroy)(void*) -); -SQLITE_API int SQLITE_STDCALL sqlite3_create_collation16( - sqlite3*, - const void *zName, - int eTextRep, - void *pArg, - int(*xCompare)(void*,int,const void*,int,const void*) -); - -/* -** CAPI3REF: Collation Needed Callbacks -** METHOD: sqlite3 -** -** ^To avoid having to register all collation sequences before a database -** can be used, a single callback function may be registered with the -** [database connection] to be invoked whenever an undefined collation -** sequence is required. -** -** ^If the function is registered using the sqlite3_collation_needed() API, -** then it is passed the names of undefined collation sequences as strings -** encoded in UTF-8. ^If sqlite3_collation_needed16() is used, -** the names are passed as UTF-16 in machine native byte order. -** ^A call to either function replaces the existing collation-needed callback. -** -** ^(When the callback is invoked, the first argument passed is a copy -** of the second argument to sqlite3_collation_needed() or -** sqlite3_collation_needed16(). The second argument is the database -** connection. The third argument is one of [SQLITE_UTF8], [SQLITE_UTF16BE], -** or [SQLITE_UTF16LE], indicating the most desirable form of the collation -** sequence function required. The fourth parameter is the name of the -** required collation sequence.)^ -** -** The callback function should register the desired collation using -** [sqlite3_create_collation()], [sqlite3_create_collation16()], or -** [sqlite3_create_collation_v2()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed( - sqlite3*, - void*, - void(*)(void*,sqlite3*,int eTextRep,const char*) -); -SQLITE_API int SQLITE_STDCALL sqlite3_collation_needed16( - sqlite3*, - void*, - void(*)(void*,sqlite3*,int eTextRep,const void*) -); - -#ifdef SQLITE_HAS_CODEC -/* -** Specify the key for an encrypted database. This routine should be -** called right after sqlite3_open(). -** -** The code to implement this API is not available in the public release -** of SQLite. 
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_key( - sqlite3 *db, /* Database to be rekeyed */ - const void *pKey, int nKey /* The key */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_key_v2( - sqlite3 *db, /* Database to be rekeyed */ - const char *zDbName, /* Name of the database */ - const void *pKey, int nKey /* The key */ -); - -/* -** Change the key on an open database. If the current database is not -** encrypted, this routine will encrypt it. If pNew==0 or nNew==0, the -** database is decrypted. -** -** The code to implement this API is not available in the public release -** of SQLite. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_rekey( - sqlite3 *db, /* Database to be rekeyed */ - const void *pKey, int nKey /* The new key */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_rekey_v2( - sqlite3 *db, /* Database to be rekeyed */ - const char *zDbName, /* Name of the database */ - const void *pKey, int nKey /* The new key */ -); - -/* -** Specify the activation key for a SEE database. Unless -** activated, none of the SEE routines will work. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_activate_see( - const char *zPassPhrase /* Activation phrase */ -); -#endif - -#ifdef SQLITE_ENABLE_CEROD -/* -** Specify the activation key for a CEROD database. Unless -** activated, none of the CEROD routines will work. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_activate_cerod( - const char *zPassPhrase /* Activation phrase */ -); -#endif - -/* -** CAPI3REF: Suspend Execution For A Short Time -** -** The sqlite3_sleep() function causes the current thread to suspend execution -** for at least a number of milliseconds specified in its parameter. -** -** If the operating system does not support sleep requests with -** millisecond time resolution, then the time will be rounded up to -** the nearest second. The number of milliseconds of sleep actually -** requested from the operating system is returned. -** -** ^SQLite implements this interface by calling the xSleep() -** method of the default [sqlite3_vfs] object. If the xSleep() method -** of the default VFS is not implemented correctly, or not implemented at -** all, then the behavior of sqlite3_sleep() may deviate from the description -** in the previous paragraphs. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_sleep(int); - -/* -** CAPI3REF: Name Of The Folder Holding Temporary Files -** -** ^(If this global variable is made to point to a string which is -** the name of a folder (a.k.a. directory), then all temporary files -** created by SQLite when using a built-in [sqlite3_vfs | VFS] -** will be placed in that directory.)^ ^If this variable -** is a NULL pointer, then SQLite performs a search for an appropriate -** temporary file directory. -** -** Applications are strongly discouraged from using this global variable. -** It is required to set a temporary folder on Windows Runtime (WinRT). -** But for all other platforms, it is highly recommended that applications -** neither read nor write this variable. This global variable is a relic -** that exists for backwards compatibility of legacy applications and should -** be avoided in new projects. -** -** It is not safe to read or modify this variable in more than one -** thread at a time. It is not safe to read or modify this variable -** if a [database connection] is being used at the same time in a separate -** thread. 
-** It is intended that this variable be set once -** as part of process initialization and before any SQLite interface -** routines have been called and that this variable remain unchanged -** thereafter. -** -** ^The [temp_store_directory pragma] may modify this variable and cause -** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, -** the [temp_store_directory pragma] always assumes that any string -** that this variable points to is held in memory obtained from -** [sqlite3_malloc] and the pragma may attempt to free that memory -** using [sqlite3_free]. -** Hence, if this variable is modified directly, either it should be -** made NULL or made to point to memory obtained from [sqlite3_malloc] -** or else the use of the [temp_store_directory pragma] should be avoided. -** Except when requested by the [temp_store_directory pragma], SQLite -** does not free the memory that sqlite3_temp_directory points to. If -** the application wants that memory to be freed, it must do -** so itself, taking care to only do so after all [database connection] -** objects have been destroyed. -** -** Note to Windows Runtime users: The temporary directory must be set -** prior to calling [sqlite3_open] or [sqlite3_open_v2]. Otherwise, various -** features that require the use of temporary files may fail. Here is an -** example of how to do this using C++ with the Windows Runtime: -** -**
-** LPCWSTR zPath = Windows::Storage::ApplicationData::Current->
-**       TemporaryFolder->Path->Data();
-** char zPathBuf[MAX_PATH + 1];
-** memset(zPathBuf, 0, sizeof(zPathBuf));
-** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf),
-**       NULL, NULL);
-** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf);
-** 
-*/ -SQLITE_API SQLITE_EXTERN char *sqlite3_temp_directory; - -/* -** CAPI3REF: Name Of The Folder Holding Database Files -** -** ^(If this global variable is made to point to a string which is -** the name of a folder (a.k.a. directory), then all database files -** specified with a relative pathname and created or accessed by -** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed -** to be relative to that directory.)^ ^If this variable is a NULL -** pointer, then SQLite assumes that all database files specified -** with a relative pathname are relative to the current directory -** for the process. Only the windows VFS makes use of this global -** variable; it is ignored by the unix VFS. -** -** Changing the value of this variable while a database connection is -** open can result in a corrupt database. -** -** It is not safe to read or modify this variable in more than one -** thread at a time. It is not safe to read or modify this variable -** if a [database connection] is being used at the same time in a separate -** thread. -** It is intended that this variable be set once -** as part of process initialization and before any SQLite interface -** routines have been called and that this variable remain unchanged -** thereafter. -** -** ^The [data_store_directory pragma] may modify this variable and cause -** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, -** the [data_store_directory pragma] always assumes that any string -** that this variable points to is held in memory obtained from -** [sqlite3_malloc] and the pragma may attempt to free that memory -** using [sqlite3_free]. -** Hence, if this variable is modified directly, either it should be -** made NULL or made to point to memory obtained from [sqlite3_malloc] -** or else the use of the [data_store_directory pragma] should be avoided. -*/ -SQLITE_API SQLITE_EXTERN char *sqlite3_data_directory; - -/* -** CAPI3REF: Test For Auto-Commit Mode -** KEYWORDS: {autocommit mode} -** METHOD: sqlite3 -** -** ^The sqlite3_get_autocommit() interface returns non-zero or -** zero if the given database connection is or is not in autocommit mode, -** respectively. ^Autocommit mode is on by default. -** ^Autocommit mode is disabled by a [BEGIN] statement. -** ^Autocommit mode is re-enabled by a [COMMIT] or [ROLLBACK]. -** -** If certain kinds of errors occur on a statement within a multi-statement -** transaction (errors including [SQLITE_FULL], [SQLITE_IOERR], -** [SQLITE_NOMEM], [SQLITE_BUSY], and [SQLITE_INTERRUPT]) then the -** transaction might be rolled back automatically. The only way to -** find out whether SQLite automatically rolled back the transaction after -** an error is to use this function. -** -** If another thread changes the autocommit status of the database -** connection while this routine is running, then the return value -** is undefined. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_get_autocommit(sqlite3*); - -/* -** CAPI3REF: Find The Database Handle Of A Prepared Statement -** METHOD: sqlite3_stmt -** -** ^The sqlite3_db_handle interface returns the [database connection] handle -** to which a [prepared statement] belongs. ^The [database connection] -** returned by sqlite3_db_handle is the same [database connection] -** that was the first argument -** to the [sqlite3_prepare_v2()] call (or its variants) that was used to -** create the statement in the first place. 
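-**
-** As an illustrative sketch only (pStmt stands in for some prepared
-** statement), these two interfaces can be combined to detect whether
-** SQLite rolled the enclosing transaction back automatically after a
-** failed step:
-**
-**    int rc = sqlite3_step(pStmt);
-**    if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
-**      sqlite3 *db = sqlite3_db_handle(pStmt);
-**      if( sqlite3_get_autocommit(db) ){
-**        /* the transaction was rolled back automatically */
-**      }
-**    }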
-*/ -SQLITE_API sqlite3 *SQLITE_STDCALL sqlite3_db_handle(sqlite3_stmt*); - -/* -** CAPI3REF: Return The Filename For A Database Connection -** METHOD: sqlite3 -** -** ^The sqlite3_db_filename(D,N) interface returns a pointer to a filename -** associated with database N of connection D. ^The main database file -** has the name "main". If there is no attached database N on the database -** connection D, or if database N is a temporary or in-memory database, then -** a NULL pointer is returned. -** -** ^The filename returned by this function is the output of the -** xFullPathname method of the [VFS]. ^In other words, the filename -** will be an absolute pathname, even if the filename used -** to open the database originally was a URI or relative pathname. -*/ -SQLITE_API const char *SQLITE_STDCALL sqlite3_db_filename(sqlite3 *db, const char *zDbName); - -/* -** CAPI3REF: Determine if a database is read-only -** METHOD: sqlite3 -** -** ^The sqlite3_db_readonly(D,N) interface returns 1 if the database N -** of connection D is read-only, 0 if it is read/write, or -1 if N is not -** the name of a database on connection D. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_db_readonly(sqlite3 *db, const char *zDbName); - -/* -** CAPI3REF: Find the next prepared statement -** METHOD: sqlite3 -** -** ^This interface returns a pointer to the next [prepared statement] after -** pStmt associated with the [database connection] pDb. ^If pStmt is NULL -** then this interface returns a pointer to the first prepared statement -** associated with the database connection pDb. ^If no prepared statement -** satisfies the conditions of this routine, it returns NULL. -** -** The [database connection] pointer D in a call to -** [sqlite3_next_stmt(D,S)] must refer to an open database -** connection and in particular must not be a NULL pointer. -*/ -SQLITE_API sqlite3_stmt *SQLITE_STDCALL sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt); - -/* -** CAPI3REF: Commit And Rollback Notification Callbacks -** METHOD: sqlite3 -** -** ^The sqlite3_commit_hook() interface registers a callback -** function to be invoked whenever a transaction is [COMMIT | committed]. -** ^Any callback set by a previous call to sqlite3_commit_hook() -** for the same database connection is overridden. -** ^The sqlite3_rollback_hook() interface registers a callback -** function to be invoked whenever a transaction is [ROLLBACK | rolled back]. -** ^Any callback set by a previous call to sqlite3_rollback_hook() -** for the same database connection is overridden. -** ^The pArg argument is passed through to the callback. -** ^If the callback on a commit hook function returns non-zero, -** then the commit is converted into a rollback. -** -** ^The sqlite3_commit_hook(D,C,P) and sqlite3_rollback_hook(D,C,P) functions -** return the P argument from the previous call of the same function -** on the same [database connection] D, or NULL for -** the first call for each function on D. -** -** The commit and rollback hook callbacks are not reentrant. -** The callback implementation must not do anything that will modify -** the database connection that invoked the callback. Any actions -** to modify the database connection must be deferred until after the -** completion of the [sqlite3_step()] call that triggered the commit -** or rollback hook in the first place. 
-** Note that running any other SQL statements, including SELECT statements,
-** or merely calling [sqlite3_prepare_v2()] and [sqlite3_step()], counts as
-** "modifying" the database connection for the purposes of this paragraph.
-**
-** ^Registering a NULL function disables the callback.
-**
-** ^When the commit hook callback routine returns zero, the [COMMIT]
-** operation is allowed to continue normally. ^If the commit hook
-** returns non-zero, then the [COMMIT] is converted into a [ROLLBACK].
-** ^The rollback hook is invoked on a rollback that results from a commit
-** hook returning non-zero, just as it would be with any other rollback.
-**
-** ^For the purposes of this API, a transaction is said to have been
-** rolled back if an explicit "ROLLBACK" statement is executed, or
-** an error or constraint causes an implicit rollback to occur.
-** ^The rollback callback is not invoked if a transaction is
-** automatically rolled back because the database connection is closed.
-**
-** See also the [sqlite3_update_hook()] interface.
-*/
-SQLITE_API void *SQLITE_STDCALL sqlite3_commit_hook(sqlite3*, int(*)(void*), void*);
-SQLITE_API void *SQLITE_STDCALL sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*);
-
-/*
-** CAPI3REF: Data Change Notification Callbacks
-** METHOD: sqlite3
-**
-** ^The sqlite3_update_hook() interface registers a callback function
-** with the [database connection] identified by the first argument
-** to be invoked whenever a row is updated, inserted or deleted in
-** a [rowid table].
-** ^Any callback set by a previous call to this function
-** for the same database connection is overridden.
-**
-** ^The second argument is a pointer to the function to invoke when a
-** row is updated, inserted or deleted in a rowid table.
-** ^The first argument to the callback is a copy of the third argument
-** to sqlite3_update_hook().
-** ^The second callback argument is one of [SQLITE_INSERT], [SQLITE_DELETE],
-** or [SQLITE_UPDATE], depending on the operation that caused the callback
-** to be invoked.
-** ^The third and fourth arguments to the callback contain pointers to the
-** database and table name containing the affected row.
-** ^The final callback parameter is the [rowid] of the row.
-** ^In the case of an update, this is the [rowid] after the update takes place.
-**
-** ^(The update hook is not invoked when internal system tables are
-** modified (i.e. sqlite_master and sqlite_sequence).)^
-** ^The update hook is not invoked when [WITHOUT ROWID] tables are modified.
-**
-** ^In the current implementation, the update hook
-** is not invoked when duplicate rows are deleted because of an
-** [ON CONFLICT | ON CONFLICT REPLACE] clause. ^Nor is the update hook
-** invoked when rows are deleted using the [truncate optimization].
-** The exceptions defined in this paragraph might change in a future
-** release of SQLite.
-**
-** The update hook implementation must not do anything that will modify
-** the database connection that invoked the update hook. Any actions
-** to modify the database connection must be deferred until after the
-** completion of the [sqlite3_step()] call that triggered the update hook.
-** Note that calling [sqlite3_prepare_v2()] or [sqlite3_step()] also counts
-** as modifying the database connection for the purposes of this paragraph.
-**
-** ^The sqlite3_update_hook(D,C,P) function
-** returns the P argument from the previous call
-** on the same [database connection] D, or NULL for
-** the first call on D.
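-**
-** A registration sketch (the callback name onChange and the use of
-** fprintf/stderr are assumptions for illustration, not part of this
-** interface):
-**
-**    static void onChange(void *pArg, int op, const char *zDb,
-**                         const char *zTbl, sqlite3_int64 rowid){
-**      fprintf(stderr, "%s on %s.%s rowid=%lld\n",
-**              op==SQLITE_INSERT ? "INSERT" :
-**              op==SQLITE_DELETE ? "DELETE" : "UPDATE",
-**              zDb, zTbl, (long long)rowid);
-**    }
-**    /* later: sqlite3_update_hook(db, onChange, 0); */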
-**
-** See also the [sqlite3_commit_hook()], [sqlite3_rollback_hook()],
-** and [sqlite3_preupdate_hook()] interfaces.
-*/
-SQLITE_API void *SQLITE_STDCALL sqlite3_update_hook(
-  sqlite3*,
-  void(*)(void *,int ,char const *,char const *,sqlite3_int64),
-  void*
-);
-
-/*
-** CAPI3REF: Enable Or Disable Shared Pager Cache
-**
-** ^(This routine enables or disables the sharing of the database cache
-** and schema data structures between [database connection | connections]
-** to the same database. Sharing is enabled if the argument is true
-** and disabled if the argument is false.)^
-**
-** ^Cache sharing is enabled and disabled for an entire process.
-** This is a change as of SQLite version 3.5.0. In prior versions of SQLite,
-** sharing was enabled or disabled for each thread separately.
-**
-** ^(The cache sharing mode set by this interface affects all subsequent
-** calls to [sqlite3_open()], [sqlite3_open_v2()], and [sqlite3_open16()].
-** Existing database connections continue to use the sharing mode
-** that was in effect at the time they were opened.)^
-**
-** ^(This routine returns [SQLITE_OK] if shared cache was enabled or disabled
-** successfully. An [error code] is returned otherwise.)^
-**
-** ^Shared cache is disabled by default. But this might change in
-** future releases of SQLite. Applications that care about shared
-** cache setting should set it explicitly.
-**
-** Note: This method is disabled on MacOS X 10.7 and iOS version 5.0
-** and will always return SQLITE_MISUSE. On those systems,
-** shared cache mode should be enabled per-database connection via
-** [sqlite3_open_v2()] with [SQLITE_OPEN_SHAREDCACHE].
-**
-** This interface is threadsafe on processors where writing a
-** 32-bit integer is atomic.
-**
-** See Also: [SQLite Shared-Cache Mode]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_enable_shared_cache(int);
-
-/*
-** CAPI3REF: Attempt To Free Heap Memory
-**
-** ^The sqlite3_release_memory() interface attempts to free N bytes
-** of heap memory by deallocating non-essential memory allocations
-** held by the database library. Memory used to cache database
-** pages to improve performance is an example of non-essential memory.
-** ^sqlite3_release_memory() returns the number of bytes actually freed,
-** which might be more or less than the amount requested.
-** ^The sqlite3_release_memory() routine is a no-op returning zero
-** if SQLite is not compiled with [SQLITE_ENABLE_MEMORY_MANAGEMENT].
-**
-** See also: [sqlite3_db_release_memory()]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_release_memory(int);
-
-/*
-** CAPI3REF: Free Memory Used By A Database Connection
-** METHOD: sqlite3
-**
-** ^The sqlite3_db_release_memory(D) interface attempts to free as much heap
-** memory as possible from database connection D. Unlike the
-** [sqlite3_release_memory()] interface, this interface is in effect even
-** when the [SQLITE_ENABLE_MEMORY_MANAGEMENT] compile-time option is
-** omitted.
-**
-** See also: [sqlite3_release_memory()]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_db_release_memory(sqlite3*);
-
-/*
-** CAPI3REF: Impose A Limit On Heap Size
-**
-** ^The sqlite3_soft_heap_limit64() interface sets and/or queries the
-** soft limit on the amount of heap memory that may be allocated by SQLite.
-** ^SQLite strives to keep heap memory utilization below the soft heap
-** limit by reducing the number of pages held in the page cache
-** as heap memory usage approaches the limit.
-** ^The soft heap limit is "soft" because even though SQLite strives to stay
-** below the limit, it will exceed the limit rather than generate
-** an [SQLITE_NOMEM] error. In other words, the soft heap limit
-** is advisory only.
-**
-** ^The return value from sqlite3_soft_heap_limit64() is the size of
-** the soft heap limit prior to the call, or negative in the case of an
-** error. ^If the argument N is negative
-** then no change is made to the soft heap limit. Hence, the current
-** size of the soft heap limit can be determined by invoking
-** sqlite3_soft_heap_limit64() with a negative argument.
-**
-** ^If the argument N is zero then the soft heap limit is disabled.
-**
-** ^(The soft heap limit is not enforced in the current implementation
-** if one or more of the following conditions are true:
-**
-** <ul>
-** <li> The soft heap limit is set to zero.
-** <li> Memory accounting is disabled using a combination of the
-**      [sqlite3_config]([SQLITE_CONFIG_MEMSTATUS],...) start-time option and
-**      the [SQLITE_DEFAULT_MEMSTATUS] compile-time option.
-** <li> An alternative page cache implementation is specified using
-**      [sqlite3_config]([SQLITE_CONFIG_PCACHE2],...).
-** <li> The page cache allocates from its own memory pool supplied
-**      by [sqlite3_config]([SQLITE_CONFIG_PAGECACHE],...) rather than
-**      from the heap.
-** </ul>
)^
-**
-** Beginning with SQLite version 3.7.3, the soft heap limit is enforced
-** regardless of whether or not the [SQLITE_ENABLE_MEMORY_MANAGEMENT]
-** compile-time option is invoked. With [SQLITE_ENABLE_MEMORY_MANAGEMENT],
-** the soft heap limit is enforced on every memory allocation. Without
-** [SQLITE_ENABLE_MEMORY_MANAGEMENT], the soft heap limit is only enforced
-** when memory is allocated by the page cache. Testing suggests that because
-** the page cache is the predominate memory user in SQLite, most
-** applications will achieve adequate soft heap limit enforcement without
-** the use of [SQLITE_ENABLE_MEMORY_MANAGEMENT].
-**
-** The circumstances under which SQLite will enforce the soft heap limit may
-** change in future releases of SQLite.
-*/
-SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_soft_heap_limit64(sqlite3_int64 N);
-
-/*
-** CAPI3REF: Deprecated Soft Heap Limit Interface
-** DEPRECATED
-**
-** This is a deprecated version of the [sqlite3_soft_heap_limit64()]
-** interface. This routine is provided for historical compatibility
-** only. All new applications should use the
-** [sqlite3_soft_heap_limit64()] interface rather than this one.
-*/
-SQLITE_API SQLITE_DEPRECATED void SQLITE_STDCALL sqlite3_soft_heap_limit(int N);
-
-
-/*
-** CAPI3REF: Extract Metadata About A Column Of A Table
-** METHOD: sqlite3
-**
-** ^(The sqlite3_table_column_metadata(X,D,T,C,....) routine returns
-** information about column C of table T in database D
-** on [database connection] X.)^ ^The sqlite3_table_column_metadata()
-** interface returns SQLITE_OK and fills in the non-NULL pointers in
-** the final five arguments with appropriate values if the specified
-** column exists. ^The sqlite3_table_column_metadata() interface returns
-** SQLITE_ERROR if the specified column does not exist.
-** ^If the column-name parameter to sqlite3_table_column_metadata() is a
-** NULL pointer, then this routine simply checks for the existence of the
-** table and returns SQLITE_OK if the table exists and SQLITE_ERROR if it
-** does not.
-**
-** ^The column is identified by the second, third and fourth parameters to
-** this function. ^(The second parameter is either the name of the database
-** (i.e. "main", "temp", or an attached database) containing the specified
-** table or NULL.)^ ^If it is NULL, then all attached databases are searched
-** for the table using the same algorithm used by the database engine to
-** resolve unqualified table references.
-**
-** ^The third and fourth parameters to this function are the table and column
-** name of the desired column, respectively.
-**
-** ^Metadata is returned by writing to the memory locations passed as the 5th
-** and subsequent parameters to this function. ^Any of these arguments may be
-** NULL, in which case the corresponding element of metadata is omitted.
-**
-** ^(
-**
-**   Parameter   Output Type    Description
-**   ---------   -----------    -----------------------------------------
-**   5th         const char*    Data type
-**   6th         const char*    Name of default collation sequence
-**   7th         int            True if column has a NOT NULL constraint
-**   8th         int            True if column is part of the PRIMARY KEY
-**   9th         int            True if column is [AUTOINCREMENT]
-**
)^ -** -** ^The memory pointed to by the character pointers returned for the -** declaration type and collation sequence is valid until the next -** call to any SQLite API function. -** -** ^If the specified table is actually a view, an [error code] is returned. -** -** ^If the specified column is "rowid", "oid" or "_rowid_" and the table -** is not a [WITHOUT ROWID] table and an -** [INTEGER PRIMARY KEY] column has been explicitly declared, then the output -** parameters are set for the explicitly declared column. ^(If there is no -** [INTEGER PRIMARY KEY] column, then the outputs -** for the [rowid] are set as follows: -** -**
-**     data type: "INTEGER"
-**     collation sequence: "BINARY"
-**     not null: 0
-**     primary key: 1
-**     auto increment: 0
-** 
)^ -** -** ^This function causes all database schemas to be read from disk and -** parsed, if that has not already been done, and returns an error if -** any errors are encountered while loading the schema. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_table_column_metadata( - sqlite3 *db, /* Connection handle */ - const char *zDbName, /* Database name or NULL */ - const char *zTableName, /* Table name */ - const char *zColumnName, /* Column name */ - char const **pzDataType, /* OUTPUT: Declared data type */ - char const **pzCollSeq, /* OUTPUT: Collation sequence name */ - int *pNotNull, /* OUTPUT: True if NOT NULL constraint exists */ - int *pPrimaryKey, /* OUTPUT: True if column part of PK */ - int *pAutoinc /* OUTPUT: True if column is auto-increment */ -); - -/* -** CAPI3REF: Load An Extension -** METHOD: sqlite3 -** -** ^This interface loads an SQLite extension library from the named file. -** -** ^The sqlite3_load_extension() interface attempts to load an -** [SQLite extension] library contained in the file zFile. If -** the file cannot be loaded directly, attempts are made to load -** with various operating-system specific extensions added. -** So for example, if "samplelib" cannot be loaded, then names like -** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might -** be tried also. -** -** ^The entry point is zProc. -** ^(zProc may be 0, in which case SQLite will try to come up with an -** entry point name on its own. It first tries "sqlite3_extension_init". -** If that does not work, it constructs a name "sqlite3_X_init" where the -** X is consists of the lower-case equivalent of all ASCII alphabetic -** characters in the filename from the last "/" to the first following -** "." and omitting any initial "lib".)^ -** ^The sqlite3_load_extension() interface returns -** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. -** ^If an error occurs and pzErrMsg is not 0, then the -** [sqlite3_load_extension()] interface shall attempt to -** fill *pzErrMsg with error message text stored in memory -** obtained from [sqlite3_malloc()]. The calling function -** should free this memory by calling [sqlite3_free()]. -** -** ^Extension loading must be enabled using -** [sqlite3_enable_load_extension()] or -** [sqlite3_db_config](db,[SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION],1,NULL) -** prior to calling this API, -** otherwise an error will be returned. -** -** Security warning: It is recommended that the -** [SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION] method be used to enable only this -** interface. The use of the [sqlite3_enable_load_extension()] interface -** should be avoided. This will keep the SQL function [load_extension()] -** disabled and prevent SQL injections from giving attackers -** access to extension loading capabilities. -** -** See also the [load_extension() SQL function]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_load_extension( - sqlite3 *db, /* Load the extension into this database connection */ - const char *zFile, /* Name of the shared library containing extension */ - const char *zProc, /* Entry point. 
Derived from zFile if 0 */
-  char **pzErrMsg    /* Put error message here if not 0 */
-);
-
-/*
-** CAPI3REF: Enable Or Disable Extension Loading
-** METHOD: sqlite3
-**
-** ^So as not to open security holes in older applications that are
-** unprepared to deal with [extension loading], and as a means of disabling
-** [extension loading] while evaluating user-entered SQL, the following API
-** is provided to turn the [sqlite3_load_extension()] mechanism on and off.
-**
-** ^Extension loading is off by default.
-** ^Call the sqlite3_enable_load_extension() routine with onoff==1
-** to turn extension loading on and call it with onoff==0 to turn
-** it back off again.
-**
-** ^This interface enables or disables both the C-API
-** [sqlite3_load_extension()] and the SQL function [load_extension()].
-** Use [sqlite3_db_config](db,[SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION],..)
-** to enable or disable only the C-API.
-**
-** Security warning: It is recommended that extension loading
-** be disabled using the [SQLITE_DBCONFIG_ENABLE_LOAD_EXTENSION] method
-** rather than this interface, so the [load_extension()] SQL function
-** remains disabled. This will prevent SQL injections from giving attackers
-** access to extension loading capabilities.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_enable_load_extension(sqlite3 *db, int onoff);
-
-/*
-** CAPI3REF: Automatically Load Statically Linked Extensions
-**
-** ^This interface causes the xEntryPoint() function to be invoked for
-** each new [database connection] that is created. The idea here is that
-** xEntryPoint() is the entry point for a statically linked [SQLite extension]
-** that is to be automatically loaded into all new database connections.
-**
-** ^(Even though the function prototype shows that xEntryPoint() takes
-** no arguments and returns void, SQLite invokes xEntryPoint() with three
-** arguments and expects an integer result as if the signature of the
-** entry point were as follows:
-**
-**    int xEntryPoint(
-**      sqlite3 *db,
-**      const char **pzErrMsg,
-**      const struct sqlite3_api_routines *pThunk
-**    );
-** 
)^ -** -** If the xEntryPoint routine encounters an error, it should make *pzErrMsg -** point to an appropriate error message (obtained from [sqlite3_mprintf()]) -** and return an appropriate [error code]. ^SQLite ensures that *pzErrMsg -** is NULL before calling the xEntryPoint(). ^SQLite will invoke -** [sqlite3_free()] on *pzErrMsg after xEntryPoint() returns. ^If any -** xEntryPoint() returns an error, the [sqlite3_open()], [sqlite3_open16()], -** or [sqlite3_open_v2()] call that provoked the xEntryPoint() will fail. -** -** ^Calling sqlite3_auto_extension(X) with an entry point X that is already -** on the list of automatic extensions is a harmless no-op. ^No entry point -** will be called more than once for each database connection that is opened. -** -** See also: [sqlite3_reset_auto_extension()] -** and [sqlite3_cancel_auto_extension()] -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_auto_extension(void (*xEntryPoint)(void)); - -/* -** CAPI3REF: Cancel Automatic Extension Loading -** -** ^The [sqlite3_cancel_auto_extension(X)] interface unregisters the -** initialization routine X that was registered using a prior call to -** [sqlite3_auto_extension(X)]. ^The [sqlite3_cancel_auto_extension(X)] -** routine returns 1 if initialization routine X was successfully -** unregistered and it returns 0 if X was not on the list of initialization -** routines. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_cancel_auto_extension(void (*xEntryPoint)(void)); - -/* -** CAPI3REF: Reset Automatic Extension Loading -** -** ^This interface disables all automatic extensions previously -** registered using [sqlite3_auto_extension()]. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_reset_auto_extension(void); - -/* -** The interface to the virtual-table mechanism is currently considered -** to be experimental. The interface might change in incompatible ways. -** If this is a problem for you, do not use the interface at this time. -** -** When the virtual-table mechanism stabilizes, we will declare the -** interface fixed, support it indefinitely, and remove this comment. -*/ - -/* -** Structures used by the virtual table interface -*/ -typedef struct sqlite3_vtab sqlite3_vtab; -typedef struct sqlite3_index_info sqlite3_index_info; -typedef struct sqlite3_vtab_cursor sqlite3_vtab_cursor; -typedef struct sqlite3_module sqlite3_module; - -/* -** CAPI3REF: Virtual Table Object -** KEYWORDS: sqlite3_module {virtual table module} -** -** This structure, sometimes called a "virtual table module", -** defines the implementation of a [virtual tables]. -** This structure consists mostly of methods for the module. -** -** ^A virtual table module is created by filling in a persistent -** instance of this structure and passing a pointer to that instance -** to [sqlite3_create_module()] or [sqlite3_create_module_v2()]. -** ^The registration remains valid until it is replaced by a different -** module or until the [database connection] closes. The content -** of this structure must not change while it is registered with -** any database connection. 
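-**
-** A registration sketch (every my* name is a hypothetical method the
-** implementor supplies; unused optional methods may be left zero):
-**
-**    static sqlite3_module myModule = {
-**      0,                    /* iVersion */
-**      myCreate, myConnect, myBestIndex, myDisconnect, myDestroy,
-**      myOpen, myClose, myFilter, myNext, myEof, myColumn, myRowid,
-**      0, 0, 0, 0, 0,        /* xUpdate, xBegin, xSync, xCommit, xRollback */
-**      0, 0,                 /* xFindFunction, xRename */
-**      0, 0, 0               /* xSavepoint, xRelease, xRollbackTo */
-**    };
-**    /* sqlite3_create_module(db, "mymodule", &myModule, 0); */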
-*/ -struct sqlite3_module { - int iVersion; - int (*xCreate)(sqlite3*, void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, char**); - int (*xConnect)(sqlite3*, void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, char**); - int (*xBestIndex)(sqlite3_vtab *pVTab, sqlite3_index_info*); - int (*xDisconnect)(sqlite3_vtab *pVTab); - int (*xDestroy)(sqlite3_vtab *pVTab); - int (*xOpen)(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor); - int (*xClose)(sqlite3_vtab_cursor*); - int (*xFilter)(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, - int argc, sqlite3_value **argv); - int (*xNext)(sqlite3_vtab_cursor*); - int (*xEof)(sqlite3_vtab_cursor*); - int (*xColumn)(sqlite3_vtab_cursor*, sqlite3_context*, int); - int (*xRowid)(sqlite3_vtab_cursor*, sqlite3_int64 *pRowid); - int (*xUpdate)(sqlite3_vtab *, int, sqlite3_value **, sqlite3_int64 *); - int (*xBegin)(sqlite3_vtab *pVTab); - int (*xSync)(sqlite3_vtab *pVTab); - int (*xCommit)(sqlite3_vtab *pVTab); - int (*xRollback)(sqlite3_vtab *pVTab); - int (*xFindFunction)(sqlite3_vtab *pVtab, int nArg, const char *zName, - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), - void **ppArg); - int (*xRename)(sqlite3_vtab *pVtab, const char *zNew); - /* The methods above are in version 1 of the sqlite_module object. Those - ** below are for version 2 and greater. */ - int (*xSavepoint)(sqlite3_vtab *pVTab, int); - int (*xRelease)(sqlite3_vtab *pVTab, int); - int (*xRollbackTo)(sqlite3_vtab *pVTab, int); -}; - -/* -** CAPI3REF: Virtual Table Indexing Information -** KEYWORDS: sqlite3_index_info -** -** The sqlite3_index_info structure and its substructures is used as part -** of the [virtual table] interface to -** pass information into and receive the reply from the [xBestIndex] -** method of a [virtual table module]. The fields under **Inputs** are the -** inputs to xBestIndex and are read-only. xBestIndex inserts its -** results into the **Outputs** fields. -** -** ^(The aConstraint[] array records WHERE clause constraints of the form: -** -**
-**         column OP expr
-** -** where OP is =, <, <=, >, or >=.)^ ^(The particular operator is -** stored in aConstraint[].op using one of the -** [SQLITE_INDEX_CONSTRAINT_EQ | SQLITE_INDEX_CONSTRAINT_ values].)^ -** ^(The index of the column is stored in -** aConstraint[].iColumn.)^ ^(aConstraint[].usable is TRUE if the -** expr on the right-hand side can be evaluated (and thus the constraint -** is usable) and false if it cannot.)^ -** -** ^The optimizer automatically inverts terms of the form "expr OP column" -** and makes other simplifications to the WHERE clause in an attempt to -** get as many WHERE clause terms into the form shown above as possible. -** ^The aConstraint[] array only reports WHERE clause terms that are -** relevant to the particular virtual table being queried. -** -** ^Information about the ORDER BY clause is stored in aOrderBy[]. -** ^Each term of aOrderBy records a column of the ORDER BY clause. -** -** The colUsed field indicates which columns of the virtual table may be -** required by the current scan. Virtual table columns are numbered from -** zero in the order in which they appear within the CREATE TABLE statement -** passed to sqlite3_declare_vtab(). For the first 63 columns (columns 0-62), -** the corresponding bit is set within the colUsed mask if the column may be -** required by SQLite. If the table has at least 64 columns and any column -** to the right of the first 63 is required, then bit 63 of colUsed is also -** set. In other words, column iCol may be required if the expression -** (colUsed & ((sqlite3_uint64)1 << (iCol>=63 ? 63 : iCol))) evaluates to -** non-zero. -** -** The [xBestIndex] method must fill aConstraintUsage[] with information -** about what parameters to pass to xFilter. ^If argvIndex>0 then -** the right-hand side of the corresponding aConstraint[] is evaluated -** and becomes the argvIndex-th entry in argv. ^(If aConstraintUsage[].omit -** is true, then the constraint is assumed to be fully handled by the -** virtual table and is not checked again by SQLite.)^ -** -** ^The idxNum and idxPtr values are recorded and passed into the -** [xFilter] method. -** ^[sqlite3_free()] is used to free idxPtr if and only if -** needToFreeIdxPtr is true. -** -** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in -** the correct order to satisfy the ORDER BY clause so that no separate -** sorting step is required. -** -** ^The estimatedCost value is an estimate of the cost of a particular -** strategy. A cost of N indicates that the cost of the strategy is similar -** to a linear scan of an SQLite table with N rows. A cost of log(N) -** indicates that the expense of the operation is similar to that of a -** binary search on a unique indexed field of an SQLite table with N rows. -** -** ^The estimatedRows value is an estimate of the number of rows that -** will be returned by the strategy. -** -** The xBestIndex method may optionally populate the idxFlags field with a -** mask of SQLITE_INDEX_SCAN_* flags. Currently there is only one such flag - -** SQLITE_INDEX_SCAN_UNIQUE. If the xBestIndex method sets this flag, SQLite -** assumes that the strategy may visit at most one row. -** -** Additionally, if xBestIndex sets the SQLITE_INDEX_SCAN_UNIQUE flag, then -** SQLite also assumes that if a call to the xUpdate() method is made as -** part of the same statement to delete or update a virtual table row and the -** implementation returns SQLITE_CONSTRAINT, then there is no need to rollback -** any database changes. 
In other words, if the xUpdate() returns
-** SQLITE_CONSTRAINT, the database contents must be exactly as they were
-** before xUpdate was called. By contrast, if SQLITE_INDEX_SCAN_UNIQUE is not
-** set and xUpdate returns SQLITE_CONSTRAINT, any database changes made by
-** the xUpdate method are automatically rolled back by SQLite.
-**
-** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
-** structure for SQLite version 3.8.2. If a virtual table extension is
-** used with an SQLite version earlier than 3.8.2, the results of attempting
-** to read or write the estimatedRows field are undefined (but are likely
-** to include crashing the application). The estimatedRows field should
-** therefore only be used if [sqlite3_libversion_number()] returns a
-** value greater than or equal to 3008002. Similarly, the idxFlags field
-** was added for version 3.9.0. It may therefore only be used if
-** sqlite3_libversion_number() returns a value greater than or equal to
-** 3009000.
-*/
-struct sqlite3_index_info {
-  /* Inputs */
-  int nConstraint;           /* Number of entries in aConstraint */
-  struct sqlite3_index_constraint {
-     int iColumn;              /* Column constrained.  -1 for ROWID */
-     unsigned char op;         /* Constraint operator */
-     unsigned char usable;     /* True if this constraint is usable */
-     int iTermOffset;          /* Used internally - xBestIndex should ignore */
-  } *aConstraint;            /* Table of WHERE clause constraints */
-  int nOrderBy;              /* Number of terms in the ORDER BY clause */
-  struct sqlite3_index_orderby {
-     int iColumn;              /* Column number */
-     unsigned char desc;       /* True for DESC.  False for ASC. */
-  } *aOrderBy;               /* The ORDER BY clause */
-  /* Outputs */
-  struct sqlite3_index_constraint_usage {
-    int argvIndex;           /* if >0, constraint is part of argv to xFilter */
-    unsigned char omit;      /* Do not code a test for this constraint */
-  } *aConstraintUsage;
-  int idxNum;                /* Number used to identify the index */
-  char *idxStr;              /* String, possibly obtained from sqlite3_malloc */
-  int needToFreeIdxStr;      /* Free idxStr using sqlite3_free() if true */
-  int orderByConsumed;       /* True if output is already ordered */
-  double estimatedCost;      /* Estimated cost of using this index */
-  /* Fields below are only available in SQLite 3.8.2 and later */
-  sqlite3_int64 estimatedRows;    /* Estimated number of rows returned */
-  /* Fields below are only available in SQLite 3.9.0 and later */
-  int idxFlags;              /* Mask of SQLITE_INDEX_SCAN_* flags */
-  /* Fields below are only available in SQLite 3.10.0 and later */
-  sqlite3_uint64 colUsed;    /* Input: Mask of columns used by statement */
-};
-
-/*
-** CAPI3REF: Virtual Table Scan Flags
-*/
-#define SQLITE_INDEX_SCAN_UNIQUE      1     /* Scan visits at most 1 row */
-
-/*
-** CAPI3REF: Virtual Table Constraint Operator Codes
-**
-** These macros define the allowed values for the
-** [sqlite3_index_info].aConstraint[].op field. Each value represents
-** an operator that is part of a constraint term in the WHERE clause of
-** a query that uses a [virtual table].
-*/
-#define SQLITE_INDEX_CONSTRAINT_EQ      2
-#define SQLITE_INDEX_CONSTRAINT_GT      4
-#define SQLITE_INDEX_CONSTRAINT_LE      8
-#define SQLITE_INDEX_CONSTRAINT_LT     16
-#define SQLITE_INDEX_CONSTRAINT_GE     32
-#define SQLITE_INDEX_CONSTRAINT_MATCH  64
-#define SQLITE_INDEX_CONSTRAINT_LIKE   65
-#define SQLITE_INDEX_CONSTRAINT_GLOB   66
-#define SQLITE_INDEX_CONSTRAINT_REGEXP 67
-
-/*
-** CAPI3REF: Register A Virtual Table Implementation
-** METHOD: sqlite3
-**
-** ^These routines are used to register a new [virtual table module] name.
-** ^Module names must be registered before
-** creating a new [virtual table] using the module and before using a
-** preexisting [virtual table] for the module.
-**
-** ^The module name is registered on the [database connection] specified
-** by the first parameter.  ^The name of the module is given by the
-** second parameter.  ^The third parameter is a pointer to
-** the implementation of the [virtual table module].  ^The fourth
-** parameter is an arbitrary client data pointer that is passed through
-** into the [xCreate] and [xConnect] methods of the virtual table module
-** when a new virtual table is being created or reinitialized.
-**
-** ^The sqlite3_create_module_v2() interface has a fifth parameter which
-** is a pointer to a destructor for the pClientData.  ^SQLite will
-** invoke the destructor function (if it is not NULL) when SQLite
-** no longer needs the pClientData pointer.  ^The destructor will also
-** be invoked if the call to sqlite3_create_module_v2() fails.
-** ^The sqlite3_create_module()
-** interface is equivalent to sqlite3_create_module_v2() with a NULL
-** destructor.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_create_module(
-  sqlite3 *db,               /* SQLite connection to register module with */
-  const char *zName,         /* Name of the module */
-  const sqlite3_module *p,   /* Methods for the module */
-  void *pClientData          /* Client data for xCreate/xConnect */
-);
-SQLITE_API int SQLITE_STDCALL sqlite3_create_module_v2(
-  sqlite3 *db,               /* SQLite connection to register module with */
-  const char *zName,         /* Name of the module */
-  const sqlite3_module *p,   /* Methods for the module */
-  void *pClientData,         /* Client data for xCreate/xConnect */
-  void(*xDestroy)(void*)     /* Module destructor function */
-);
-
-/*
-** CAPI3REF: Virtual Table Instance Object
-** KEYWORDS: sqlite3_vtab
-**
-** Every [virtual table module] implementation uses a subclass
-** of this object to describe a particular instance
-** of the [virtual table].  Each subclass will
-** be tailored to the specific needs of the module implementation.
-** The purpose of this superclass is to define certain fields that are
-** common to all module implementations.
-**
-** ^Virtual table methods can set an error message by assigning a
-** string obtained from [sqlite3_mprintf()] to zErrMsg.  The method should
-** take care that any prior string is freed by a call to [sqlite3_free()]
-** prior to assigning a new string to zErrMsg.  ^After the error message
-** is delivered up to the client application, the string will be automatically
-** freed by sqlite3_free() and the zErrMsg field will be zeroed.
-*/
-struct sqlite3_vtab {
-  const sqlite3_module *pModule;  /* The module for this virtual table */
-  int nRef;                       /* Number of open cursors */
-  char *zErrMsg;                  /* Error message from sqlite3_mprintf() */
-  /* Virtual table implementations will typically add additional fields */
-};
-
-/*
-** CAPI3REF: Virtual Table Cursor Object
-** KEYWORDS: sqlite3_vtab_cursor {virtual table cursor}
-**
-** Every [virtual table module] implementation uses a subclass of the
-** following structure to describe cursors that point into the
-** [virtual table] and are used
-** to loop through the virtual table.  Cursors are created using the
-** [sqlite3_module.xOpen | xOpen] method of the module and are destroyed
-** by the [sqlite3_module.xClose | xClose] method.  Cursors are used
-** by the [xFilter], [xNext], [xEof], [xColumn], and [xRowid] methods
-** of the module.  Each module implementation will define
-** the content of a cursor structure to suit its own needs.
-**
-** This superclass exists in order to define fields of the cursor that
-** are common to all implementations.
-*/
-struct sqlite3_vtab_cursor {
-  sqlite3_vtab *pVtab;      /* Virtual table of this cursor */
-  /* Virtual table implementations will typically add additional fields */
-};
-
-/*
-** CAPI3REF: Declare The Schema Of A Virtual Table
-**
-** ^The [xCreate] and [xConnect] methods of a
-** [virtual table module] call this interface
-** to declare the format (the names and datatypes of the columns) of
-** the virtual tables they implement.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_declare_vtab(sqlite3*, const char *zSQL);
-
-/*
-** CAPI3REF: Overload A Function For A Virtual Table
-** METHOD: sqlite3
-**
-** ^(Virtual tables can provide alternative implementations of functions
-** using the [xFindFunction] method of the [virtual table module].
-** But global versions of those functions
-** must exist in order to be overloaded.)^
-**
-** ^(This API makes sure a global version of a function with a particular
-** name and number of parameters exists.  If no such function exists
-** before this API is called, a new function is created.)^  ^The implementation
-** of the new function always causes an exception to be thrown.  So
-** the new function is not good for anything by itself.  Its only
-** purpose is to be a placeholder function that can be overloaded
-** by a [virtual table].
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg);
-
-/*
-** The interface to the virtual-table mechanism defined above (back up
-** to a comment remarkably similar to this one) is currently considered
-** to be experimental.  The interface might change in incompatible ways.
-** If this is a problem for you, do not use the interface at this time.
-**
-** When the virtual-table mechanism stabilizes, we will declare the
-** interface fixed, support it indefinitely, and remove this comment.
-*/
-
-/*
-** CAPI3REF: A Handle To An Open BLOB
-** KEYWORDS: {BLOB handle} {BLOB handles}
-**
-** An instance of this object represents an open BLOB on which
-** [sqlite3_blob_open | incremental BLOB I/O] can be performed.
-** ^Objects of this type are created by [sqlite3_blob_open()]
-** and destroyed by [sqlite3_blob_close()].
-** ^The [sqlite3_blob_read()] and [sqlite3_blob_write()] interfaces
-** can be used to read or write small subsections of the BLOB.
-** ^The [sqlite3_blob_bytes()] interface returns the size of the BLOB in bytes.
-*/
-typedef struct sqlite3_blob sqlite3_blob;
-
-/*
-** CAPI3REF: Open A BLOB For Incremental I/O
-** METHOD: sqlite3
-** CONSTRUCTOR: sqlite3_blob
-**
-** ^(This interface opens a [BLOB handle | handle] to the BLOB located
-** in row iRow, column zColumn, table zTable in database zDb;
-** in other words, the same BLOB that would be selected by:
-**
-** <pre>
-**     SELECT zColumn FROM zDb.zTable WHERE [rowid] = iRow;
-** </pre>)^
-**
-** ^(Parameter zDb is not the filename that contains the database, but
-** rather the symbolic name of the database.  For attached databases, this is
-** the name that appears after the AS keyword in the [ATTACH] statement.
-** For the main database file, the database name is "main".  For TEMP
-** tables, the database name is "temp".)^
-**
-** ^If the flags parameter is non-zero, then the BLOB is opened for read
-** and write access.  ^If the flags parameter is zero, the BLOB is opened for
-** read-only access.
-**
-** ^(On success, [SQLITE_OK] is returned and the new [BLOB handle] is stored
-** in *ppBlob.  Otherwise an [error code] is returned and, unless the error
-** code is SQLITE_MISUSE, *ppBlob is set to NULL.)^ ^This means that, provided
-** the API is not misused, it is always safe to call [sqlite3_blob_close()]
-** on *ppBlob after this function returns.
-**
-** This function fails with SQLITE_ERROR if any of the following are true:
-** <ul>
-**   <li> ^(Database zDb does not exist)^,
-**   <li> ^(Table zTable does not exist within database zDb)^,
-**   <li> ^(Table zTable is a WITHOUT ROWID table)^,
-**   <li> ^(Column zColumn does not exist)^,
-**   <li> ^(Row iRow is not present in the table)^,
-**   <li> ^(The specified column of row iRow contains a value that is not
-**        a TEXT or BLOB value)^,
-**   <li> ^(Column zColumn is part of an index, PRIMARY KEY or UNIQUE
-**        constraint and the blob is being opened for read/write access)^,
-**   <li> ^([foreign key constraints | Foreign key constraints] are enabled,
-**        column zColumn is part of a [child key] definition and the blob is
-**        being opened for read/write access)^.
-** </ul>
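A usage sketch (illustrative only; the table "docs", the column "body", and the rowid are invented). The guarantee that *ppBlob is NULL on failure, except for SQLITE_MISUSE, makes the cleanup path uniform:

```c
#include <sqlite3.h>
#include <stdio.h>

/* Open row 1 of docs.body read-only and report any of the
** failure conditions listed above. */
static int open_blob(sqlite3 *db, sqlite3_blob **ppBlob){
  int rc = sqlite3_blob_open(db, "main", "docs", "body",
                             1 /* iRow */, 0 /* read-only */, ppBlob);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "blob_open: %s\n", sqlite3_errmsg(db));
    sqlite3_blob_close(*ppBlob);  /* harmless: NULL unless API was misused */
  }
  return rc;
}
```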
-** -** ^Unless it returns SQLITE_MISUSE, this function sets the -** [database connection] error code and message accessible via -** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. -** -** -** ^(If the row that a BLOB handle points to is modified by an -** [UPDATE], [DELETE], or by [ON CONFLICT] side-effects -** then the BLOB handle is marked as "expired". -** This is true if any column of the row is changed, even a column -** other than the one the BLOB handle is open on.)^ -** ^Calls to [sqlite3_blob_read()] and [sqlite3_blob_write()] for -** an expired BLOB handle fail with a return code of [SQLITE_ABORT]. -** ^(Changes written into a BLOB prior to the BLOB expiring are not -** rolled back by the expiration of the BLOB. Such changes will eventually -** commit if the transaction continues to completion.)^ -** -** ^Use the [sqlite3_blob_bytes()] interface to determine the size of -** the opened blob. ^The size of a blob may not be changed by this -** interface. Use the [UPDATE] SQL command to change the size of a -** blob. -** -** ^The [sqlite3_bind_zeroblob()] and [sqlite3_result_zeroblob()] interfaces -** and the built-in [zeroblob] SQL function may be used to create a -** zero-filled blob to read or write using the incremental-blob interface. -** -** To avoid a resource leak, every open [BLOB handle] should eventually -** be released by a call to [sqlite3_blob_close()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_blob_open( - sqlite3*, - const char *zDb, - const char *zTable, - const char *zColumn, - sqlite3_int64 iRow, - int flags, - sqlite3_blob **ppBlob -); - -/* -** CAPI3REF: Move a BLOB Handle to a New Row -** METHOD: sqlite3_blob -** -** ^This function is used to move an existing blob handle so that it points -** to a different row of the same database table. ^The new row is identified -** by the rowid value passed as the second argument. Only the row can be -** changed. ^The database, table and column on which the blob handle is open -** remain the same. Moving an existing blob handle to a new row can be -** faster than closing the existing handle and opening a new one. -** -** ^(The new row must meet the same criteria as for [sqlite3_blob_open()] - -** it must exist and there must be either a blob or text value stored in -** the nominated column.)^ ^If the new row is not present in the table, or if -** it does not contain a blob or text value, or if another error occurs, an -** SQLite error code is returned and the blob handle is considered aborted. -** ^All subsequent calls to [sqlite3_blob_read()], [sqlite3_blob_write()] or -** [sqlite3_blob_reopen()] on an aborted blob handle immediately return -** SQLITE_ABORT. ^Calling [sqlite3_blob_bytes()] on an aborted blob handle -** always returns zero. -** -** ^This function sets the database handle error code and message. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); - -/* -** CAPI3REF: Close A BLOB Handle -** DESTRUCTOR: sqlite3_blob -** -** ^This function closes an open [BLOB handle]. ^(The BLOB handle is closed -** unconditionally. Even if this routine returns an error code, the -** handle is still closed.)^ -** -** ^If the blob handle being closed was opened for read-write access, and if -** the database is in auto-commit mode and there are no other open read-write -** blob handles or active write statements, the current transaction is -** committed. ^If an error occurs while committing the transaction, an error -** code is returned and the transaction rolled back. 
-**
-** Calling this function with an argument that is not a NULL pointer or an
-** open blob handle results in undefined behaviour. ^Calling this routine
-** with a null pointer (such as would be returned by a failed call to
-** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function
-** is passed a valid open blob handle, the values returned by the
-** sqlite3_errcode() and sqlite3_errmsg() functions are set before returning.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_blob_close(sqlite3_blob *);
-
-/*
-** CAPI3REF: Return The Size Of An Open BLOB
-** METHOD: sqlite3_blob
-**
-** ^Returns the size in bytes of the BLOB accessible via the
-** successfully opened [BLOB handle] in its only argument.  ^The
-** incremental blob I/O routines can only read or overwrite existing
-** blob content; they cannot change the size of a blob.
-**
-** This routine only works on a [BLOB handle] which has been created
-** by a prior successful call to [sqlite3_blob_open()] and which has not
-** been closed by [sqlite3_blob_close()].  Passing any other pointer in
-** to this routine results in undefined and probably undesirable behavior.
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_blob_bytes(sqlite3_blob *);
-
-/*
-** CAPI3REF: Read Data From A BLOB Incrementally
-** METHOD: sqlite3_blob
-**
-** ^(This function is used to read data from an open [BLOB handle] into a
-** caller-supplied buffer. N bytes of data are copied into buffer Z
-** from the open BLOB, starting at offset iOffset.)^
-**
-** ^If offset iOffset is less than N bytes from the end of the BLOB,
-** [SQLITE_ERROR] is returned and no data is read.  ^If N or iOffset is
-** less than zero, [SQLITE_ERROR] is returned and no data is read.
-** ^The size of the blob (and hence the maximum value of N+iOffset)
-** can be determined using the [sqlite3_blob_bytes()] interface.
-**
-** ^An attempt to read from an expired [BLOB handle] fails with an
-** error code of [SQLITE_ABORT].
-**
-** ^(On success, sqlite3_blob_read() returns SQLITE_OK.
-** Otherwise, an [error code] or an [extended error code] is returned.)^
-**
-** This routine only works on a [BLOB handle] which has been created
-** by a prior successful call to [sqlite3_blob_open()] and which has not
-** been closed by [sqlite3_blob_close()].  Passing any other pointer in
-** to this routine results in undefined and probably undesirable behavior.
-**
-** See also: [sqlite3_blob_write()].
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_blob_read(sqlite3_blob *, void *Z, int N, int iOffset);
-
-/*
-** CAPI3REF: Write Data Into A BLOB Incrementally
-** METHOD: sqlite3_blob
-**
-** ^(This function is used to write data into an open [BLOB handle] from a
-** caller-supplied buffer. N bytes of data are copied from the buffer Z
-** into the open BLOB, starting at offset iOffset.)^
-**
-** ^(On success, sqlite3_blob_write() returns SQLITE_OK.
-** Otherwise, an [error code] or an [extended error code] is returned.)^
-** ^Unless SQLITE_MISUSE is returned, this function sets the
-** [database connection] error code and message accessible via
-** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions.
-**
-** ^If the [BLOB handle] passed as the first argument was not opened for
-** writing (the flags parameter to [sqlite3_blob_open()] was zero),
-** this function returns [SQLITE_READONLY].
-**
-** This function may only modify the contents of the BLOB; it is
-** not possible to increase the size of a BLOB using this API.
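Because the handle cannot grow a value, a common pattern is to reserve space with the [zeroblob] SQL function and then fill it through this interface. A sketch under that assumption (table and column names invented):

```c
#include <sqlite3.h>

/* Reserve 1024 bytes, then write n bytes (n <= 1024) at offset 0. */
static int write_payload(sqlite3 *db, const void *data, int n){
  sqlite3_blob *pBlob = 0;
  int rc = sqlite3_exec(db,
      "CREATE TABLE IF NOT EXISTS docs(body BLOB);"
      "INSERT INTO docs(body) VALUES(zeroblob(1024));", 0, 0, 0);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqlite3_blob_open(db, "main", "docs", "body",
                         sqlite3_last_insert_rowid(db), 1 /* rw */, &pBlob);
  if( rc==SQLITE_OK ) rc = sqlite3_blob_write(pBlob, data, n, 0);
  sqlite3_blob_close(pBlob);   /* safe even if the open failed */
  return rc;
}
```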
-** ^If offset iOffset is less than N bytes from the end of the BLOB, -** [SQLITE_ERROR] is returned and no data is written. The size of the -** BLOB (and hence the maximum value of N+iOffset) can be determined -** using the [sqlite3_blob_bytes()] interface. ^If N or iOffset are less -** than zero [SQLITE_ERROR] is returned and no data is written. -** -** ^An attempt to write to an expired [BLOB handle] fails with an -** error code of [SQLITE_ABORT]. ^Writes to the BLOB that occurred -** before the [BLOB handle] expired are not rolled back by the -** expiration of the handle, though of course those changes might -** have been overwritten by the statement that expired the BLOB handle -** or by other independent statements. -** -** This routine only works on a [BLOB handle] which has been created -** by a prior successful call to [sqlite3_blob_open()] and which has not -** been closed by [sqlite3_blob_close()]. Passing any other pointer in -** to this routine results in undefined and probably undesirable behavior. -** -** See also: [sqlite3_blob_read()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_blob_write(sqlite3_blob *, const void *z, int n, int iOffset); - -/* -** CAPI3REF: Virtual File System Objects -** -** A virtual filesystem (VFS) is an [sqlite3_vfs] object -** that SQLite uses to interact -** with the underlying operating system. Most SQLite builds come with a -** single default VFS that is appropriate for the host computer. -** New VFSes can be registered and existing VFSes can be unregistered. -** The following interfaces are provided. -** -** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name. -** ^Names are case sensitive. -** ^Names are zero-terminated UTF-8 strings. -** ^If there is no match, a NULL pointer is returned. -** ^If zVfsName is NULL then the default VFS is returned. -** -** ^New VFSes are registered with sqlite3_vfs_register(). -** ^Each new VFS becomes the default VFS if the makeDflt flag is set. -** ^The same VFS can be registered multiple times without injury. -** ^To make an existing VFS into the default VFS, register it again -** with the makeDflt flag set. If two different VFSes with the -** same name are registered, the behavior is undefined. If a -** VFS is registered with a name that is NULL or an empty string, -** then the behavior is undefined. -** -** ^Unregister a VFS with the sqlite3_vfs_unregister() interface. -** ^(If the default VFS is unregistered, another VFS is chosen as -** the default. The choice for the new VFS is arbitrary.)^ -*/ -SQLITE_API sqlite3_vfs *SQLITE_STDCALL sqlite3_vfs_find(const char *zVfsName); -SQLITE_API int SQLITE_STDCALL sqlite3_vfs_register(sqlite3_vfs*, int makeDflt); -SQLITE_API int SQLITE_STDCALL sqlite3_vfs_unregister(sqlite3_vfs*); - -/* -** CAPI3REF: Mutexes -** -** The SQLite core uses these routines for thread -** synchronization. Though they are intended for internal -** use by SQLite, code that links against SQLite is -** permitted to use any of these routines. -** -** The SQLite source code contains multiple implementations -** of these mutex routines. An appropriate implementation -** is selected automatically at compile-time. The following -** implementations are available in the SQLite core: -** -**
-** <ul>
-**   <li>   SQLITE_MUTEX_PTHREADS
-**   <li>   SQLITE_MUTEX_W32
-**   <li>   SQLITE_MUTEX_NOOP
-** </ul>
-**
-** The SQLITE_MUTEX_NOOP implementation is a set of routines
-** that does no real locking and is appropriate for use in
-** a single-threaded application.  The SQLITE_MUTEX_PTHREADS and
-** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix
-** and Windows.
-**
-** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor
-** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex
-** implementation is included with the library. In this case the
-** application must supply a custom mutex implementation using the
-** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function
-** before calling sqlite3_initialize() or any other public sqlite3_
-** function that calls sqlite3_initialize().
-**
-** ^The sqlite3_mutex_alloc() routine allocates a new
-** mutex and returns a pointer to it.  ^The sqlite3_mutex_alloc()
-** routine returns NULL if it is unable to allocate the requested
-** mutex.  The argument to sqlite3_mutex_alloc() must be one of these
-** integer constants:
-** <ul>
-**   <li>  SQLITE_MUTEX_FAST
-**   <li>  SQLITE_MUTEX_RECURSIVE
-**   <li>  SQLITE_MUTEX_STATIC_MASTER
-**   <li>  SQLITE_MUTEX_STATIC_MEM
-**   <li>  SQLITE_MUTEX_STATIC_OPEN
-**   <li>  SQLITE_MUTEX_STATIC_PRNG
-**   <li>  SQLITE_MUTEX_STATIC_LRU
-**   <li>  SQLITE_MUTEX_STATIC_PMEM
-**   <li>  SQLITE_MUTEX_STATIC_APP1
-**   <li>  SQLITE_MUTEX_STATIC_APP2
-**   <li>  SQLITE_MUTEX_STATIC_APP3
-**   <li>  SQLITE_MUTEX_STATIC_VFS1
-**   <li>  SQLITE_MUTEX_STATIC_VFS2
-**   <li>  SQLITE_MUTEX_STATIC_VFS3
-** </ul>
-** -** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) -** cause sqlite3_mutex_alloc() to create -** a new mutex. ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE -** is used but not necessarily so when SQLITE_MUTEX_FAST is used. -** The mutex implementation does not need to make a distinction -** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does -** not want to. SQLite will only request a recursive mutex in -** cases where it really needs one. If a faster non-recursive mutex -** implementation is available on the host platform, the mutex subsystem -** might return such a mutex in response to SQLITE_MUTEX_FAST. -** -** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other -** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return -** a pointer to a static preexisting mutex. ^Nine static mutexes are -** used by the current version of SQLite. Future versions of SQLite -** may add additional static mutexes. Static mutexes are for internal -** use by SQLite only. Applications that use SQLite mutexes should -** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or -** SQLITE_MUTEX_RECURSIVE. -** -** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST -** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() -** returns a different mutex on every call. ^For the static -** mutex types, the same mutex is returned on every call that has -** the same type number. -** -** ^The sqlite3_mutex_free() routine deallocates a previously -** allocated dynamic mutex. Attempting to deallocate a static -** mutex results in undefined behavior. -** -** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt -** to enter a mutex. ^If another thread is already within the mutex, -** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return -** SQLITE_BUSY. ^The sqlite3_mutex_try() interface returns [SQLITE_OK] -** upon successful entry. ^(Mutexes created using -** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread. -** In such cases, the -** mutex must be exited an equal number of times before another thread -** can enter.)^ If the same thread tries to enter any mutex other -** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined. -** -** ^(Some systems (for example, Windows 95) do not support the operation -** implemented by sqlite3_mutex_try(). On those systems, sqlite3_mutex_try() -** will always return SQLITE_BUSY. The SQLite core only ever uses -** sqlite3_mutex_try() as an optimization so this is acceptable -** behavior.)^ -** -** ^The sqlite3_mutex_leave() routine exits a mutex that was -** previously entered by the same thread. The behavior -** is undefined if the mutex is not currently entered by the -** calling thread or is not currently allocated. -** -** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or -** sqlite3_mutex_leave() is a NULL pointer, then all three routines -** behave as no-ops. -** -** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. 
-*/
-SQLITE_API sqlite3_mutex *SQLITE_STDCALL sqlite3_mutex_alloc(int);
-SQLITE_API void SQLITE_STDCALL sqlite3_mutex_free(sqlite3_mutex*);
-SQLITE_API void SQLITE_STDCALL sqlite3_mutex_enter(sqlite3_mutex*);
-SQLITE_API int SQLITE_STDCALL sqlite3_mutex_try(sqlite3_mutex*);
-SQLITE_API void SQLITE_STDCALL sqlite3_mutex_leave(sqlite3_mutex*);
-
-/*
-** CAPI3REF: Mutex Methods Object
-**
-** An instance of this structure defines the low-level routines
-** used to allocate and use mutexes.
-**
-** Usually, the default mutex implementations provided by SQLite are
-** sufficient; however, the application has the option of substituting a
-** custom implementation for specialized deployments or systems for which
-** SQLite does not provide a suitable implementation. In this case, the
-** application creates and populates an instance of this structure to pass
-** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option.
-** Additionally, an instance of this structure can be used as an
-** output variable when querying the system for the current mutex
-** implementation, using the [SQLITE_CONFIG_GETMUTEX] option.
-**
-** ^The xMutexInit method defined by this structure is invoked as
-** part of system initialization by the sqlite3_initialize() function.
-** ^The xMutexInit routine is called by SQLite exactly once for each
-** effective call to [sqlite3_initialize()].
-**
-** ^The xMutexEnd method defined by this structure is invoked as
-** part of system shutdown by the sqlite3_shutdown() function. The
-** implementation of this method is expected to release all outstanding
-** resources obtained by the mutex methods implementation, especially
-** those obtained by the xMutexInit method.  ^The xMutexEnd()
-** interface is invoked exactly once for each call to [sqlite3_shutdown()].
-**
-** ^(The remaining seven methods defined by this structure (xMutexAlloc,
-** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and
-** xMutexNotheld) implement the following interfaces (respectively):
-** <ul>
-**   <li>  [sqlite3_mutex_alloc()] </li>
-**   <li>  [sqlite3_mutex_free()] </li>
-**   <li>  [sqlite3_mutex_enter()] </li>
-**   <li>  [sqlite3_mutex_try()] </li>
-**   <li>  [sqlite3_mutex_leave()] </li>
-**   <li>  [sqlite3_mutex_held()] </li>
-**   <li>  [sqlite3_mutex_notheld()] </li>
-** </ul>
)^ -** -** The only difference is that the public sqlite3_XXX functions enumerated -** above silently ignore any invocations that pass a NULL pointer instead -** of a valid mutex handle. The implementations of the methods defined -** by this structure are not required to handle this case, the results -** of passing a NULL pointer instead of a valid mutex handle are undefined -** (i.e. it is acceptable to provide an implementation that segfaults if -** it is passed a NULL pointer). -** -** The xMutexInit() method must be threadsafe. It must be harmless to -** invoke xMutexInit() multiple times within the same process and without -** intervening calls to xMutexEnd(). Second and subsequent calls to -** xMutexInit() must be no-ops. -** -** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()] -** and its associates). Similarly, xMutexAlloc() must not use SQLite memory -** allocation for a static mutex. ^However xMutexAlloc() may use SQLite -** memory allocation for a fast or recursive mutex. -** -** ^SQLite will invoke the xMutexEnd() method when [sqlite3_shutdown()] is -** called, but only if the prior call to xMutexInit returned SQLITE_OK. -** If xMutexInit fails in any way, it is expected to clean up after itself -** prior to returning. -*/ -typedef struct sqlite3_mutex_methods sqlite3_mutex_methods; -struct sqlite3_mutex_methods { - int (*xMutexInit)(void); - int (*xMutexEnd)(void); - sqlite3_mutex *(*xMutexAlloc)(int); - void (*xMutexFree)(sqlite3_mutex *); - void (*xMutexEnter)(sqlite3_mutex *); - int (*xMutexTry)(sqlite3_mutex *); - void (*xMutexLeave)(sqlite3_mutex *); - int (*xMutexHeld)(sqlite3_mutex *); - int (*xMutexNotheld)(sqlite3_mutex *); -}; - -/* -** CAPI3REF: Mutex Verification Routines -** -** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routines -** are intended for use inside assert() statements. The SQLite core -** never uses these routines except inside an assert() and applications -** are advised to follow the lead of the core. The SQLite core only -** provides implementations for these routines when it is compiled -** with the SQLITE_DEBUG flag. External mutex implementations -** are only required to provide these routines if SQLITE_DEBUG is -** defined and if NDEBUG is not defined. -** -** These routines should return true if the mutex in their argument -** is held or not held, respectively, by the calling thread. -** -** The implementation is not required to provide versions of these -** routines that actually work. If the implementation does not provide working -** versions of these routines, it should at least provide stubs that always -** return true so that one does not get spurious assertion failures. -** -** If the argument to sqlite3_mutex_held() is a NULL pointer then -** the routine should return 1. This seems counter-intuitive since -** clearly the mutex cannot be held if it does not exist. But -** the reason the mutex does not exist is because the build is not -** using mutexes. And we do not want the assert() containing the -** call to sqlite3_mutex_held() to fail, so a non-zero return is -** the appropriate thing to do. The sqlite3_mutex_notheld() -** interface should also return 1 when given a NULL pointer. -*/ -#ifndef NDEBUG -SQLITE_API int SQLITE_STDCALL sqlite3_mutex_held(sqlite3_mutex*); -SQLITE_API int SQLITE_STDCALL sqlite3_mutex_notheld(sqlite3_mutex*); -#endif - -/* -** CAPI3REF: Mutex Types -** -** The [sqlite3_mutex_alloc()] interface takes a single argument -** which is one of these integer constants. 
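A brief usage sketch (illustrative only) of the dynamic-mutex lifecycle described above: a recursive mutex must be exited as many times as it was entered, and only dynamic mutexes may be freed:

```c
#include <sqlite3.h>

static void mutex_demo(void){
  sqlite3_mutex *m;
  sqlite3_initialize();                 /* ensure the mutex system is up */
  m = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE);
  if( m==0 ) return;                    /* allocation may fail */
  sqlite3_mutex_enter(m);
  sqlite3_mutex_enter(m);               /* legal on a recursive mutex */
  sqlite3_mutex_leave(m);
  sqlite3_mutex_leave(m);
  sqlite3_mutex_free(m);                /* never free a static mutex */
}
```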
-**
-** The set of static mutexes may change from one SQLite release to the
-** next.  Applications that override the built-in mutex logic must be
-** prepared to accommodate additional static mutexes.
-*/
-#define SQLITE_MUTEX_FAST             0
-#define SQLITE_MUTEX_RECURSIVE        1
-#define SQLITE_MUTEX_STATIC_MASTER    2
-#define SQLITE_MUTEX_STATIC_MEM       3  /* sqlite3_malloc() */
-#define SQLITE_MUTEX_STATIC_MEM2      4  /* NOT USED */
-#define SQLITE_MUTEX_STATIC_OPEN      4  /* sqlite3BtreeOpen() */
-#define SQLITE_MUTEX_STATIC_PRNG      5  /* sqlite3_random() */
-#define SQLITE_MUTEX_STATIC_LRU       6  /* lru page list */
-#define SQLITE_MUTEX_STATIC_LRU2      7  /* NOT USED */
-#define SQLITE_MUTEX_STATIC_PMEM      7  /* sqlite3PageMalloc() */
-#define SQLITE_MUTEX_STATIC_APP1      8  /* For use by application */
-#define SQLITE_MUTEX_STATIC_APP2      9  /* For use by application */
-#define SQLITE_MUTEX_STATIC_APP3     10  /* For use by application */
-#define SQLITE_MUTEX_STATIC_VFS1     11  /* For use by built-in VFS */
-#define SQLITE_MUTEX_STATIC_VFS2     12  /* For use by extension VFS */
-#define SQLITE_MUTEX_STATIC_VFS3     13  /* For use by application VFS */
-
-/*
-** CAPI3REF: Retrieve the mutex for a database connection
-** METHOD: sqlite3
-**
-** ^This interface returns a pointer to the [sqlite3_mutex] object that
-** serializes access to the [database connection] given in the argument
-** when the [threading mode] is Serialized.
-** ^If the [threading mode] is Single-thread or Multi-thread then this
-** routine returns a NULL pointer.
-*/
-SQLITE_API sqlite3_mutex *SQLITE_STDCALL sqlite3_db_mutex(sqlite3*);
-
-/*
-** CAPI3REF: Low-Level Control Of Database Files
-** METHOD: sqlite3
-**
-** ^The [sqlite3_file_control()] interface makes a direct call to the
-** xFileControl method for the [sqlite3_io_methods] object associated
-** with a particular database identified by the second argument. ^The
-** name of the database is "main" for the main database or "temp" for the
-** TEMP database, or the name that appears after the AS keyword for
-** databases that are added using the [ATTACH] SQL command.
-** ^A NULL pointer can be used in place of "main" to refer to the
-** main database file.
-** ^The third and fourth parameters to this routine
-** are passed directly through to the second and third parameters of
-** the xFileControl method.  ^The return value of the xFileControl
-** method becomes the return value of this routine.
-**
-** ^The SQLITE_FCNTL_FILE_POINTER value for the op parameter causes
-** a pointer to the underlying [sqlite3_file] object to be written into
-** the space pointed to by the 4th parameter.  ^The SQLITE_FCNTL_FILE_POINTER
-** case is a short-circuit path which does not actually invoke the
-** underlying sqlite3_io_methods.xFileControl method.
-**
-** ^If the second parameter (zDbName) does not match the name of any
-** open database file, then SQLITE_ERROR is returned.  ^This error
-** code is not remembered and will not be recalled by [sqlite3_errcode()]
-** or [sqlite3_errmsg()].  The underlying xFileControl method might
-** also return SQLITE_ERROR.  There is no way to distinguish between
-** an incorrect zDbName and an SQLITE_ERROR return from the underlying
-** xFileControl method.
-**
-** See also: [SQLITE_FCNTL_LOCKSTATE]
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_file_control(sqlite3*, const char *zDbName, int op, void*);
-
-/*
-** CAPI3REF: Testing Interface
-**
-** ^The sqlite3_test_control() interface is used to read out internal
-** state of SQLite and to inject faults into SQLite for testing
-** purposes.  ^The first parameter is an operation code that determines
-** the number, meaning, and operation of all subsequent parameters.
-**
-** This interface is not for use by applications.  It exists solely
-** for verifying the correct operation of the SQLite library.  Depending
-** on how the SQLite library is compiled, this interface might not exist.
-**
-** The details of the operation codes, their meanings, the parameters
-** they take, and what they do are all subject to change without notice.
-** Unlike most of the SQLite API, this function is not guaranteed to
-** operate consistently from one release to the next.
-*/
-SQLITE_API int SQLITE_CDECL sqlite3_test_control(int op, ...);
-
-/*
-** CAPI3REF: Testing Interface Operation Codes
-**
-** These constants are the valid operation code parameters used
-** as the first argument to [sqlite3_test_control()].
-**
-** These parameters and their meanings are subject to change
-** without notice.  These values are for testing purposes only.
-** Applications should not use any of these parameters or the
-** [sqlite3_test_control()] interface.
-*/
-#define SQLITE_TESTCTRL_FIRST                    5
-#define SQLITE_TESTCTRL_PRNG_SAVE                5
-#define SQLITE_TESTCTRL_PRNG_RESTORE             6
-#define SQLITE_TESTCTRL_PRNG_RESET               7
-#define SQLITE_TESTCTRL_BITVEC_TEST              8
-#define SQLITE_TESTCTRL_FAULT_INSTALL            9
-#define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS     10
-#define SQLITE_TESTCTRL_PENDING_BYTE            11
-#define SQLITE_TESTCTRL_ASSERT                  12
-#define SQLITE_TESTCTRL_ALWAYS                  13
-#define SQLITE_TESTCTRL_RESERVE                 14
-#define SQLITE_TESTCTRL_OPTIMIZATIONS           15
-#define SQLITE_TESTCTRL_ISKEYWORD               16
-#define SQLITE_TESTCTRL_SCRATCHMALLOC           17
-#define SQLITE_TESTCTRL_LOCALTIME_FAULT         18
-#define SQLITE_TESTCTRL_EXPLAIN_STMT            19  /* NOT USED */
-#define SQLITE_TESTCTRL_NEVER_CORRUPT           20
-#define SQLITE_TESTCTRL_VDBE_COVERAGE           21
-#define SQLITE_TESTCTRL_BYTEORDER               22
-#define SQLITE_TESTCTRL_ISINIT                  23
-#define SQLITE_TESTCTRL_SORTER_MMAP             24
-#define SQLITE_TESTCTRL_IMPOSTER                25
-#define SQLITE_TESTCTRL_LAST                    25
-
-/*
-** CAPI3REF: SQLite Runtime Status
-**
-** ^These interfaces are used to retrieve runtime status information
-** about the performance of SQLite, and optionally to reset various
-** highwater marks.  ^The first argument is an integer code for
-** the specific parameter to measure.  ^(Recognized integer codes
-** are of the form [status parameters | SQLITE_STATUS_...].)^
-** ^The current value of the parameter is returned into *pCurrent.
-** ^The highest recorded value is returned in *pHighwater.  ^If the
-** resetFlag is true, then the highest recorded value is reset after
-** *pHighwater is written.  ^(Some parameters do not record the highest
-** value.  For those parameters
-** nothing is written into *pHighwater and the resetFlag is ignored.)^
-** ^(Other parameters record only the highwater mark and not the current
-** value.  For these latter parameters nothing is written into *pCurrent.)^
-**
-** ^The sqlite3_status() and sqlite3_status64() routines return
-** SQLITE_OK on success and a non-zero [error code] on failure.
-**
-** If either the current value or the highwater mark is too large to
-** be represented by a 32-bit integer, then the values returned by
-** sqlite3_status() are undefined.
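A small sketch of the calling convention (illustrative only), using the 64-bit variant declared just below to read and reset the heap highwater mark:

```c
#include <sqlite3.h>
#include <stdio.h>

static void report_memory(void){
  sqlite3_int64 cur = 0, hi = 0;
  /* resetFlag=1: reset the highwater mark after reading it */
  if( sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &cur, &hi, 1)==SQLITE_OK ){
    printf("heap in use: %lld bytes (peak %lld)\n",
           (long long)cur, (long long)hi);
  }
}
```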
-** -** See also: [sqlite3_db_status()] -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag); -SQLITE_API int SQLITE_STDCALL sqlite3_status64( - int op, - sqlite3_int64 *pCurrent, - sqlite3_int64 *pHighwater, - int resetFlag -); - - -/* -** CAPI3REF: Status Parameters -** KEYWORDS: {status parameters} -** -** These integer constants designate various run-time status parameters -** that can be returned by [sqlite3_status()]. -** -**
-** <dl>
-** [[SQLITE_STATUS_MEMORY_USED]] ^(<dt>SQLITE_STATUS_MEMORY_USED</dt>
-** <dd>This parameter is the current amount of memory checked out
-** using [sqlite3_malloc()], either directly or indirectly.  The
-** figure includes calls made to [sqlite3_malloc()] by the application
-** and internal memory usage by the SQLite library.  Scratch memory
-** controlled by [SQLITE_CONFIG_SCRATCH] and auxiliary page-cache
-** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in
-** this parameter.  The amount returned is the sum of the allocation
-** sizes as reported by the xSize method in [sqlite3_mem_methods].</dd>)^
-**
-** [[SQLITE_STATUS_MALLOC_SIZE]] ^(<dt>SQLITE_STATUS_MALLOC_SIZE</dt>
-** <dd>This parameter records the largest memory allocation request
-** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their
-** internal equivalents).  Only the value returned in the
-** *pHighwater parameter to [sqlite3_status()] is of interest.
-** The value written into the *pCurrent parameter is undefined.</dd>)^
-**
-** [[SQLITE_STATUS_MALLOC_COUNT]] ^(<dt>SQLITE_STATUS_MALLOC_COUNT</dt>
-** <dd>This parameter records the number of separate memory allocations
-** currently checked out.</dd>)^
-**
-** [[SQLITE_STATUS_PAGECACHE_USED]] ^(<dt>SQLITE_STATUS_PAGECACHE_USED</dt>
-** <dd>This parameter returns the number of pages used out of the
-** [pagecache memory allocator] that was configured using
-** [SQLITE_CONFIG_PAGECACHE].  The
-** value returned is in pages, not in bytes.</dd>)^
-**
-** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]]
-** ^(<dt>SQLITE_STATUS_PAGECACHE_OVERFLOW</dt>
-** <dd>This parameter returns the number of bytes of page cache
-** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE]
-** buffer and were forced to overflow to [sqlite3_malloc()].  The
-** returned value includes allocations that overflowed because they
-** were too large (they were larger than the "sz" parameter to
-** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because
-** no space was left in the page cache.</dd>)^
-**
-** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(<dt>SQLITE_STATUS_PAGECACHE_SIZE</dt>
-** <dd>This parameter records the largest memory allocation request
-** handed to the [pagecache memory allocator].  Only the value returned in the
-** *pHighwater parameter to [sqlite3_status()] is of interest.
-** The value written into the *pCurrent parameter is undefined.</dd>)^
-**
-** [[SQLITE_STATUS_SCRATCH_USED]] ^(<dt>SQLITE_STATUS_SCRATCH_USED</dt>
-** <dd>This parameter returns the number of allocations used out of the
-** [scratch memory allocator] configured using
-** [SQLITE_CONFIG_SCRATCH].  The value returned is in allocations, not
-** in bytes.  Since a single thread may only have one scratch allocation
-** outstanding at a time, this parameter also reports the number of threads
-** using scratch memory at the same time.</dd>)^
-**
-** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(<dt>SQLITE_STATUS_SCRATCH_OVERFLOW</dt>
-** <dd>This parameter returns the number of bytes of scratch memory
-** allocation which could not be satisfied by the [SQLITE_CONFIG_SCRATCH]
-** buffer and were forced to overflow to [sqlite3_malloc()].  The values
-** returned include overflows because the requested allocation was too
-** large (that is, because the requested allocation was larger than the
-** "sz" parameter to [SQLITE_CONFIG_SCRATCH]) and because no scratch buffer
-** slots were available.
-** </dd>)^
-**
-** [[SQLITE_STATUS_SCRATCH_SIZE]] ^(<dt>SQLITE_STATUS_SCRATCH_SIZE</dt>
-** <dd>This parameter records the largest memory allocation request
-** handed to the [scratch memory allocator].  Only the value returned in the
-** *pHighwater parameter to [sqlite3_status()] is of interest.
-** The value written into the *pCurrent parameter is undefined.</dd>)^
-**
-** [[SQLITE_STATUS_PARSER_STACK]] ^(<dt>SQLITE_STATUS_PARSER_STACK</dt>
-** <dd>The *pHighwater parameter records the deepest parser stack.
-** The *pCurrent value is undefined.  The *pHighwater value is only
-** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].</dd>)^
-** </dl>
-**
-** New status parameters may be added from time to time.
-*/
-#define SQLITE_STATUS_MEMORY_USED          0
-#define SQLITE_STATUS_PAGECACHE_USED       1
-#define SQLITE_STATUS_PAGECACHE_OVERFLOW   2
-#define SQLITE_STATUS_SCRATCH_USED         3
-#define SQLITE_STATUS_SCRATCH_OVERFLOW     4
-#define SQLITE_STATUS_MALLOC_SIZE          5
-#define SQLITE_STATUS_PARSER_STACK         6
-#define SQLITE_STATUS_PAGECACHE_SIZE       7
-#define SQLITE_STATUS_SCRATCH_SIZE         8
-#define SQLITE_STATUS_MALLOC_COUNT         9
-
-/*
-** CAPI3REF: Database Connection Status
-** METHOD: sqlite3
-**
-** ^This interface is used to retrieve runtime status information
-** about a single [database connection].  ^The first argument is the
-** database connection object to be interrogated.  ^The second argument
-** is an integer constant, taken from the set of
-** [SQLITE_DBSTATUS options], that
-** determines the parameter to interrogate.  The set of
-** [SQLITE_DBSTATUS options] is likely
-** to grow in future releases of SQLite.
-**
-** ^The current value of the requested parameter is written into *pCur
-** and the highest instantaneous value is written into *pHiwtr.  ^If
-** the resetFlg is true, then the highest instantaneous value is
-** reset back down to the current value.
-**
-** ^The sqlite3_db_status() routine returns SQLITE_OK on success and a
-** non-zero [error code] on failure.
-**
-** See also: [sqlite3_status()] and [sqlite3_stmt_status()].
-*/
-SQLITE_API int SQLITE_STDCALL sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int resetFlg);
-
-/*
-** CAPI3REF: Status Parameters for database connections
-** KEYWORDS: {SQLITE_DBSTATUS options}
-**
-** These constants are the available integer "verbs" that can be passed as
-** the second argument to the [sqlite3_db_status()] interface.
-**
-** New verbs may be added in future releases of SQLite. Existing verbs
-** might be discontinued. Applications should check the return code from
-** [sqlite3_db_status()] to make sure that the call worked.
-** The [sqlite3_db_status()] interface will return a non-zero error code
-** if a discontinued or unsupported verb is invoked.
-**
-** <dl>
-** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_USED</dt>
-** <dd>This parameter returns the number of lookaside memory slots currently
-** checked out.</dd>)^
-**
-** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_HIT</dt>
-** <dd>This parameter returns the number of malloc attempts that were
-** satisfied using lookaside memory. Only the high-water value is meaningful;
-** the current value is always zero.)^
-**
-** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]]
-** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE</dt>
-** <dd>This parameter returns the number of malloc attempts that might have
-** been satisfied using lookaside memory but failed due to the amount of
-** memory requested being larger than the lookaside slot size.
-** Only the high-water value is meaningful;
-** the current value is always zero.)^
-**
-** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]]
-** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL</dt>
-** <dd>This parameter returns the number of malloc attempts that might have
-** been satisfied using lookaside memory but failed due to all lookaside
-** memory already being in use.
-** Only the high-water value is meaningful;
-** the current value is always zero.)^
-**
-** [[SQLITE_DBSTATUS_CACHE_USED]] ^(<dt>SQLITE_DBSTATUS_CACHE_USED</dt>
-** <dd>This parameter returns the approximate number of bytes of heap
-** memory used by all pager caches associated with the database connection.)^
-** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0.
-**
-** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(<dt>SQLITE_DBSTATUS_SCHEMA_USED</dt>
-** <dd>This parameter returns the approximate number of bytes of heap
-** memory used to store the schema for all databases associated
-** with the connection - main, temp, and any [ATTACH]-ed databases.)^
-** ^The full amount of memory used by the schemas is reported, even if the
-** schema memory is shared with other database connections due to
-** [shared cache mode] being enabled.
-** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0.
-**
-** [[SQLITE_DBSTATUS_STMT_USED]] ^(<dt>SQLITE_DBSTATUS_STMT_USED</dt>
-** <dd>This parameter returns the approximate number of bytes of heap
-** and lookaside memory used by all prepared statements associated with
-** the database connection.)^
-** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0.
-** </dd>
-**
-** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt>
-** <dd>This parameter returns the number of pager cache hits that have
-** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT
-** is always 0.
-** </dd>
-**
-** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt>
-** <dd>This parameter returns the number of pager cache misses that have
-** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS
-** is always 0.
-** </dd>
-**
-** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(<dt>SQLITE_DBSTATUS_CACHE_WRITE</dt>
-** <dd>This parameter returns the number of dirty cache entries that have
-** been written to disk. Specifically, the number of pages written to the
-** wal file in wal mode databases, or the number of pages written to the
-** database file in rollback mode databases. Any pages written as part of
-** transaction rollback or database recovery operations are not included.
-** If an IO or other error occurs while writing a page to disk, the effect
-** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The
-** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0.
-** </dd>
-**
-** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(<dt>SQLITE_DBSTATUS_DEFERRED_FKS</dt>
-** <dd>This parameter returns zero for the current value if and only if
-** all foreign key constraints (deferred or immediate) have been
-** resolved.)^ ^The highwater mark is always 0.
-** </dd>
-** </dl>
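A sketch (illustrative only) of sampling two of these verbs on a live connection; for the cache counters only the current value carries information, as noted above:

```c
#include <sqlite3.h>
#include <stdio.h>

static void report_cache(sqlite3 *db){
  int hit = 0, miss = 0, hiwtr = 0;   /* highwater is always 0 here */
  sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_HIT,  &hit,  &hiwtr, 0);
  sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_MISS, &miss, &hiwtr, 0);
  printf("pager cache: %d hits, %d misses\n", hit, miss);
}
```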
-*/ -#define SQLITE_DBSTATUS_LOOKASIDE_USED 0 -#define SQLITE_DBSTATUS_CACHE_USED 1 -#define SQLITE_DBSTATUS_SCHEMA_USED 2 -#define SQLITE_DBSTATUS_STMT_USED 3 -#define SQLITE_DBSTATUS_LOOKASIDE_HIT 4 -#define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE 5 -#define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL 6 -#define SQLITE_DBSTATUS_CACHE_HIT 7 -#define SQLITE_DBSTATUS_CACHE_MISS 8 -#define SQLITE_DBSTATUS_CACHE_WRITE 9 -#define SQLITE_DBSTATUS_DEFERRED_FKS 10 -#define SQLITE_DBSTATUS_MAX 10 /* Largest defined DBSTATUS */ - - -/* -** CAPI3REF: Prepared Statement Status -** METHOD: sqlite3_stmt -** -** ^(Each prepared statement maintains various -** [SQLITE_STMTSTATUS counters] that measure the number -** of times it has performed specific operations.)^ These counters can -** be used to monitor the performance characteristics of the prepared -** statements. For example, if the number of table steps greatly exceeds -** the number of table searches or result rows, that would tend to indicate -** that the prepared statement is using a full table scan rather than -** an index. -** -** ^(This interface is used to retrieve and reset counter values from -** a [prepared statement]. The first argument is the prepared statement -** object to be interrogated. The second argument -** is an integer code for a specific [SQLITE_STMTSTATUS counter] -** to be interrogated.)^ -** ^The current value of the requested counter is returned. -** ^If the resetFlg is true, then the counter is reset to zero after this -** interface call returns. -** -** See also: [sqlite3_status()] and [sqlite3_db_status()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg); - -/* -** CAPI3REF: Status Parameters for prepared statements -** KEYWORDS: {SQLITE_STMTSTATUS counter} {SQLITE_STMTSTATUS counters} -** -** These preprocessor macros define integer codes that name counter -** values associated with the [sqlite3_stmt_status()] interface. -** The meanings of the various counters are as follows: -** -**
-** <dl>
-** [[SQLITE_STMTSTATUS_FULLSCAN_STEP]] <dt>SQLITE_STMTSTATUS_FULLSCAN_STEP</dt>
-** <dd>^This is the number of times that SQLite has stepped forward in
-** a table as part of a full table scan.  Large numbers for this counter
-** may indicate opportunities for performance improvement through
-** careful use of indices.</dd>
-**
-** [[SQLITE_STMTSTATUS_SORT]] <dt>SQLITE_STMTSTATUS_SORT</dt>
-** <dd>^This is the number of sort operations that have occurred.
-** A non-zero value in this counter may indicate an opportunity to
-** improve performance through careful use of indices.</dd>
-**
-** [[SQLITE_STMTSTATUS_AUTOINDEX]] <dt>SQLITE_STMTSTATUS_AUTOINDEX</dt>
-** <dd>^This is the number of rows inserted into transient indices that
-** were created automatically in order to help joins run faster.
-** A non-zero value in this counter may indicate an opportunity to
-** improve performance by adding permanent indices that do not
-** need to be reinitialized each time the statement is run.</dd>
-**
-** [[SQLITE_STMTSTATUS_VM_STEP]] <dt>SQLITE_STMTSTATUS_VM_STEP</dt>
-** <dd>^This is the number of virtual machine operations executed
-** by the prepared statement if that number is less than or equal
-** to 2147483647.  The number of virtual machine operations can be
-** used as a proxy for the total work done by the prepared statement.
-** If the number of virtual machine operations exceeds 2147483647
-** then the value returned by this statement status code is undefined.
-** </dd>
-** </dl>
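A sketch (illustrative only) of reading these counters after a statement has been stepped to completion; large values suggest a missing index:

```c
#include <sqlite3.h>
#include <stdio.h>

static void profile_stmt(sqlite3_stmt *pStmt){
  /* resetFlg=0: read the counters without clearing them */
  int nScan = sqlite3_stmt_status(pStmt, SQLITE_STMTSTATUS_FULLSCAN_STEP, 0);
  int nSort = sqlite3_stmt_status(pStmt, SQLITE_STMTSTATUS_SORT, 0);
  if( nScan>0 || nSort>0 ){
    printf("full-scan steps: %d, sorts: %d\n", nScan, nSort);
  }
}
```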
-*/ -#define SQLITE_STMTSTATUS_FULLSCAN_STEP 1 -#define SQLITE_STMTSTATUS_SORT 2 -#define SQLITE_STMTSTATUS_AUTOINDEX 3 -#define SQLITE_STMTSTATUS_VM_STEP 4 - -/* -** CAPI3REF: Custom Page Cache Object -** -** The sqlite3_pcache type is opaque. It is implemented by -** the pluggable module. The SQLite core has no knowledge of -** its size or internal structure and never deals with the -** sqlite3_pcache object except by holding and passing pointers -** to the object. -** -** See [sqlite3_pcache_methods2] for additional information. -*/ -typedef struct sqlite3_pcache sqlite3_pcache; - -/* -** CAPI3REF: Custom Page Cache Object -** -** The sqlite3_pcache_page object represents a single page in the -** page cache. The page cache will allocate instances of this -** object. Various methods of the page cache use pointers to instances -** of this object as parameters or as their return value. -** -** See [sqlite3_pcache_methods2] for additional information. -*/ -typedef struct sqlite3_pcache_page sqlite3_pcache_page; -struct sqlite3_pcache_page { - void *pBuf; /* The content of the page */ - void *pExtra; /* Extra information associated with the page */ -}; - -/* -** CAPI3REF: Application Defined Page Cache. -** KEYWORDS: {page cache} -** -** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can -** register an alternative page cache implementation by passing in an -** instance of the sqlite3_pcache_methods2 structure.)^ -** In many applications, most of the heap memory allocated by -** SQLite is used for the page cache. -** By implementing a -** custom page cache using this API, an application can better control -** the amount of memory consumed by SQLite, the way in which -** that memory is allocated and released, and the policies used to -** determine exactly which parts of a database file are cached and for -** how long. -** -** The alternative page cache mechanism is an -** extreme measure that is only needed by the most demanding applications. -** The built-in page cache is recommended for most uses. -** -** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an -** internal buffer by SQLite within the call to [sqlite3_config]. Hence -** the application may discard the parameter after the call to -** [sqlite3_config()] returns.)^ -** -** [[the xInit() page cache method]] -** ^(The xInit() method is called once for each effective -** call to [sqlite3_initialize()])^ -** (usually only once during the lifetime of the process). ^(The xInit() -** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^ -** The intent of the xInit() method is to set up global data structures -** required by the custom page cache implementation. -** ^(If the xInit() method is NULL, then the -** built-in default page cache is used instead of the application defined -** page cache.)^ -** -** [[the xShutdown() page cache method]] -** ^The xShutdown() method is called by [sqlite3_shutdown()]. -** It can be used to clean up -** any outstanding resources before process shutdown, if required. -** ^The xShutdown() method may be NULL. -** -** ^SQLite automatically serializes calls to the xInit method, -** so the xInit method need not be threadsafe. ^The -** xShutdown method is only called from [sqlite3_shutdown()] so it does -** not need to be threadsafe either. All other methods must be threadsafe -** in multithreaded applications. -** -** ^SQLite will never invoke xInit() more than once without an intervening -** call to xShutdown(). 
-**
-** [[the xCreate() page cache methods]]
-** ^SQLite invokes the xCreate() method to construct a new cache instance.
-** SQLite will typically create one cache instance for each open database file,
-** though this is not guaranteed. ^The
-** first parameter, szPage, is the size in bytes of the pages that must
-** be allocated by the cache.  ^szPage will always be a power of two.  ^The
-** second parameter szExtra is a number of bytes of extra storage
-** associated with each page cache entry.  ^The szExtra parameter will be
-** a number less than 250.  SQLite will use the
-** extra szExtra bytes on each page to store metadata about the underlying
-** database page on disk.  The value passed into szExtra depends
-** on the SQLite version, the target platform, and how SQLite was compiled.
-** ^The third argument to xCreate(), bPurgeable, is true if the cache being
-** created will be used to cache database pages of a file stored on disk, or
-** false if it is used for an in-memory database. The cache implementation
-** does not have to do anything special based on the value of bPurgeable;
-** it is purely advisory.  ^On a cache where bPurgeable is false, SQLite will
-** never invoke xUnpin() except to deliberately delete a page.
-** ^In other words, calls to xUnpin() on a cache with bPurgeable set to
-** false will always have the "discard" flag set to true.
-** ^Hence, a cache created with bPurgeable false will
-** never contain any unpinned pages.
-**
-** [[the xCachesize() page cache method]]
-** ^(The xCachesize() method may be called at any time by SQLite to set the
-** suggested maximum cache-size (the number of pages stored) for the cache
-** instance passed as the first argument.  This is the value configured using
-** the SQLite "[PRAGMA cache_size]" command.)^  As with the bPurgeable
-** parameter, the implementation is not required to do anything with this
-** value; it is advisory only.
-**
-** [[the xPagecount() page cache methods]]
-** The xPagecount() method must return the number of pages currently
-** stored in the cache, both pinned and unpinned.
-**
-** [[the xFetch() page cache methods]]
-** The xFetch() method locates a page in the cache and returns a pointer to
-** an sqlite3_pcache_page object associated with that page, or a NULL pointer.
-** The pBuf element of the returned sqlite3_pcache_page object will be a
-** pointer to a buffer of szPage bytes used to store the content of a
-** single database page.  The pExtra element of sqlite3_pcache_page will be
-** a pointer to the szExtra bytes of extra storage that SQLite has requested
-** for each entry in the page cache.
-**
-** The page to be fetched is determined by the key. ^The minimum key value
-** is 1.  After it has been retrieved using xFetch, the page is considered
-** to be "pinned".
-**
-** If the requested page is already in the page cache, then the page cache
-** implementation must return a pointer to the page buffer with its content
-** intact.  If the requested page is not already in the cache, then the
-** cache implementation should use the value of the createFlag
-** parameter to help it determine what action to take:
-**
-** <table border=1 width=85% align=center>
-** <tr><th> createFlag <th> Behavior when page is not already in cache
-** <tr><td> 0 <td> Do not allocate a new page.  Return NULL.
-** <tr><td> 1 <td> Allocate a new page if it is easy and convenient to do so.
-**                 Otherwise return NULL.
-** <tr><td> 2 <td> Make every effort to allocate a new page.  Only return
-**                 NULL if allocating a new page is effectively impossible.
-** </table>
-**
-** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1.  SQLite
-** will only use a createFlag of 2 after a prior call with a createFlag of 1
-** failed.)^  In between the two xFetch() calls, SQLite may
-** attempt to unpin one or more cache pages by spilling the content of
-** pinned pages to disk and synching the operating system disk cache.
-**
-** [[the xUnpin() page cache method]]
-** ^xUnpin() is called by SQLite with a pointer to a currently pinned page
-** as its second argument.  If the third parameter, discard, is non-zero,
-** then the page must be evicted from the cache.
-** ^If the discard parameter is
-** zero, then the page may be discarded or retained at the discretion of
-** the page cache implementation.  ^The page cache implementation
-** may choose to evict unpinned pages at any time.
-**
-** The cache must not perform any reference counting. A single
-** call to xUnpin() unpins the page regardless of the number of prior calls
-** to xFetch().
-**
-** [[the xRekey() page cache methods]]
-** The xRekey() method is used to change the key value associated with the
-** page passed as the second argument. If the cache
-** already contains an entry associated with newKey, it must be
-** discarded. ^Any prior cache entry associated with newKey is guaranteed not
-** to be pinned.
-**
-** When SQLite calls the xTruncate() method, the cache must discard all
-** existing cache entries with page numbers (keys) greater than or equal
-** to the value of the iLimit parameter passed to xTruncate(). If any
-** of these pages are pinned, they are implicitly unpinned, meaning that
-** they can be safely discarded.
-**
-** [[the xDestroy() page cache method]]
-** ^The xDestroy() method is used to delete a cache allocated by xCreate().
-** All resources associated with the specified cache should be freed. ^After
-** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*]
-** handle invalid, and will not use it with any other sqlite3_pcache_methods2
-** functions.
-**
-** [[the xShrink() page cache method]]
-** ^SQLite invokes the xShrink() method when it wants the page cache to
-** free up as much heap memory as possible.  The page cache implementation
-** is not obligated to free any memory, but well-behaved implementations should
-** do their best.
-*/
-typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2;
-struct sqlite3_pcache_methods2 {
-  int iVersion;
-  void *pArg;
-  int (*xInit)(void*);
-  void (*xShutdown)(void*);
-  sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable);
-  void (*xCachesize)(sqlite3_pcache*, int nCachesize);
-  int (*xPagecount)(sqlite3_pcache*);
-  sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
-  void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard);
-  void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*,
-      unsigned oldKey, unsigned newKey);
-  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
-  void (*xDestroy)(sqlite3_pcache*);
-  void (*xShrink)(sqlite3_pcache*);
-};
-
-/*
-** This is the obsolete pcache_methods object that has now been replaced
-** by sqlite3_pcache_methods2.  This object is not used by SQLite.  It is
-** retained in the header file for backwards compatibility only.
-*/ -typedef struct sqlite3_pcache_methods sqlite3_pcache_methods; -struct sqlite3_pcache_methods { - void *pArg; - int (*xInit)(void*); - void (*xShutdown)(void*); - sqlite3_pcache *(*xCreate)(int szPage, int bPurgeable); - void (*xCachesize)(sqlite3_pcache*, int nCachesize); - int (*xPagecount)(sqlite3_pcache*); - void *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag); - void (*xUnpin)(sqlite3_pcache*, void*, int discard); - void (*xRekey)(sqlite3_pcache*, void*, unsigned oldKey, unsigned newKey); - void (*xTruncate)(sqlite3_pcache*, unsigned iLimit); - void (*xDestroy)(sqlite3_pcache*); -}; - - -/* -** CAPI3REF: Online Backup Object -** -** The sqlite3_backup object records state information about an ongoing -** online backup operation. ^The sqlite3_backup object is created by -** a call to [sqlite3_backup_init()] and is destroyed by a call to -** [sqlite3_backup_finish()]. -** -** See Also: [Using the SQLite Online Backup API] -*/ -typedef struct sqlite3_backup sqlite3_backup; - -/* -** CAPI3REF: Online Backup API. -** -** The backup API copies the content of one database into another. -** It is useful either for creating backups of databases or -** for copying in-memory databases to or from persistent files. -** -** See Also: [Using the SQLite Online Backup API] -** -** ^SQLite holds a write transaction open on the destination database file -** for the duration of the backup operation. -** ^The source database is read-locked only while it is being read; -** it is not locked continuously for the entire backup operation. -** ^Thus, the backup may be performed on a live source database without -** preventing other database connections from -** reading or writing to the source database while the backup is underway. -** -** ^(To perform a backup operation: -**
-**   1. sqlite3_backup_init() is called once to initialize the backup,
-**   2. sqlite3_backup_step() is called one or more times to transfer
-**      the data between the two databases, and finally
-**   3. sqlite3_backup_finish() is called to release all resources
-**      associated with the backup operation.
)^ -** There should be exactly one call to sqlite3_backup_finish() for each -** successful call to sqlite3_backup_init(). -** -** [[sqlite3_backup_init()]] sqlite3_backup_init() -** -** ^The D and N arguments to sqlite3_backup_init(D,N,S,M) are the -** [database connection] associated with the destination database -** and the database name, respectively. -** ^The database name is "main" for the main database, "temp" for the -** temporary database, or the name specified after the AS keyword in -** an [ATTACH] statement for an attached database. -** ^The S and M arguments passed to -** sqlite3_backup_init(D,N,S,M) identify the [database connection] -** and database name of the source database, respectively. -** ^The source and destination [database connections] (parameters S and D) -** must be different or else sqlite3_backup_init(D,N,S,M) will fail with -** an error. -** -** ^A call to sqlite3_backup_init() will fail, returning NULL, if -** there is already a read or read-write transaction open on the -** destination database. -** -** ^If an error occurs within sqlite3_backup_init(D,N,S,M), then NULL is -** returned and an error code and error message are stored in the -** destination [database connection] D. -** ^The error code and message for the failed call to sqlite3_backup_init() -** can be retrieved using the [sqlite3_errcode()], [sqlite3_errmsg()], and/or -** [sqlite3_errmsg16()] functions. -** ^A successful call to sqlite3_backup_init() returns a pointer to an -** [sqlite3_backup] object. -** ^The [sqlite3_backup] object may be used with the sqlite3_backup_step() and -** sqlite3_backup_finish() functions to perform the specified backup -** operation. -** -** [[sqlite3_backup_step()]] sqlite3_backup_step() -** -** ^Function sqlite3_backup_step(B,N) will copy up to N pages between -** the source and destination databases specified by [sqlite3_backup] object B. -** ^If N is negative, all remaining source pages are copied. -** ^If sqlite3_backup_step(B,N) successfully copies N pages and there -** are still more pages to be copied, then the function returns [SQLITE_OK]. -** ^If sqlite3_backup_step(B,N) successfully finishes copying all pages -** from source to destination, then it returns [SQLITE_DONE]. -** ^If an error occurs while running sqlite3_backup_step(B,N), -** then an [error code] is returned. ^As well as [SQLITE_OK] and -** [SQLITE_DONE], a call to sqlite3_backup_step() may return [SQLITE_READONLY], -** [SQLITE_NOMEM], [SQLITE_BUSY], [SQLITE_LOCKED], or an -** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX] extended error code. -** -** ^(The sqlite3_backup_step() might return [SQLITE_READONLY] if -**
-**   1. the destination database was opened read-only, or
-**   2. the destination database is using write-ahead-log journaling
-**      and the destination and source page sizes differ, or
-**   3. the destination database is an in-memory database and the
-**      destination and source page sizes differ.
)^ -** -** ^If sqlite3_backup_step() cannot obtain a required file-system lock, then -** the [sqlite3_busy_handler | busy-handler function] -** is invoked (if one is specified). ^If the -** busy-handler returns non-zero before the lock is available, then -** [SQLITE_BUSY] is returned to the caller. ^In this case the call to -** sqlite3_backup_step() can be retried later. ^If the source -** [database connection] -** is being used to write to the source database when sqlite3_backup_step() -** is called, then [SQLITE_LOCKED] is returned immediately. ^Again, in this -** case the call to sqlite3_backup_step() can be retried later on. ^(If -** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX], [SQLITE_NOMEM], or -** [SQLITE_READONLY] is returned, then -** there is no point in retrying the call to sqlite3_backup_step(). These -** errors are considered fatal.)^ The application must accept -** that the backup operation has failed and pass the backup operation handle -** to sqlite3_backup_finish() to release associated resources. -** -** ^The first call to sqlite3_backup_step() obtains an exclusive lock -** on the destination file. ^The exclusive lock is not released until either -** sqlite3_backup_finish() is called or the backup operation is complete -** and sqlite3_backup_step() returns [SQLITE_DONE]. ^Every call to -** sqlite3_backup_step() obtains a [shared lock] on the source database that -** lasts for the duration of the sqlite3_backup_step() call. -** ^Because the source database is not locked between calls to -** sqlite3_backup_step(), the source database may be modified mid-way -** through the backup process. ^If the source database is modified by an -** external process or via a database connection other than the one being -** used by the backup operation, then the backup will be automatically -** restarted by the next call to sqlite3_backup_step(). ^If the source -** database is modified using the same database connection as is used -** by the backup operation, then the backup database is automatically -** updated at the same time. -** -** [[sqlite3_backup_finish()]] sqlite3_backup_finish() -** -** When sqlite3_backup_step() has returned [SQLITE_DONE], or when the -** application wishes to abandon the backup operation, the application -** should destroy the [sqlite3_backup] by passing it to sqlite3_backup_finish(). -** ^The sqlite3_backup_finish() interface releases all -** resources associated with the [sqlite3_backup] object. -** ^If sqlite3_backup_step() has not yet returned [SQLITE_DONE], then any -** active write-transaction on the destination database is rolled back. -** The [sqlite3_backup] object is invalid -** and may not be used following a call to sqlite3_backup_finish(). -** -** ^The value returned by sqlite3_backup_finish is [SQLITE_OK] if no -** sqlite3_backup_step() errors occurred, regardless of whether or not -** sqlite3_backup_step() completed. -** ^If an out-of-memory condition or IO error occurred during any prior -** sqlite3_backup_step() call on the same [sqlite3_backup] object, then -** sqlite3_backup_finish() returns the corresponding [error code]. -** -** ^A return of [SQLITE_BUSY] or [SQLITE_LOCKED] from sqlite3_backup_step() -** is not a permanent error and does not affect the return value of -** sqlite3_backup_finish().
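To make the init/step/finish contract concrete, the following sketch shows the canonical backup loop built only from the interfaces documented above; copying 5 pages per step and sleeping 250 ms on SQLITE_BUSY/SQLITE_LOCKED is one reasonable policy, not a requirement:

#include <sqlite3.h>

/* Copy the "main" database of pSrc into the "main" database of pDst.
** Returns an SQLite error code (SQLITE_OK on success). */
int backup_db(sqlite3 *pDst, sqlite3 *pSrc){
  sqlite3_backup *pBackup = sqlite3_backup_init(pDst, "main", pSrc, "main");
  int rc;
  if( pBackup==0 ) return sqlite3_errcode(pDst);   /* error info stored in pDst */
  do{
    rc = sqlite3_backup_step(pBackup, 5);          /* copy up to 5 pages */
    if( rc==SQLITE_BUSY || rc==SQLITE_LOCKED ){
      sqlite3_sleep(250);                          /* transient; retry later */
    }
  }while( rc==SQLITE_OK || rc==SQLITE_BUSY || rc==SQLITE_LOCKED );
  /* Exactly one finish() per successful init(); reports any prior step() error */
  return sqlite3_backup_finish(pBackup);
}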
-** -** [[sqlite3_backup_remaining()]] [[sqlite3_backup_pagecount()]] -** sqlite3_backup_remaining() and sqlite3_backup_pagecount() -** -** ^The sqlite3_backup_remaining() routine returns the number of pages still -** to be backed up at the conclusion of the most recent sqlite3_backup_step(). -** ^The sqlite3_backup_pagecount() routine returns the total number of pages -** in the source database at the conclusion of the most recent -** sqlite3_backup_step(). -** ^(The values returned by these functions are only updated by -** sqlite3_backup_step(). If the source database is modified in a way that -** changes the size of the source database or the number of pages remaining, -** those changes are not reflected in the output of sqlite3_backup_pagecount() -** and sqlite3_backup_remaining() until after the next -** sqlite3_backup_step().)^ -** -** Concurrent Usage of Database Handles -** -** ^The source [database connection] may be used by the application for other -** purposes while a backup operation is underway or being initialized. -** ^If SQLite is compiled and configured to support threadsafe database -** connections, then the source database connection may be used concurrently -** from within other threads. -** -** However, the application must guarantee that the destination -** [database connection] is not passed to any other API (by any thread) after -** sqlite3_backup_init() is called and before the corresponding call to -** sqlite3_backup_finish(). SQLite does not currently check to see -** if the application incorrectly accesses the destination [database connection] -** and so no error code is reported, but the operations may malfunction -** nevertheless. Use of the destination database connection while a -** backup is in progress might also cause a mutex deadlock. -** -** If running in [shared cache mode], the application must -** guarantee that the shared cache used by the destination database -** is not accessed while the backup is running. In practice this means -** that the application must guarantee that the disk file being -** backed up to is not accessed by any connection within the process, -** not just the specific connection that was passed to sqlite3_backup_init(). -** -** The [sqlite3_backup] object itself is partially threadsafe. Multiple -** threads may safely make multiple concurrent calls to sqlite3_backup_step(). -** However, the sqlite3_backup_remaining() and sqlite3_backup_pagecount() -** APIs are not strictly speaking threadsafe. If they are invoked at the -** same time as another thread is invoking sqlite3_backup_step() it is -** possible that they return invalid values. -*/ -SQLITE_API sqlite3_backup *SQLITE_STDCALL sqlite3_backup_init( - sqlite3 *pDest, /* Destination database handle */ - const char *zDestName, /* Destination database name */ - sqlite3 *pSource, /* Source database handle */ - const char *zSourceName /* Source database name */ -); -SQLITE_API int SQLITE_STDCALL sqlite3_backup_step(sqlite3_backup *p, int nPage); -SQLITE_API int SQLITE_STDCALL sqlite3_backup_finish(sqlite3_backup *p); -SQLITE_API int SQLITE_STDCALL sqlite3_backup_remaining(sqlite3_backup *p); -SQLITE_API int SQLITE_STDCALL sqlite3_backup_pagecount(sqlite3_backup *p); - -/* -** CAPI3REF: Unlock Notification -** METHOD: sqlite3 -** -** ^When running in shared-cache mode, a database operation may fail with -** an [SQLITE_LOCKED] error if the required locks on the shared-cache or -** individual tables within the shared-cache cannot be obtained.
See -** [SQLite Shared-Cache Mode] for a description of shared-cache locking. -** ^This API may be used to register a callback that SQLite will invoke -** when the connection currently holding the required lock relinquishes it. -** ^This API is only available if the library was compiled with the -** [SQLITE_ENABLE_UNLOCK_NOTIFY] C-preprocessor symbol defined. -** -** See Also: [Using the SQLite Unlock Notification Feature]. -** -** ^Shared-cache locks are released when a database connection concludes -** its current transaction, either by committing it or rolling it back. -** -** ^When a connection (known as the blocked connection) fails to obtain a -** shared-cache lock and SQLITE_LOCKED is returned to the caller, the -** identity of the database connection (the blocking connection) that -** has locked the required resource is stored internally. ^After an -** application receives an SQLITE_LOCKED error, it may call the -** sqlite3_unlock_notify() method with the blocked connection handle as -** the first argument to register for a callback that will be invoked -** when the blocking connection's current transaction is concluded. ^The -** callback is invoked from within the [sqlite3_step] or [sqlite3_close] -** call that concludes the blocking connection's transaction. -** -** ^(If sqlite3_unlock_notify() is called in a multi-threaded application, -** there is a chance that the blocking connection will have already -** concluded its transaction by the time sqlite3_unlock_notify() is invoked. -** If this happens, then the specified callback is invoked immediately, -** from within the call to sqlite3_unlock_notify().)^ -** -** ^If the blocked connection is attempting to obtain a write-lock on a -** shared-cache table, and more than one other connection currently holds -** a read-lock on the same table, then SQLite arbitrarily selects one of -** the other connections to use as the blocking connection. -** -** ^(There may be at most one unlock-notify callback registered by a -** blocked connection. If sqlite3_unlock_notify() is called when the -** blocked connection already has a registered unlock-notify callback, -** then the new callback replaces the old.)^ ^If sqlite3_unlock_notify() is -** called with a NULL pointer as its second argument, then any existing -** unlock-notify callback is canceled. ^The blocked connection's -** unlock-notify callback may also be canceled by closing the blocked -** connection using [sqlite3_close()]. -** -** The unlock-notify callback is not reentrant. If an application invokes -** any sqlite3_xxx API functions from within an unlock-notify callback, a -** crash or deadlock may be the result. -** -** ^Unless deadlock is detected (see below), sqlite3_unlock_notify() always -** returns SQLITE_OK. -** -** Callback Invocation Details -** -** When an unlock-notify callback is registered, the application provides a -** single void* pointer that is passed to the callback when it is invoked. -** However, the signature of the callback function allows SQLite to pass -** it an array of void* context pointers. The first argument passed to -** an unlock-notify callback is a pointer to an array of void* pointers, -** and the second is the number of entries in the array. -** -** When a blocking connection's transaction is concluded, there may be -** more than one blocked connection that has registered for an unlock-notify -** callback.
^If two or more such blocked connections have specified the -** same callback function, then instead of invoking the callback function -** multiple times, it is invoked once with the set of void* context pointers -** specified by the blocked connections bundled together into an array. -** This gives the application an opportunity to prioritize any actions -** related to the set of unblocked database connections. -** -** Deadlock Detection -** -** Assuming that after registering for an unlock-notify callback a -** database waits for the callback to be issued before taking any further -** action (a reasonable assumption), then using this API may cause the -** application to deadlock. For example, if connection X is waiting for -** connection Y's transaction to be concluded, and similarly connection -** Y is waiting on connection X's transaction, then neither connection -** will proceed and the system may remain deadlocked indefinitely. -** -** To avoid this scenario, sqlite3_unlock_notify() performs deadlock -** detection. ^If a given call to sqlite3_unlock_notify() would put the -** system in a deadlocked state, then SQLITE_LOCKED is returned and no -** unlock-notify callback is registered. The system is said to be in -** a deadlocked state if connection A has registered for an unlock-notify -** callback on the conclusion of connection B's transaction, and connection -** B has itself registered for an unlock-notify callback when connection -** A's transaction is concluded. ^Indirect deadlock is also detected, so -** the system is also considered to be deadlocked if connection B has -** registered for an unlock-notify callback on the conclusion of connection -** C's transaction, where connection C is waiting on connection A. ^Any -** number of levels of indirection are allowed. -** -** The "DROP TABLE" Exception -** -** When a call to [sqlite3_step()] returns SQLITE_LOCKED, it is almost -** always appropriate to call sqlite3_unlock_notify(). There is, however, -** one exception. When executing a "DROP TABLE" or "DROP INDEX" statement, -** SQLite checks if there are any currently executing SELECT statements -** that belong to the same connection. If there are, SQLITE_LOCKED is -** returned. In this case there is no "blocking connection", so invoking -** sqlite3_unlock_notify() results in the unlock-notify callback being -** invoked immediately. If the application then re-attempts the "DROP TABLE" -** or "DROP INDEX" query, an infinite loop might be the result. -** -** One way around this problem is to check the extended error code returned -** by an sqlite3_step() call. ^(If there is a blocking connection, then the -** extended error code is set to SQLITE_LOCKED_SHAREDCACHE. Otherwise, in -** the special "DROP TABLE/INDEX" case, the extended error code is just -** SQLITE_LOCKED.)^ -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_unlock_notify( - sqlite3 *pBlocked, /* Waiting connection */ - void (*xNotify)(void **apArg, int nArg), /* Callback function to invoke */ - void *pNotifyArg /* Argument to pass to xNotify */ -); - - -/* -** CAPI3REF: String Comparison -** -** ^The [sqlite3_stricmp()] and [sqlite3_strnicmp()] APIs allow applications -** and extensions to compare the contents of two buffers containing UTF-8 -** strings in a case-independent fashion, using the same definition of "case -** independence" that SQLite uses internally when comparing identifiers.
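As an illustration of the unlock-notify mechanism documented above, here is a sketch of the common blocking wrapper; the pthreads usage and the UnlockCtx struct are illustrative assumptions (any signaling primitive works), and the build must define SQLITE_ENABLE_UNLOCK_NOTIFY:

#include <pthread.h>
#include <sqlite3.h>

typedef struct UnlockCtx UnlockCtx;
struct UnlockCtx {
  int fired;               /* set once the callback has run */
  pthread_mutex_t mutex;
  pthread_cond_t cond;
};

/* Invoked by SQLite when the blocking connection's transaction concludes.
** May receive several contexts bundled together in one array. */
static void unlock_cb(void **apArg, int nArg){
  int i;
  for(i=0; i<nArg; i++){
    UnlockCtx *p = (UnlockCtx*)apArg[i];
    pthread_mutex_lock(&p->mutex);
    p->fired = 1;
    pthread_cond_signal(&p->cond);
    pthread_mutex_unlock(&p->mutex);
  }
}

/* Call after sqlite3_step() returns SQLITE_LOCKED.  Returns SQLITE_OK once
** the statement may be retried, or SQLITE_LOCKED if registering the
** callback would deadlock (see "Deadlock Detection" above). */
static int wait_for_unlock(sqlite3 *db){
  UnlockCtx ctx = { 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
  int rc = sqlite3_unlock_notify(db, unlock_cb, &ctx);
  if( rc==SQLITE_OK ){
    pthread_mutex_lock(&ctx.mutex);
    while( !ctx.fired ) pthread_cond_wait(&ctx.cond, &ctx.mutex);
    pthread_mutex_unlock(&ctx.mutex);
  }
  return rc;
}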
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_stricmp(const char *, const char *); -SQLITE_API int SQLITE_STDCALL sqlite3_strnicmp(const char *, const char *, int); - -/* -** CAPI3REF: String Globbing -** -** ^The [sqlite3_strglob(P,X)] interface returns zero if and only if -** string X matches the [GLOB] pattern P. -** ^The definition of [GLOB] pattern matching used in -** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the -** SQL dialect understood by SQLite. ^The [sqlite3_strglob(P,X)] function -** is case sensitive. -** -** Note that this routine returns zero on a match and non-zero if the strings -** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. -** -** See also: [sqlite3_strlike()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_strglob(const char *zGlob, const char *zStr); - -/* -** CAPI3REF: String LIKE Matching -** -** ^The [sqlite3_strlike(P,X,E)] interface returns zero if and only if -** string X matches the [LIKE] pattern P with escape character E. -** ^The definition of [LIKE] pattern matching used in -** [sqlite3_strlike(P,X,E)] is the same as for the "X LIKE P ESCAPE E" -** operator in the SQL dialect understood by SQLite. ^For "X LIKE P" without -** the ESCAPE clause, set the E parameter of [sqlite3_strlike(P,X,E)] to 0. -** ^As with the LIKE operator, the [sqlite3_strlike(P,X,E)] function is case -** insensitive - equivalent upper and lower case ASCII characters match -** one another. -** -** ^The [sqlite3_strlike(P,X,E)] function matches Unicode characters, though -** only ASCII characters are case folded. -** -** Note that this routine returns zero on a match and non-zero if the strings -** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. -** -** See also: [sqlite3_strglob()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_strlike(const char *zGlob, const char *zStr, unsigned int cEsc); - -/* -** CAPI3REF: Error Logging Interface -** -** ^The [sqlite3_log()] interface writes a message into the [error log] -** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. -** ^If logging is enabled, the zFormat string and subsequent arguments are -** used with [sqlite3_snprintf()] to generate the final output string. -** -** The sqlite3_log() interface is intended for use by extensions such as -** virtual tables, collating functions, and SQL functions. While there is -** nothing to prevent an application from calling sqlite3_log(), doing so -** is considered bad form. -** -** The zFormat string must not be NULL. -** -** To avoid deadlocks and other threading problems, the sqlite3_log() routine -** will not use dynamically allocated memory. The log message is stored in -** a fixed-length buffer on the stack. If the log message is longer than -** a few hundred characters, it will be truncated to the length of the -** buffer. -*/ -SQLITE_API void SQLITE_CDECL sqlite3_log(int iErrCode, const char *zFormat, ...); - -/* -** CAPI3REF: Write-Ahead Log Commit Hook -** METHOD: sqlite3 -** -** ^The [sqlite3_wal_hook()] function is used to register a callback that -** is invoked each time data is committed to a database in WAL mode. -** -** ^(The callback is invoked by SQLite after the commit has taken place and -** the associated write-lock on the database is released)^, so the implementation -** may read, write or [checkpoint] the database as required.
-** -** ^The first parameter passed to the callback function when it is invoked -** is a copy of the third parameter passed to sqlite3_wal_hook() when -** registering the callback. ^The second is a copy of the database handle. -** ^The third parameter is the name of the database that was written to - -** either "main" or the name of an [ATTACH]-ed database. ^The fourth parameter -** is the number of pages currently in the write-ahead log file, -** including those that were just committed. -** -** The callback function should normally return [SQLITE_OK]. ^If an error -** code is returned, that error will propagate back up through the -** SQLite code base to cause the statement that provoked the callback -** to report an error, though the commit will have still occurred. If the -** callback returns [SQLITE_ROW] or [SQLITE_DONE], or if it returns a value -** that does not correspond to any valid SQLite error code, the results -** are undefined. -** -** A single database handle may have at most a single write-ahead log callback -** registered at one time. ^Calling [sqlite3_wal_hook()] replaces any -** previously registered write-ahead log callback. ^Note that the -** [sqlite3_wal_autocheckpoint()] interface and the -** [wal_autocheckpoint pragma] both invoke [sqlite3_wal_hook()] and will -** overwrite any prior [sqlite3_wal_hook()] settings. -*/ -SQLITE_API void *SQLITE_STDCALL sqlite3_wal_hook( - sqlite3*, - int(*)(void *,sqlite3*,const char*,int), - void* -); - -/* -** CAPI3REF: Configure an auto-checkpoint -** METHOD: sqlite3 -** -** ^The [sqlite3_wal_autocheckpoint(D,N)] interface is a wrapper around -** [sqlite3_wal_hook()] that causes any database on [database connection] D -** to automatically [checkpoint] -** after committing a transaction if there are N or -** more frames in the [write-ahead log] file. ^Passing zero or -** a negative value as the nFrame parameter disables automatic -** checkpoints entirely. -** -** ^The callback registered by this function replaces any existing callback -** registered using [sqlite3_wal_hook()]. ^Likewise, registering a callback -** using [sqlite3_wal_hook()] disables the automatic checkpoint mechanism -** configured by this function. -** -** ^The [wal_autocheckpoint pragma] can be used to invoke this interface -** from SQL. -** -** ^Checkpoints initiated by this mechanism are -** [sqlite3_wal_checkpoint_v2|PASSIVE]. -** -** ^Every new [database connection] defaults to having the auto-checkpoint -** enabled with a threshold of 1000 or [SQLITE_DEFAULT_WAL_AUTOCHECKPOINT] -** pages. The use of this interface -** is only necessary if the default setting is found to be suboptimal -** for a particular application. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_autocheckpoint(sqlite3 *db, int N); - -/* -** CAPI3REF: Checkpoint a database -** METHOD: sqlite3 -** -** ^(The sqlite3_wal_checkpoint(D,X) is equivalent to -** [sqlite3_wal_checkpoint_v2](D,X,[SQLITE_CHECKPOINT_PASSIVE],0,0).)^ -** -** In brief, sqlite3_wal_checkpoint(D,X) causes the content in the -** [write-ahead log] for database X on [database connection] D to be -** transferred into the database file and for the write-ahead log to -** be reset. See the [checkpointing] documentation for additional -** information. -** -** This interface used to be the only way to cause a checkpoint to -** occur. But then the newer and more powerful [sqlite3_wal_checkpoint_v2()] -** interface was added.
This interface is retained for backwards -** compatibility and as a convenience for applications that need to manually -** start a checkpoint but which do not need the full power (and corresponding -** complication) of [sqlite3_wal_checkpoint_v2()]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb); - -/* -** CAPI3REF: Checkpoint a database -** METHOD: sqlite3 -** -** ^(The sqlite3_wal_checkpoint_v2(D,X,M,L,C) interface runs a checkpoint -** operation on database X of [database connection] D in mode M. Status -** information is written back into integers pointed to by L and C.)^ -** ^(The M parameter must be a valid [checkpoint mode]:)^ -**
-**
SQLITE_CHECKPOINT_PASSIVE
-** ^Checkpoint as many frames as possible without waiting for any database -** readers or writers to finish, then sync the database file if all frames -** in the log were checkpointed. ^The [busy-handler callback] -** is never invoked in the SQLITE_CHECKPOINT_PASSIVE mode. -** ^On the other hand, passive mode might leave the checkpoint unfinished -** if there are concurrent readers or writers. -** -**
SQLITE_CHECKPOINT_FULL
-** ^This mode blocks (it invokes the -** [sqlite3_busy_handler|busy-handler callback]) until there is no -** database writer and all readers are reading from the most recent database -** snapshot. ^It then checkpoints all frames in the log file and syncs the -** database file. ^This mode blocks new database writers while it is pending, -** but new database readers are allowed to continue unimpeded. -** -**
SQLITE_CHECKPOINT_RESTART
-** ^This mode works the same way as SQLITE_CHECKPOINT_FULL with the addition -** that after checkpointing the log file it blocks (calls the -** [busy-handler callback]) -** until all readers are reading from the database file only. ^This ensures -** that the next writer will restart the log file from the beginning. -** ^Like SQLITE_CHECKPOINT_FULL, this mode blocks new -** database writer attempts while it is pending, but does not impede readers. -** -**
SQLITE_CHECKPOINT_TRUNCATE
-** ^This mode works the same way as SQLITE_CHECKPOINT_RESTART with the -** addition that it also truncates the log file to zero bytes just prior -** to a successful return. -**
)^ -** -** ^If pnLog is not NULL, then *pnLog is set to the total number of frames in -** the log file or to -1 if the checkpoint could not run because -** of an error or because the database is not in [WAL mode]. ^If pnCkpt is not -** NULL, then *pnCkpt is set to the total number of checkpointed frames in the -** log file (including any that were already checkpointed before the function -** was called) or to -1 if the checkpoint could not run due to an error or -** because the database is not in WAL mode. ^Note that upon successful -** completion of an SQLITE_CHECKPOINT_TRUNCATE, the log file will have been -** truncated to zero bytes and so both *pnLog and *pnCkpt will be set to zero. -** -** ^All calls obtain an exclusive "checkpoint" lock on the database file. ^If -** any other process is running a checkpoint operation at the same time, the -** lock cannot be obtained and SQLITE_BUSY is returned. ^Even if there is a -** busy-handler configured, it will not be invoked in this case. -** -** ^The SQLITE_CHECKPOINT_FULL, RESTART and TRUNCATE modes also obtain the -** exclusive "writer" lock on the database file. ^If the writer lock cannot be -** obtained immediately, and a busy-handler is configured, it is invoked and -** the writer lock is retried until either the busy-handler returns 0 or the lock -** is successfully obtained. ^The busy-handler is also invoked while waiting for -** database readers as described above. ^If the busy-handler returns 0 before -** the writer lock is obtained or while waiting for database readers, the -** checkpoint operation proceeds from that point in the same way as -** SQLITE_CHECKPOINT_PASSIVE - checkpointing as many frames as possible -** without blocking any further. ^SQLITE_BUSY is returned in this case. -** -** ^If parameter zDb is NULL or points to a zero length string, then the -** specified operation is attempted on all WAL databases [attached] to -** [database connection] db. In this case the -** values written to output parameters *pnLog and *pnCkpt are undefined. ^If -** an SQLITE_BUSY error is encountered when processing one or more of the -** attached WAL databases, the operation is still attempted on any remaining -** attached databases and SQLITE_BUSY is returned at the end. ^If any other -** error occurs while processing an attached database, processing is abandoned -** and the error code is returned to the caller immediately. ^If no error -** (SQLITE_BUSY or otherwise) is encountered while processing the attached -** databases, SQLITE_OK is returned. -** -** ^If database zDb is the name of an attached database that is not in WAL -** mode, SQLITE_OK is returned and both *pnLog and *pnCkpt are set to -1. ^If -** zDb is not NULL (or a zero length string) and is not the name of any -** attached database, SQLITE_ERROR is returned to the caller. -** -** ^Unless it returns SQLITE_MISUSE, -** the sqlite3_wal_checkpoint_v2() interface -** sets the error information that is queried by -** [sqlite3_errcode()] and [sqlite3_errmsg()]. -** -** ^The [PRAGMA wal_checkpoint] command can be used to invoke this interface -** from SQL.
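A brief sketch of calling the interface just described, requesting a TRUNCATE checkpoint on "main" and inspecting the two output counters; the fprintf reporting is illustrative only:

#include <stdio.h>
#include <sqlite3.h>

static int checkpoint_main(sqlite3 *db){
  int nLog = 0, nCkpt = 0;
  int rc = sqlite3_wal_checkpoint_v2(db, "main",
                                     SQLITE_CHECKPOINT_TRUNCATE,
                                     &nLog, &nCkpt);
  if( rc==SQLITE_BUSY ){
    /* Another process holds the checkpoint lock, or the busy-handler gave up */
    fprintf(stderr, "checkpoint busy: %d of %d frames done\n", nCkpt, nLog);
  }
  /* On success with TRUNCATE, both counters read zero: the log was truncated */
  return rc;
}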
-*/ -SQLITE_API int SQLITE_STDCALL sqlite3_wal_checkpoint_v2( - sqlite3 *db, /* Database handle */ - const char *zDb, /* Name of attached database (or NULL) */ - int eMode, /* SQLITE_CHECKPOINT_* value */ - int *pnLog, /* OUT: Size of WAL log in frames */ - int *pnCkpt /* OUT: Total number of frames checkpointed */ -); - -/* -** CAPI3REF: Checkpoint Mode Values -** KEYWORDS: {checkpoint mode} -** -** These constants define all valid values for the "checkpoint mode" passed -** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface. -** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the -** meaning of each of these checkpoint modes. -*/ -#define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ -#define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ -#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for readers */ -#define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ - -/* -** CAPI3REF: Virtual Table Interface Configuration -** -** This function may be called by either the [xConnect] or [xCreate] method -** of a [virtual table] implementation to configure -** various facets of the virtual table interface. -** -** If this interface is invoked outside the context of an xConnect or -** xCreate virtual table method then the behavior is undefined. -** -** At present, there is only one option that may be configured using -** this function. (See [SQLITE_VTAB_CONSTRAINT_SUPPORT].) Further options -** may be added in the future. -*/ -SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3*, int op, ...); - -/* -** CAPI3REF: Virtual Table Configuration Options -** -** These macros define the various options to the -** [sqlite3_vtab_config()] interface that [virtual table] implementations -** can use to customize and optimize their behavior. -** -**
-**
SQLITE_VTAB_CONSTRAINT_SUPPORT -**
Calls of the form -** [sqlite3_vtab_config](db,SQLITE_VTAB_CONSTRAINT_SUPPORT,X) are supported, -** where X is an integer. If X is zero, then the [virtual table] whose -** [xCreate] or [xConnect] method invoked [sqlite3_vtab_config()] does not -** support constraints. In this configuration (which is the default) if -** a call to the [xUpdate] method returns [SQLITE_CONSTRAINT], then the entire -** statement is rolled back as if [ON CONFLICT | OR ABORT] had been -** specified as part of the user's SQL statement, regardless of the actual -** ON CONFLICT mode specified. -** -** If X is non-zero, then the virtual table implementation guarantees -** that if [xUpdate] returns [SQLITE_CONSTRAINT], it will do so before -** any modifications to internal or persistent data structures have been made. -** If the [ON CONFLICT] mode is ABORT, FAIL, IGNORE or ROLLBACK, SQLite -** is able to roll back a statement or database transaction, and abandon -** or continue processing the current SQL statement as appropriate. -** If the ON CONFLICT mode is REPLACE and the [xUpdate] method returns -** [SQLITE_CONSTRAINT], SQLite handles this as if the ON CONFLICT mode -** had been ABORT. -** -** Virtual table implementations that are required to handle OR REPLACE -** must do so within the [xUpdate] method. If a call to the -** [sqlite3_vtab_on_conflict()] function indicates that the current ON -** CONFLICT policy is REPLACE, the virtual table implementation should -** silently replace the appropriate rows within the xUpdate callback and -** return SQLITE_OK. Or, if this is not possible, it may return -** SQLITE_CONSTRAINT, in which case SQLite falls back to OR ABORT -** constraint handling. -**
-*/ -#define SQLITE_VTAB_CONSTRAINT_SUPPORT 1 - -/* -** CAPI3REF: Determine The Virtual Table Conflict Policy -** -** This function may only be called from within a call to the [xUpdate] method -** of a [virtual table] implementation for an INSERT or UPDATE operation. ^The -** value returned is one of [SQLITE_ROLLBACK], [SQLITE_IGNORE], [SQLITE_FAIL], -** [SQLITE_ABORT], or [SQLITE_REPLACE], according to the [ON CONFLICT] mode -** of the SQL statement that triggered the call to the [xUpdate] method of the -** [virtual table]. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_vtab_on_conflict(sqlite3 *); - -/* -** CAPI3REF: Conflict resolution modes -** KEYWORDS: {conflict resolution mode} -** -** These constants are returned by [sqlite3_vtab_on_conflict()] to -** inform a [virtual table] implementation what the [ON CONFLICT] mode -** is for the SQL statement being evaluated. -** -** Note that the [SQLITE_IGNORE] constant is also used as a potential -** return value from the [sqlite3_set_authorizer()] callback and that -** [SQLITE_ABORT] is also a [result code]. -*/ -#define SQLITE_ROLLBACK 1 -/* #define SQLITE_IGNORE 2 // Also used by sqlite3_authorizer() callback */ -#define SQLITE_FAIL 3 -/* #define SQLITE_ABORT 4 // Also an error code */ -#define SQLITE_REPLACE 5 - -/* -** CAPI3REF: Prepared Statement Scan Status Opcodes -** KEYWORDS: {scanstatus options} -** -** The following constants can be used for the T parameter to the -** [sqlite3_stmt_scanstatus(S,X,T,V)] interface. Each constant designates a -** different metric for sqlite3_stmt_scanstatus() to return. -** -** When the value returned to V is a string, space to hold that string is -** managed by the prepared statement S and will be automatically freed when -** S is finalized. -** -**
-** [[SQLITE_SCANSTAT_NLOOP]]
SQLITE_SCANSTAT_NLOOP
-**
^The [sqlite3_int64] variable pointed to by the V parameter will be -** set to the total number of times that the X-th loop has run.
-** -** [[SQLITE_SCANSTAT_NVISIT]]
SQLITE_SCANSTAT_NVISIT
-**
^The [sqlite3_int64] variable pointed to by the V parameter will be set -** to the total number of rows examined by all iterations of the X-th loop.
-** -** [[SQLITE_SCANSTAT_EST]]
SQLITE_SCANSTAT_EST
-**
^The "double" variable pointed to by the T parameter will be set to the -** query planner's estimate for the average number of rows output from each -** iteration of the X-th loop. If the query planner's estimates was accurate, -** then this value will approximate the quotient NVISIT/NLOOP and the -** product of this value for all prior loops with the same SELECTID will -** be the NLOOP value for the current loop. -** -** [[SQLITE_SCANSTAT_NAME]]
SQLITE_SCANSTAT_NAME
-**
^The "const char *" variable pointed to by the T parameter will be set -** to a zero-terminated UTF-8 string containing the name of the index or table -** used for the X-th loop. -** -** [[SQLITE_SCANSTAT_EXPLAIN]]
SQLITE_SCANSTAT_EXPLAIN
-**
^The "const char *" variable pointed to by the T parameter will be set -** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN] -** description for the X-th loop. -** -** [[SQLITE_SCANSTAT_SELECTID]]
SQLITE_SCANSTAT_SELECTID
-**
^The "int" variable pointed to by the T parameter will be set to the -** "select-id" for the X-th loop. The select-id identifies which query or -** subquery the loop is part of. The main query has a select-id of zero. -** The select-id is the same value as is output in the first column -** of an [EXPLAIN QUERY PLAN] query. -**
)^ -*/ -#define SQLITE_SCANSTAT_NLOOP 0 -#define SQLITE_SCANSTAT_NVISIT 1 -#define SQLITE_SCANSTAT_EST 2 -#define SQLITE_SCANSTAT_NAME 3 -#define SQLITE_SCANSTAT_EXPLAIN 4 -#define SQLITE_SCANSTAT_SELECTID 5 - -/* -** CAPI3REF: Prepared Statement Scan Status -** METHOD: sqlite3_stmt -** -** This interface returns information about the predicted and measured -** performance for pStmt. Advanced applications can use this -** interface to compare the predicted and the measured performance and -** issue warnings and/or rerun [ANALYZE] if discrepancies are found. -** -** Since this interface is expected to be rarely used, it is only -** available if SQLite is compiled using the [SQLITE_ENABLE_STMT_SCANSTATUS] -** compile-time option. -** -** The "iScanStatusOp" parameter determines which status information to return. -** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior -** of this interface is undefined. -** ^The requested measurement is written into a variable pointed to by -** the "pOut" parameter. -** Parameter "idx" identifies the specific loop to retrieve statistics for. -** Loops are numbered starting from zero. ^If idx is out of range - less than -** zero or greater than or equal to the total number of loops used to implement -** the statement - a non-zero value is returned and the variable that pOut -** points to is unchanged. -** -** ^Statistics might not be available for all loops in all statements. ^In cases -** where there exist loops with no available statistics, this function behaves -** as if the loop did not exist - it returns non-zero and leaves the variable -** that pOut points to unchanged. -** -** See also: [sqlite3_stmt_scanstatus_reset()] -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_stmt_scanstatus( - sqlite3_stmt *pStmt, /* Prepared statement for which info desired */ - int idx, /* Index of loop to report on */ - int iScanStatusOp, /* Information desired. SQLITE_SCANSTAT_* */ - void *pOut /* Result written here */ -); - -/* -** CAPI3REF: Zero Scan-Status Counters -** METHOD: sqlite3_stmt -** -** ^Zero all [sqlite3_stmt_scanstatus()] related event counters. -** -** This API is only available if the library is built with pre-processor -** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined. -*/ -SQLITE_API void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); - -/* -** CAPI3REF: Flush caches to disk mid-transaction -** -** ^If a write-transaction is open on [database connection] D when the -** [sqlite3_db_cacheflush(D)] interface is invoked, any dirty -** pages in the pager-cache that are not currently in use are written out -** to disk. A dirty page may be in use if a database cursor created by an -** active SQL statement is reading from it, or if it is page 1 of a database -** file (page 1 is always "in use"). ^The [sqlite3_db_cacheflush(D)] -** interface flushes caches for all schemas - "main", "temp", and -** any [attached] databases. -** -** ^If this function needs to obtain extra database locks before dirty pages -** can be flushed to disk, it does so. ^If those locks cannot be obtained -** immediately and there is a busy-handler callback configured, it is invoked -** in the usual manner. ^If the required lock still cannot be obtained, then -** the database is skipped and an attempt is made to flush any dirty pages -** belonging to the next (if any) database. ^If any databases are skipped -** because locks cannot be obtained, but no other error occurs, this -** function returns SQLITE_BUSY.
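For illustration, a sketch of walking every loop of a prepared statement with the scanstatus interface documented above; it assumes a build with SQLITE_ENABLE_STMT_SCANSTATUS and stops at the first loop index for which no statistics are returned:

#include <stdio.h>
#include <sqlite3.h>

static void print_scanstats(sqlite3_stmt *pStmt){
  int idx;
  for(idx=0; ; idx++){
    sqlite3_int64 nLoop = 0, nVisit = 0;
    const char *zExplain = 0;
    if( sqlite3_stmt_scanstatus(pStmt, idx, SQLITE_SCANSTAT_NLOOP, &nLoop) ){
      break;   /* non-zero return: idx out of range (or stats unavailable) */
    }
    sqlite3_stmt_scanstatus(pStmt, idx, SQLITE_SCANSTAT_NVISIT, &nVisit);
    sqlite3_stmt_scanstatus(pStmt, idx, SQLITE_SCANSTAT_EXPLAIN, &zExplain);
    printf("loop %d: nLoop=%lld nVisit=%lld %s\n", idx,
           (long long)nLoop, (long long)nVisit, zExplain ? zExplain : "?");
  }
}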
-** -** ^If any other error occurs while flushing dirty pages to disk (for -** example an IO error or out-of-memory condition), then processing is -** abandoned and an SQLite [error code] is returned to the caller immediately. -** -** ^Otherwise, if no error occurs, [sqlite3_db_cacheflush()] returns SQLITE_OK. -** -** ^This function does not set the database handle error code or message -** returned by the [sqlite3_errcode()] and [sqlite3_errmsg()] functions. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_db_cacheflush(sqlite3*); - -/* -** CAPI3REF: The pre-update hook. -** -** ^These interfaces are only available if SQLite is compiled using the -** [SQLITE_ENABLE_PREUPDATE_HOOK] compile-time option. -** -** ^The [sqlite3_preupdate_hook()] interface registers a callback function -** that is invoked prior to each [INSERT], [UPDATE], and [DELETE] operation -** on a [rowid table]. -** ^At most one preupdate hook may be registered at a time on a single -** [database connection]; each call to [sqlite3_preupdate_hook()] overrides -** the previous setting. -** ^The preupdate hook is disabled by invoking [sqlite3_preupdate_hook()] -** with a NULL pointer as the second parameter. -** ^The third parameter to [sqlite3_preupdate_hook()] is passed through as -** the first parameter to callbacks. -** -** ^The preupdate hook only fires for changes to [rowid tables]; the preupdate -** hook is not invoked for changes to [virtual tables] or [WITHOUT ROWID] -** tables. -** -** ^The second parameter to the preupdate callback is a pointer to -** the [database connection] that registered the preupdate hook. -** ^The third parameter to the preupdate callback is one of the constants -** [SQLITE_INSERT], [SQLITE_DELETE], or [SQLITE_UPDATE] to identify the -** kind of update operation that is about to occur. -** ^(The fourth parameter to the preupdate callback is the name of the -** database within the database connection that is being modified. This -** will be "main" for the main database or "temp" for TEMP tables or -** the name given after the AS keyword in the [ATTACH] statement for attached -** databases.)^ -** ^The fifth parameter to the preupdate callback is the name of the -** table that is being modified. -** ^The sixth parameter to the preupdate callback is the initial [rowid] of the -** row being changed for SQLITE_UPDATE and SQLITE_DELETE changes and is -** undefined for SQLITE_INSERT changes. -** ^The seventh parameter to the preupdate callback is the final [rowid] of -** the row being changed for SQLITE_UPDATE and SQLITE_INSERT changes and is -** undefined for SQLITE_DELETE changes. -** -** The [sqlite3_preupdate_old()], [sqlite3_preupdate_new()], -** [sqlite3_preupdate_count()], and [sqlite3_preupdate_depth()] interfaces -** provide additional information about a preupdate event. These routines -** may only be called from within a preupdate callback. Invoking any of -** these routines from outside of a preupdate callback or with a -** [database connection] pointer that is different from the one supplied -** to the preupdate callback results in undefined and probably undesirable -** behavior. -** -** ^The [sqlite3_preupdate_count(D)] interface returns the number of columns -** in the row that is being inserted, updated, or deleted. -** -** ^The [sqlite3_preupdate_old(D,N,P)] interface writes into P a pointer to -** a [protected sqlite3_value] that contains the value of the Nth column of -** the table row before it is updated.
The N parameter must be between 0 -** and one less than the number of columns or the behavior will be -** undefined. This must only be used within SQLITE_UPDATE and SQLITE_DELETE -** preupdate callbacks; if it is used by an SQLITE_INSERT callback then the -** behavior is undefined. The [sqlite3_value] that P points to -** will be destroyed when the preupdate callback returns. -** -** ^The [sqlite3_preupdate_new(D,N,P)] interface writes into P a pointer to -** a [protected sqlite3_value] that contains the value of the Nth column of -** the table row after it is updated. The N parameter must be between 0 -** and one less than the number of columns or the behavior will be -** undefined. This must only be used within SQLITE_INSERT and SQLITE_UPDATE -** preupdate callbacks; if it is used by an SQLITE_DELETE callback then the -** behavior is undefined. The [sqlite3_value] that P points to -** will be destroyed when the preupdate callback returns. -** -** ^The [sqlite3_preupdate_depth(D)] interface returns 0 if the preupdate -** callback was invoked as a result of a direct insert, update, or delete -** operation; or 1 for inserts, updates, or deletes invoked by top-level -** triggers; or 2 for changes resulting from triggers called by top-level -** triggers; and so forth. -** -** See also: [sqlite3_update_hook()] -*/ -SQLITE_API SQLITE_EXPERIMENTAL void *SQLITE_STDCALL sqlite3_preupdate_hook( - sqlite3 *db, - void(*xPreUpdate)( - void *pCtx, /* Copy of third arg to preupdate_hook() */ - sqlite3 *db, /* Database handle */ - int op, /* SQLITE_UPDATE, DELETE or INSERT */ - char const *zDb, /* Database name */ - char const *zName, /* Table name */ - sqlite3_int64 iKey1, /* Rowid of row about to be deleted/updated */ - sqlite3_int64 iKey2 /* New rowid value (for a rowid UPDATE) */ - ), - void* -); -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_preupdate_old(sqlite3 *, int, sqlite3_value **); -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_preupdate_count(sqlite3 *); -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_preupdate_depth(sqlite3 *); -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_preupdate_new(sqlite3 *, int, sqlite3_value **); - -/* -** CAPI3REF: Low-level system error code -** -** ^Attempt to return the underlying operating system error code or error -** number that caused the most recent I/O error or failure to open a file. -** The return value is OS-dependent. For example, on unix systems, after -** [sqlite3_open_v2()] returns [SQLITE_CANTOPEN], this interface could be -** called to get back the underlying "errno" that caused the problem, such -** as ENOSPC, EAUTH, EISDIR, and so forth. -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_system_errno(sqlite3*); - -/* -** CAPI3REF: Database Snapshot -** KEYWORDS: {snapshot} -** EXPERIMENTAL -** -** An instance of the snapshot object records the state of a [WAL mode] -** database for some specific point in history. -** -** In [WAL mode], multiple [database connections] that are open on the -** same database file can each be reading a different historical version -** of the database file. When a [database connection] begins a read -** transaction, that connection sees an unchanging copy of the database -** as it existed for the point in time when the transaction first started. -** Subsequent changes to the database from other connections are not seen -** by the reader until a new read transaction is started. 
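To tie the seven callback parameters of the pre-update hook described above together, here is a sketch that logs each pending change and, for UPDATEs, reads the old value of column 0; it assumes a build with SQLITE_ENABLE_PREUPDATE_HOOK, and the logging itself is illustrative:

#include <stdio.h>
#include <sqlite3.h>

static void preupdate_cb(void *pCtx, sqlite3 *db, int op, const char *zDb,
                         const char *zTab, sqlite3_int64 iKey1,
                         sqlite3_int64 iKey2){
  printf("%s on %s.%s rowid %lld -> %lld\n",
         op==SQLITE_INSERT ? "INSERT" : op==SQLITE_DELETE ? "DELETE" : "UPDATE",
         zDb, zTab, (long long)iKey1, (long long)iKey2);
  if( op==SQLITE_UPDATE && sqlite3_preupdate_count(db)>0 ){
    sqlite3_value *pOld = 0;
    if( sqlite3_preupdate_old(db, 0, &pOld)==SQLITE_OK ){
      printf("  old column 0: %s\n", (const char*)sqlite3_value_text(pOld));
    }
  }
}

/* Registration: sqlite3_preupdate_hook(db, preupdate_cb, 0); */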
-** -** The sqlite3_snapshot object records state information about an historical -** version of the database file so that it is possible to later open a new read -** transaction that sees that historical version of the database rather than -** the most recent version. -** -** The constructor for this object is [sqlite3_snapshot_get()]. The -** [sqlite3_snapshot_open()] method causes a fresh read transaction to refer -** to an historical snapshot (if possible). The destructor for -** sqlite3_snapshot objects is [sqlite3_snapshot_free()]. -*/ -typedef struct sqlite3_snapshot sqlite3_snapshot; - -/* -** CAPI3REF: Record A Database Snapshot -** EXPERIMENTAL -** -** ^The [sqlite3_snapshot_get(D,S,P)] interface attempts to make a -** new [sqlite3_snapshot] object that records the current state of -** schema S in database connection D. ^On success, the -** [sqlite3_snapshot_get(D,S,P)] interface writes a pointer to the newly -** created [sqlite3_snapshot] object into *P and returns SQLITE_OK. -** ^If schema S of [database connection] D is not a [WAL mode] database -** that is in a read transaction, then [sqlite3_snapshot_get(D,S,P)] -** leaves the *P value unchanged and returns an appropriate [error code]. -** -** The [sqlite3_snapshot] object returned from a successful call to -** [sqlite3_snapshot_get()] must be freed using [sqlite3_snapshot_free()] -** to avoid a memory leak. -** -** The [sqlite3_snapshot_get()] interface is only available when the -** SQLITE_ENABLE_SNAPSHOT compile-time option is used. -*/ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_snapshot_get( - sqlite3 *db, - const char *zSchema, - sqlite3_snapshot **ppSnapshot -); - -/* -** CAPI3REF: Start a read transaction on an historical snapshot -** EXPERIMENTAL -** -** ^The [sqlite3_snapshot_open(D,S,P)] interface starts a -** read transaction for schema S of -** [database connection] D such that the read transaction -** refers to historical [snapshot] P, rather than the most -** recent change to the database. -** ^The [sqlite3_snapshot_open()] interface returns SQLITE_OK on success -** or an appropriate [error code] if it fails. -** -** ^In order to succeed, a call to [sqlite3_snapshot_open(D,S,P)] must be -** the first operation following the [BEGIN] that takes the schema S -** out of [autocommit mode]. -** ^In other words, schema S must not currently be in -** a transaction for [sqlite3_snapshot_open(D,S,P)] to work, but the -** database connection D must be out of [autocommit mode]. -** ^A [snapshot] will fail to open if it has been overwritten by a -** [checkpoint]. -** ^(A call to [sqlite3_snapshot_open(D,S,P)] will fail if the -** database connection D does not know that the database file for -** schema S is in [WAL mode]. A database connection might not know -** that the database file is in [WAL mode] if there has been no prior -** I/O on that database connection, or if the database entered [WAL mode] -** after the most recent I/O on the database connection.)^ -** (Hint: Run "[PRAGMA application_id]" against a newly opened -** database connection in order to make it ready to use snapshots.) -** -** The [sqlite3_snapshot_open()] interface is only available when the -** SQLITE_ENABLE_SNAPSHOT compile-time option is used. -*/ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_snapshot_open( - sqlite3 *db, - const char *zSchema, - sqlite3_snapshot *pSnapshot -); - -/* -** CAPI3REF: Destroy a snapshot -** EXPERIMENTAL -** -** ^The [sqlite3_snapshot_free(P)] interface destroys [sqlite3_snapshot] P. 
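A sketch of the snapshot life-cycle described above: capture a snapshot while inside a read transaction on a WAL-mode "main" database, then later rewind a fresh read transaction to it. It assumes a build with SQLITE_ENABLE_SNAPSHOT; the dummy SELECT merely forces the read transaction to start:

#include <sqlite3.h>

static int snapshot_capture(sqlite3 *db, sqlite3_snapshot **ppSnap){
  int rc = sqlite3_exec(db, "BEGIN; SELECT 1 FROM sqlite_master;", 0, 0, 0);
  if( rc==SQLITE_OK ) rc = sqlite3_snapshot_get(db, "main", ppSnap);
  sqlite3_exec(db, "COMMIT", 0, 0, 0);
  return rc;
}

static int snapshot_rewind(sqlite3 *db, sqlite3_snapshot *pSnap){
  int rc = sqlite3_exec(db, "BEGIN", 0, 0, 0);  /* no reads before _open() */
  if( rc==SQLITE_OK ) rc = sqlite3_snapshot_open(db, "main", pSnap);
  if( rc!=SQLITE_OK ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
  /* On SQLITE_OK the open read transaction now sees the snapshot; the
  ** caller reads, then COMMITs, and eventually calls sqlite3_snapshot_free() */
  return rc;
}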
-** The application must eventually free every [sqlite3_snapshot] object -** using this routine to avoid a memory leak. -** -** The [sqlite3_snapshot_free()] interface is only available when the -** SQLITE_ENABLE_SNAPSHOT compile-time option is used. -*/ -SQLITE_API SQLITE_EXPERIMENTAL void SQLITE_STDCALL sqlite3_snapshot_free(sqlite3_snapshot*); - -/* -** CAPI3REF: Compare the ages of two snapshot handles. -** EXPERIMENTAL -** -** The sqlite3_snapshot_cmp(P1, P2) interface is used to compare the ages -** of two valid snapshot handles. -** -** If the two snapshot handles are not associated with the same database -** file, the result of the comparison is undefined. -** -** Additionally, the result of the comparison is only valid if both of the -** snapshot handles were obtained by calling sqlite3_snapshot_get() since the -** last time the wal file was deleted. The wal file is deleted when the -** database is changed back to rollback mode or when the number of database -** clients drops to zero. If either snapshot handle was obtained before the -** wal file was last deleted, the value returned by this function -** is undefined. -** -** Otherwise, this API returns a negative value if P1 refers to an older -** snapshot than P2, zero if the two handles refer to the same database -** snapshot, and a positive value if P1 is a newer snapshot than P2. -*/ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_snapshot_cmp( - sqlite3_snapshot *p1, - sqlite3_snapshot *p2 -); - -/* -** Undo the hack that converts floating point types to integer for -** builds on processors without floating point support. -*/ -#ifdef SQLITE_OMIT_FLOATING_POINT -# undef double -#endif - -#ifdef __cplusplus -} /* End of the 'extern "C"' block */ -#endif -#endif /* _SQLITE3_H_ */ - -/******** Begin file sqlite3rtree.h *********/ -/* -** 2010 August 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - -#ifndef _SQLITE3RTREE_H_ -#define _SQLITE3RTREE_H_ - - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry; -typedef struct sqlite3_rtree_query_info sqlite3_rtree_query_info; - -/* The double-precision datatype used by RTree depends on the -** SQLITE_RTREE_INT_ONLY compile-time option. -*/ -#ifdef SQLITE_RTREE_INT_ONLY - typedef sqlite3_int64 sqlite3_rtree_dbl; -#else - typedef double sqlite3_rtree_dbl; -#endif - -/* -** Register a geometry callback named zGeom that can be used as part of an -** R-Tree geometry query as follows: -** -** SELECT ... FROM <table> WHERE <column> MATCH $zGeom(... params ...) -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_rtree_geometry_callback( - sqlite3 *db, - const char *zGeom, - int (*xGeom)(sqlite3_rtree_geometry*, int, sqlite3_rtree_dbl*,int*), - void *pContext -); - - -/* -** A pointer to a structure of the following type is passed as the first -** argument to callbacks registered using rtree_geometry_callback().
-*/ -struct sqlite3_rtree_geometry { - void *pContext; /* Copy of pContext passed to s_r_g_c() */ - int nParam; /* Size of array aParam[] */ - sqlite3_rtree_dbl *aParam; /* Parameters passed to SQL geom function */ - void *pUser; /* Callback implementation user data */ - void (*xDelUser)(void *); /* Called by SQLite to clean up pUser */ -}; - -/* -** Register a 2nd-generation geometry callback named zQueryFunc that can be -** used as part of an R-Tree geometry query as follows: -** -** SELECT ... FROM <table> WHERE <column> MATCH $zQueryFunc(... params ...) -*/ -SQLITE_API int SQLITE_STDCALL sqlite3_rtree_query_callback( - sqlite3 *db, - const char *zQueryFunc, - int (*xQueryFunc)(sqlite3_rtree_query_info*), - void *pContext, - void (*xDestructor)(void*) -); - - -/* -** A pointer to a structure of the following type is passed as the -** argument to scored geometry callbacks registered using -** sqlite3_rtree_query_callback(). -** -** Note that the first 5 fields of this structure are identical to -** sqlite3_rtree_geometry. This structure is a subclass of -** sqlite3_rtree_geometry. -*/ -struct sqlite3_rtree_query_info { - void *pContext; /* pContext from when function registered */ - int nParam; /* Number of function parameters */ - sqlite3_rtree_dbl *aParam; /* value of function parameters */ - void *pUser; /* callback can use this, if desired */ - void (*xDelUser)(void*); /* function to free pUser */ - sqlite3_rtree_dbl *aCoord; /* Coordinates of node or entry to check */ - unsigned int *anQueue; /* Number of pending entries in the queue */ - int nCoord; /* Number of coordinates */ - int iLevel; /* Level of current node or entry */ - int mxLevel; /* The largest iLevel value in the tree */ - sqlite3_int64 iRowid; /* Rowid for current entry */ - sqlite3_rtree_dbl rParentScore; /* Score of parent node */ - int eParentWithin; /* Visibility of parent node */ - int eWithin; /* OUT: Visibility */ - sqlite3_rtree_dbl rScore; /* OUT: Write the score here */ - /* The following fields are only available in 3.8.11 and later */ - sqlite3_value **apSqlParam; /* Original SQL values of parameters */ -}; - -/* -** Allowed values for sqlite3_rtree_query.eWithin and .eParentWithin. -*/ -#define NOT_WITHIN 0 /* Object completely outside of query region */ -#define PARTLY_WITHIN 1 /* Object partially overlaps query region */ -#define FULLY_WITHIN 2 /* Object fully contained within query region */ - - -#ifdef __cplusplus -} /* end of the 'extern "C"' block */ -#endif - -#endif /* ifndef _SQLITE3RTREE_H_ */ - -/******** End of sqlite3rtree.h *********/ -/******** Begin file sqlite3session.h *********/ - -#if !defined(__SQLITESESSION_H_) && defined(SQLITE_ENABLE_SESSION) -#define __SQLITESESSION_H_ 1 - -/* -** Make sure we can call this stuff from C++. -*/ -#ifdef __cplusplus -extern "C" { -#endif - - -/* -** CAPI3REF: Session Object Handle -*/ -typedef struct sqlite3_session sqlite3_session; - -/* -** CAPI3REF: Changeset Iterator Handle -*/ -typedef struct sqlite3_changeset_iter sqlite3_changeset_iter; - -/* -** CAPI3REF: Create A New Session Object -** -** Create a new session object attached to database handle db. If successful, -** a pointer to the new object is written to *ppSession and SQLITE_OK is -** returned. If an error occurs, *ppSession is set to NULL and an SQLite -** error code (e.g. SQLITE_NOMEM) is returned. -** -** It is possible to create multiple session objects attached to a single -** database handle.
-** -** Session objects created using this function should be deleted using the -** [sqlite3session_delete()] function before the database handle that they -** are attached to is itself closed. If the database handle is closed before -** the session object is deleted, then the results of calling any session -** module function, including [sqlite3session_delete()] on the session object -** are undefined. -** -** Because the session module uses the [sqlite3_preupdate_hook()] API, it -** is not possible for an application to register a pre-update hook on a -** database handle that has one or more session objects attached. Nor is -** it possible to create a session object attached to a database handle for -** which a pre-update hook is already defined. The results of attempting -** either of these things are undefined. -** -** The session object will be used to create changesets for tables in -** database zDb, where zDb is either "main", or "temp", or the name of an -** attached database. It is not an error if database zDb is not attached -** to the database when the session object is created. -*/ -int sqlite3session_create( - sqlite3 *db, /* Database handle */ - const char *zDb, /* Name of db (e.g. "main") */ - sqlite3_session **ppSession /* OUT: New session object */ -); - -/* -** CAPI3REF: Delete A Session Object -** -** Delete a session object previously allocated using -** [sqlite3session_create()]. Once a session object has been deleted, the -** results of attempting to use pSession with any other session module -** function are undefined. -** -** Session objects must be deleted before the database handle to which they -** are attached is closed. Refer to the documentation for -** [sqlite3session_create()] for details. -*/ -void sqlite3session_delete(sqlite3_session *pSession); - - -/* -** CAPI3REF: Enable Or Disable A Session Object -** -** Enable or disable the recording of changes by a session object. When -** enabled, a session object records changes made to the database. When -** disabled - it does not. A newly created session object is enabled. -** Refer to the documentation for [sqlite3session_changeset()] for further -** details regarding how enabling and disabling a session object affects -** the eventual changesets. -** -** Passing zero to this function disables the session. Passing a value -** greater than zero enables it. Passing a value less than zero is a -** no-op, and may be used to query the current state of the session. -** -** The return value indicates the final state of the session object: 0 if -** the session is disabled, or 1 if it is enabled. -*/ -int sqlite3session_enable(sqlite3_session *pSession, int bEnable); - -/* -** CAPI3REF: Set Or Clear the Indirect Change Flag -** -** Each change recorded by a session object is marked as either direct or -** indirect. A change is marked as indirect if either: -** -**
    -**
-**   • The session object "indirect" flag is set when the change is
-**     made, or
-**   • The change is made by an SQL trigger or foreign key action
-**     instead of directly as a result of a user's SQL statement.
-** -** If a single row is affected by more than one operation within a session, -** then the change is considered indirect if all operations meet the criteria -** for an indirect change above, or direct otherwise. -** -** This function is used to set, clear or query the session object indirect -** flag. If the second argument passed to this function is zero, then the -** indirect flag is cleared. If it is greater than zero, the indirect flag -** is set. Passing a value less than zero does not modify the current value -** of the indirect flag, and may be used to query the current state of the -** indirect flag for the specified session object. -** -** The return value indicates the final state of the indirect flag: 0 if -** it is clear, or 1 if it is set. -*/ -int sqlite3session_indirect(sqlite3_session *pSession, int bIndirect); - -/* -** CAPI3REF: Attach A Table To A Session Object -** -** If argument zTab is not NULL, then it is the name of a table to attach -** to the session object passed as the first argument. All subsequent changes -** made to the table while the session object is enabled will be recorded. See -** documentation for [sqlite3session_changeset()] for further details. -** -** Or, if argument zTab is NULL, then changes are recorded for all tables -** in the database. If additional tables are added to the database (by -** executing "CREATE TABLE" statements) after this call is made, changes for -** the new tables are also recorded. -** -** Changes can only be recorded for tables that have a PRIMARY KEY explicitly -** defined as part of their CREATE TABLE statement. It does not matter if the -** PRIMARY KEY is an "INTEGER PRIMARY KEY" (rowid alias) or not. The PRIMARY -** KEY may consist of a single column, or may be a composite key. -** -** It is not an error if the named table does not exist in the database. Nor -** is it an error if the named table does not have a PRIMARY KEY. However, -** no changes will be recorded in either of these scenarios. -** -** Changes are not recorded for individual rows that have NULL values stored -** in one or more of their PRIMARY KEY columns. -** -** SQLITE_OK is returned if the call completes without error. Or, if an error -** occurs, an SQLite error code (e.g. SQLITE_NOMEM) is returned. -*/ -int sqlite3session_attach( - sqlite3_session *pSession, /* Session object */ - const char *zTab /* Table name */ -); - -/* -** CAPI3REF: Set a table filter on a Session Object. -** -** The second argument (xFilter) is the "filter callback". For changes to rows -** in tables that are not attached to the Session oject, the filter is called -** to determine whether changes to the table's rows should be tracked or not. -** If xFilter returns 0, changes is not tracked. Note that once a table is -** attached, xFilter will not be called again. -*/ -void sqlite3session_table_filter( - sqlite3_session *pSession, /* Session object */ - int(*xFilter)( - void *pCtx, /* Copy of third arg to _filter_table() */ - const char *zTab /* Table name */ - ), - void *pCtx /* First argument passed to xFilter */ -); - -/* -** CAPI3REF: Generate A Changeset From A Session Object -** -** Obtain a changeset containing changes to the tables attached to the -** session object passed as the first argument. If successful, -** set *ppChangeset to point to a buffer containing the changeset -** and *pnChangeset to the size of the changeset in bytes before returning -** SQLITE_OK. If an error occurs, set both *ppChangeset and *pnChangeset to -** zero and return an SQLite error code. 
-** -** A changeset consists of zero or more INSERT, UPDATE and/or DELETE changes, -** each representing a change to a single row of an attached table. An INSERT -** change contains the values of each field of a new database row. A DELETE -** contains the original values of each field of a deleted database row. An -** UPDATE change contains the original values of each field of an updated -** database row along with the updated values for each updated non-primary-key -** column. It is not possible for an UPDATE change to represent a change that -** modifies the values of primary key columns. If such a change is made, it -** is represented in a changeset as a DELETE followed by an INSERT. -** -** Changes are not recorded for rows that have NULL values stored in one or -** more of their PRIMARY KEY columns. If such a row is inserted or deleted, -** no corresponding change is present in the changesets returned by this -** function. If an existing row with one or more NULL values stored in -** PRIMARY KEY columns is updated so that all PRIMARY KEY columns are non-NULL, -** only an INSERT is appears in the changeset. Similarly, if an existing row -** with non-NULL PRIMARY KEY values is updated so that one or more of its -** PRIMARY KEY columns are set to NULL, the resulting changeset contains a -** DELETE change only. -** -** The contents of a changeset may be traversed using an iterator created -** using the [sqlite3changeset_start()] API. A changeset may be applied to -** a database with a compatible schema using the [sqlite3changeset_apply()] -** API. -** -** Within a changeset generated by this function, all changes related to a -** single table are grouped together. In other words, when iterating through -** a changeset or when applying a changeset to a database, all changes related -** to a single table are processed before moving on to the next table. Tables -** are sorted in the same order in which they were attached (or auto-attached) -** to the sqlite3_session object. The order in which the changes related to -** a single table are stored is undefined. -** -** Following a successful call to this function, it is the responsibility of -** the caller to eventually free the buffer that *ppChangeset points to using -** [sqlite3_free()]. -** -**
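A minimal sketch of the workflow described above, combining sqlite3session_create(), sqlite3session_attach() and sqlite3session_changeset() with the iterator interface documented further below. The table name "t1" and the surrounding error handling are illustrative assumptions, not taken from the header:

    #include <sqlite3.h>
    #include <stdio.h>

    /* Record changes made to table "t1", then print the captured operations. */
    static int demo_session(sqlite3 *db){
      sqlite3_session *pSession = 0;
      sqlite3_changeset_iter *pIter = 0;
      void *pChangeset = 0;
      int nChangeset = 0;

      int rc = sqlite3session_create(db, "main", &pSession);
      if( rc==SQLITE_OK ) rc = sqlite3session_attach(pSession, "t1");
      /* ... application writes to t1 here ... */
      if( rc==SQLITE_OK ){
        rc = sqlite3session_changeset(pSession, &nChangeset, &pChangeset);
      }
      if( rc==SQLITE_OK ){
        rc = sqlite3changeset_start(&pIter, nChangeset, pChangeset);
      }
      if( rc==SQLITE_OK ){
        while( SQLITE_ROW==sqlite3changeset_next(pIter) ){
          const char *zTab; int nCol, op, bIndirect;
          sqlite3changeset_op(pIter, &zTab, &nCol, &op, &bIndirect);
          printf("%s: table %s (%d columns)\n",
                 op==SQLITE_INSERT ? "INSERT" :
                 op==SQLITE_DELETE ? "DELETE" : "UPDATE", zTab, nCol);
        }
        rc = sqlite3changeset_finalize(pIter);   /* reports any iteration error */
      }
      sqlite3_free(pChangeset);                  /* the caller owns the buffer */
      if( pSession ) sqlite3session_delete(pSession); /* delete before closing db */
      return rc;
    }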

-** Changeset Generation

-** -** Once a table has been attached to a session object, the session object -** records the primary key values of all new rows inserted into the table. -** It also records the original primary key and other column values of any -** deleted or updated rows. For each unique primary key value, data is only -** recorded once - the first time a row with said primary key is inserted, -** updated or deleted in the lifetime of the session. -** -** There is one exception to the previous paragraph: when a row is inserted, -** updated or deleted, if one or more of its primary key columns contain a -** NULL value, no record of the change is made. -** -** The session object therefore accumulates two types of records - those -** that consist of primary key values only (created when the user inserts -** a new record) and those that consist of the primary key values and the -** original values of other table columns (created when the users deletes -** or updates a record). -** -** When this function is called, the requested changeset is created using -** both the accumulated records and the current contents of the database -** file. Specifically: -** -**
    -**
-**   • For each record generated by an insert, the database is queried
-**     for a row with a matching primary key. If one is found, an INSERT
-**     change is added to the changeset. If no such row is found, no change
-**     is added to the changeset.
-**
-**   • For each record generated by an update or delete, the database is
-**     queried for a row with a matching primary key. If such a row is
-**     found and one or more of the non-primary key fields have been
-**     modified from their original values, an UPDATE change is added to
-**     the changeset. Or, if no such row is found in the table, a DELETE
-**     change is added to the changeset. If there is a row with a matching
-**     primary key in the database, but all fields contain their original
-**     values, no change is added to the changeset.
-** -** This means, amongst other things, that if a row is inserted and then later -** deleted while a session object is active, neither the insert nor the delete -** will be present in the changeset. Or if a row is deleted and then later a -** row with the same primary key values inserted while a session object is -** active, the resulting changeset will contain an UPDATE change instead of -** a DELETE and an INSERT. -** -** When a session object is disabled (see the [sqlite3session_enable()] API), -** it does not accumulate records when rows are inserted, updated or deleted. -** This may appear to have some counter-intuitive effects if a single row -** is written to more than once during a session. For example, if a row -** is inserted while a session object is enabled, then later deleted while -** the same session object is disabled, no INSERT record will appear in the -** changeset, even though the delete took place while the session was disabled. -** Or, if one field of a row is updated while a session is disabled, and -** another field of the same row is updated while the session is enabled, the -** resulting changeset will contain an UPDATE change that updates both fields. -*/ -int sqlite3session_changeset( - sqlite3_session *pSession, /* Session object */ - int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ - void **ppChangeset /* OUT: Buffer containing changeset */ -); - -/* -** CAPI3REF: Load The Difference Between Tables Into A Session -** -** If it is not already attached to the session object passed as the first -** argument, this function attaches table zTbl in the same manner as the -** [sqlite3session_attach()] function. If zTbl does not exist, or if it -** does not have a primary key, this function is a no-op (but does not return -** an error). -** -** Argument zFromDb must be the name of a database ("main", "temp" etc.) -** attached to the same database handle as the session object that contains -** a table compatible with the table attached to the session by this function. -** A table is considered compatible if it: -** -**
    -**
-**   • Has the same name,
-**   • Has the same set of columns declared in the same order, and
-**   • Has the same PRIMARY KEY definition.
-** -** If the tables are not compatible, SQLITE_SCHEMA is returned. If the tables -** are compatible but do not have any PRIMARY KEY columns, it is not an error -** but no changes are added to the session object. As with other session -** APIs, tables without PRIMARY KEYs are simply ignored. -** -** This function adds a set of changes to the session object that could be -** used to update the table in database zFrom (call this the "from-table") -** so that its content is the same as the table attached to the session -** object (call this the "to-table"). Specifically: -** -**
    -**
-**   • For each row (primary key) that exists in the to-table but not in
-**     the from-table, an INSERT record is added to the session object.
-**
-**   • For each row (primary key) that exists in the from-table but not in
-**     the to-table, a DELETE record is added to the session object.
-**
-**   • For each row (primary key) that exists in both tables, but whose
-**     non-primary-key values differ, an UPDATE record is added to the
-**     session object.
-**
-** To clarify, if this function is called and then a changeset constructed
-** using [sqlite3session_changeset()], then after applying that changeset to
-** database zFrom the contents of the two compatible tables would be
-** identical.
-**
-** It is an error if database zFrom does not exist or does not contain the
-** required compatible table.
-**
-** If the operation is successful, SQLITE_OK is returned. Otherwise, an
-** SQLite error code is returned. In this case, if argument pzErrMsg is not
-** NULL, *pzErrMsg may be set to point to a buffer containing an English
-** language error message. It is the responsibility of the caller to free
-** this buffer using sqlite3_free().
-*/
-int sqlite3session_diff(
-  sqlite3_session *pSession,
-  const char *zFromDb,
-  const char *zTbl,
-  char **pzErrMsg
-);
-
-
-/*
-** CAPI3REF: Generate A Patchset From A Session Object
-**
-** The differences between a patchset and a changeset are that:
-**
    -**
-**   • DELETE records consist of the primary key fields only. The
-**     original values of other fields are omitted.
-**   • The original values of any modified fields are omitted from
-**     UPDATE records.
-** -** A patchset blob may be used with up to date versions of all -** sqlite3changeset_xxx API functions except for sqlite3changeset_invert(), -** which returns SQLITE_CORRUPT if it is passed a patchset. Similarly, -** attempting to use a patchset blob with old versions of the -** sqlite3changeset_xxx APIs also provokes an SQLITE_CORRUPT error. -** -** Because the non-primary key "old.*" fields are omitted, no -** SQLITE_CHANGESET_DATA conflicts can be detected or reported if a patchset -** is passed to the sqlite3changeset_apply() API. Other conflict types work -** in the same way as for changesets. -** -** Changes within a patchset are ordered in the same way as for changesets -** generated by the sqlite3session_changeset() function (i.e. all changes for -** a single table are grouped together, tables appear in the order in which -** they were attached to the session object). -*/ -int sqlite3session_patchset( - sqlite3_session *pSession, /* Session object */ - int *pnPatchset, /* OUT: Size of buffer at *ppChangeset */ - void **ppPatchset /* OUT: Buffer containing changeset */ -); - -/* -** CAPI3REF: Test if a changeset has recorded any changes. -** -** Return non-zero if no changes to attached tables have been recorded by -** the session object passed as the first argument. Otherwise, if one or -** more changes have been recorded, return zero. -** -** Even if this function returns zero, it is possible that calling -** [sqlite3session_changeset()] on the session handle may still return a -** changeset that contains no changes. This can happen when a row in -** an attached table is modified and then later on the original values -** are restored. However, if this function returns non-zero, then it is -** guaranteed that a call to sqlite3session_changeset() will return a -** changeset containing zero changes. -*/ -int sqlite3session_isempty(sqlite3_session *pSession); - -/* -** CAPI3REF: Create An Iterator To Traverse A Changeset -** -** Create an iterator used to iterate through the contents of a changeset. -** If successful, *pp is set to point to the iterator handle and SQLITE_OK -** is returned. Otherwise, if an error occurs, *pp is set to zero and an -** SQLite error code is returned. -** -** The following functions can be used to advance and query a changeset -** iterator created by this function: -** -**
    -**
-**   • [sqlite3changeset_next()]
-**   • [sqlite3changeset_op()]
-**   • [sqlite3changeset_new()]
-**   • [sqlite3changeset_old()]
-** -** It is the responsibility of the caller to eventually destroy the iterator -** by passing it to [sqlite3changeset_finalize()]. The buffer containing the -** changeset (pChangeset) must remain valid until after the iterator is -** destroyed. -** -** Assuming the changeset blob was created by one of the -** [sqlite3session_changeset()], [sqlite3changeset_concat()] or -** [sqlite3changeset_invert()] functions, all changes within the changeset -** that apply to a single table are grouped together. This means that when -** an application iterates through a changeset using an iterator created by -** this function, all changes that relate to a single table are visted -** consecutively. There is no chance that the iterator will visit a change -** the applies to table X, then one for table Y, and then later on visit -** another change for table X. -*/ -int sqlite3changeset_start( - sqlite3_changeset_iter **pp, /* OUT: New changeset iterator handle */ - int nChangeset, /* Size of changeset blob in bytes */ - void *pChangeset /* Pointer to blob containing changeset */ -); - - -/* -** CAPI3REF: Advance A Changeset Iterator -** -** This function may only be used with iterators created by function -** [sqlite3changeset_start()]. If it is called on an iterator passed to -** a conflict-handler callback by [sqlite3changeset_apply()], SQLITE_MISUSE -** is returned and the call has no effect. -** -** Immediately after an iterator is created by sqlite3changeset_start(), it -** does not point to any change in the changeset. Assuming the changeset -** is not empty, the first call to this function advances the iterator to -** point to the first change in the changeset. Each subsequent call advances -** the iterator to point to the next change in the changeset (if any). If -** no error occurs and the iterator points to a valid change after a call -** to sqlite3changeset_next() has advanced it, SQLITE_ROW is returned. -** Otherwise, if all changes in the changeset have already been visited, -** SQLITE_DONE is returned. -** -** If an error occurs, an SQLite error code is returned. Possible error -** codes include SQLITE_CORRUPT (if the changeset buffer is corrupt) or -** SQLITE_NOMEM. -*/ -int sqlite3changeset_next(sqlite3_changeset_iter *pIter); - -/* -** CAPI3REF: Obtain The Current Operation From A Changeset Iterator -** -** The pIter argument passed to this function may either be an iterator -** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator -** created by [sqlite3changeset_start()]. In the latter case, the most recent -** call to [sqlite3changeset_next()] must have returned [SQLITE_ROW]. If this -** is not the case, this function returns [SQLITE_MISUSE]. -** -** If argument pzTab is not NULL, then *pzTab is set to point to a -** nul-terminated utf-8 encoded string containing the name of the table -** affected by the current change. The buffer remains valid until either -** sqlite3changeset_next() is called on the iterator or until the -** conflict-handler function returns. If pnCol is not NULL, then *pnCol is -** set to the number of columns in the table affected by the change. If -** pbIncorrect is not NULL, then *pbIndirect is set to true (1) if the change -** is an indirect change, or false (0) otherwise. See the documentation for -** [sqlite3session_indirect()] for a description of direct and indirect -** changes. 
Finally, if pOp is not NULL, then *pOp is set to one of -** [SQLITE_INSERT], [SQLITE_DELETE] or [SQLITE_UPDATE], depending on the -** type of change that the iterator currently points to. -** -** If no error occurs, SQLITE_OK is returned. If an error does occur, an -** SQLite error code is returned. The values of the output variables may not -** be trusted in this case. -*/ -int sqlite3changeset_op( - sqlite3_changeset_iter *pIter, /* Iterator object */ - const char **pzTab, /* OUT: Pointer to table name */ - int *pnCol, /* OUT: Number of columns in table */ - int *pOp, /* OUT: SQLITE_INSERT, DELETE or UPDATE */ - int *pbIndirect /* OUT: True for an 'indirect' change */ -); - -/* -** CAPI3REF: Obtain The Primary Key Definition Of A Table -** -** For each modified table, a changeset includes the following: -** -**
    -**
-**   • The number of columns in the table, and
-**   • Which of those columns make up the table's PRIMARY KEY.
-** -** This function is used to find which columns comprise the PRIMARY KEY of -** the table modified by the change that iterator pIter currently points to. -** If successful, *pabPK is set to point to an array of nCol entries, where -** nCol is the number of columns in the table. Elements of *pabPK are set to -** 0x01 if the corresponding column is part of the tables primary key, or -** 0x00 if it is not. -** -** If argumet pnCol is not NULL, then *pnCol is set to the number of columns -** in the table. -** -** If this function is called when the iterator does not point to a valid -** entry, SQLITE_MISUSE is returned and the output variables zeroed. Otherwise, -** SQLITE_OK is returned and the output variables populated as described -** above. -*/ -int sqlite3changeset_pk( - sqlite3_changeset_iter *pIter, /* Iterator object */ - unsigned char **pabPK, /* OUT: Array of boolean - true for PK cols */ - int *pnCol /* OUT: Number of entries in output array */ -); - -/* -** CAPI3REF: Obtain old.* Values From A Changeset Iterator -** -** The pIter argument passed to this function may either be an iterator -** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator -** created by [sqlite3changeset_start()]. In the latter case, the most recent -** call to [sqlite3changeset_next()] must have returned SQLITE_ROW. -** Furthermore, it may only be called if the type of change that the iterator -** currently points to is either [SQLITE_DELETE] or [SQLITE_UPDATE]. Otherwise, -** this function returns [SQLITE_MISUSE] and sets *ppValue to NULL. -** -** Argument iVal must be greater than or equal to 0, and less than the number -** of columns in the table affected by the current change. Otherwise, -** [SQLITE_RANGE] is returned and *ppValue is set to NULL. -** -** If successful, this function sets *ppValue to point to a protected -** sqlite3_value object containing the iVal'th value from the vector of -** original row values stored as part of the UPDATE or DELETE change and -** returns SQLITE_OK. The name of the function comes from the fact that this -** is similar to the "old.*" columns available to update or delete triggers. -** -** If some other error occurs (e.g. an OOM condition), an SQLite error code -** is returned and *ppValue is set to NULL. -*/ -int sqlite3changeset_old( - sqlite3_changeset_iter *pIter, /* Changeset iterator */ - int iVal, /* Column number */ - sqlite3_value **ppValue /* OUT: Old value (or NULL pointer) */ -); - -/* -** CAPI3REF: Obtain new.* Values From A Changeset Iterator -** -** The pIter argument passed to this function may either be an iterator -** passed to a conflict-handler by [sqlite3changeset_apply()], or an iterator -** created by [sqlite3changeset_start()]. In the latter case, the most recent -** call to [sqlite3changeset_next()] must have returned SQLITE_ROW. -** Furthermore, it may only be called if the type of change that the iterator -** currently points to is either [SQLITE_UPDATE] or [SQLITE_INSERT]. Otherwise, -** this function returns [SQLITE_MISUSE] and sets *ppValue to NULL. -** -** Argument iVal must be greater than or equal to 0, and less than the number -** of columns in the table affected by the current change. Otherwise, -** [SQLITE_RANGE] is returned and *ppValue is set to NULL. -** -** If successful, this function sets *ppValue to point to a protected -** sqlite3_value object containing the iVal'th value from the vector of -** new row values stored as part of the UPDATE or INSERT change and -** returns SQLITE_OK. 
If the change is an UPDATE and does not include -** a new value for the requested column, *ppValue is set to NULL and -** SQLITE_OK returned. The name of the function comes from the fact that -** this is similar to the "new.*" columns available to update or delete -** triggers. -** -** If some other error occurs (e.g. an OOM condition), an SQLite error code -** is returned and *ppValue is set to NULL. -*/ -int sqlite3changeset_new( - sqlite3_changeset_iter *pIter, /* Changeset iterator */ - int iVal, /* Column number */ - sqlite3_value **ppValue /* OUT: New value (or NULL pointer) */ -); - -/* -** CAPI3REF: Obtain Conflicting Row Values From A Changeset Iterator -** -** This function should only be used with iterator objects passed to a -** conflict-handler callback by [sqlite3changeset_apply()] with either -** [SQLITE_CHANGESET_DATA] or [SQLITE_CHANGESET_CONFLICT]. If this function -** is called on any other iterator, [SQLITE_MISUSE] is returned and *ppValue -** is set to NULL. -** -** Argument iVal must be greater than or equal to 0, and less than the number -** of columns in the table affected by the current change. Otherwise, -** [SQLITE_RANGE] is returned and *ppValue is set to NULL. -** -** If successful, this function sets *ppValue to point to a protected -** sqlite3_value object containing the iVal'th value from the -** "conflicting row" associated with the current conflict-handler callback -** and returns SQLITE_OK. -** -** If some other error occurs (e.g. an OOM condition), an SQLite error code -** is returned and *ppValue is set to NULL. -*/ -int sqlite3changeset_conflict( - sqlite3_changeset_iter *pIter, /* Changeset iterator */ - int iVal, /* Column number */ - sqlite3_value **ppValue /* OUT: Value from conflicting row */ -); - -/* -** CAPI3REF: Determine The Number Of Foreign Key Constraint Violations -** -** This function may only be called with an iterator passed to an -** SQLITE_CHANGESET_FOREIGN_KEY conflict handler callback. In this case -** it sets the output variable to the total number of known foreign key -** violations in the destination database and returns SQLITE_OK. -** -** In all other cases this function returns SQLITE_MISUSE. -*/ -int sqlite3changeset_fk_conflicts( - sqlite3_changeset_iter *pIter, /* Changeset iterator */ - int *pnOut /* OUT: Number of FK violations */ -); - - -/* -** CAPI3REF: Finalize A Changeset Iterator -** -** This function is used to finalize an iterator allocated with -** [sqlite3changeset_start()]. -** -** This function should only be called on iterators created using the -** [sqlite3changeset_start()] function. If an application calls this -** function with an iterator passed to a conflict-handler by -** [sqlite3changeset_apply()], [SQLITE_MISUSE] is immediately returned and the -** call has no effect. -** -** If an error was encountered within a call to an sqlite3changeset_xxx() -** function (for example an [SQLITE_CORRUPT] in [sqlite3changeset_next()] or an -** [SQLITE_NOMEM] in [sqlite3changeset_new()]) then an error code corresponding -** to that error is returned by this function. Otherwise, SQLITE_OK is -** returned. This is to allow the following pattern (pseudo-code): -** -** sqlite3changeset_start(); -** while( SQLITE_ROW==sqlite3changeset_next() ){ -** // Do something with change. 
-** } -** rc = sqlite3changeset_finalize(); -** if( rc!=SQLITE_OK ){ -** // An error has occurred -** } -*/ -int sqlite3changeset_finalize(sqlite3_changeset_iter *pIter); - -/* -** CAPI3REF: Invert A Changeset -** -** This function is used to "invert" a changeset object. Applying an inverted -** changeset to a database reverses the effects of applying the uninverted -** changeset. Specifically: -** -**
    -**
-**   • Each DELETE change is changed to an INSERT, and
-**   • Each INSERT change is changed to a DELETE, and
-**   • For each UPDATE change, the old.* and new.* values are exchanged.
-** -** This function does not change the order in which changes appear within -** the changeset. It merely reverses the sense of each individual change. -** -** If successful, a pointer to a buffer containing the inverted changeset -** is stored in *ppOut, the size of the same buffer is stored in *pnOut, and -** SQLITE_OK is returned. If an error occurs, both *pnOut and *ppOut are -** zeroed and an SQLite error code returned. -** -** It is the responsibility of the caller to eventually call sqlite3_free() -** on the *ppOut pointer to free the buffer allocation following a successful -** call to this function. -** -** WARNING/TODO: This function currently assumes that the input is a valid -** changeset. If it is not, the results are undefined. -*/ -int sqlite3changeset_invert( - int nIn, const void *pIn, /* Input changeset */ - int *pnOut, void **ppOut /* OUT: Inverse of input */ -); - -/* -** CAPI3REF: Concatenate Two Changeset Objects -** -** This function is used to concatenate two changesets, A and B, into a -** single changeset. The result is a changeset equivalent to applying -** changeset A followed by changeset B. -** -** This function combines the two input changesets using an -** sqlite3_changegroup object. Calling it produces similar results as the -** following code fragment: -** -** sqlite3_changegroup *pGrp; -** rc = sqlite3_changegroup_new(&pGrp); -** if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nA, pA); -** if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nB, pB); -** if( rc==SQLITE_OK ){ -** rc = sqlite3changegroup_output(pGrp, pnOut, ppOut); -** }else{ -** *ppOut = 0; -** *pnOut = 0; -** } -** -** Refer to the sqlite3_changegroup documentation below for details. -*/ -int sqlite3changeset_concat( - int nA, /* Number of bytes in buffer pA */ - void *pA, /* Pointer to buffer containing changeset A */ - int nB, /* Number of bytes in buffer pB */ - void *pB, /* Pointer to buffer containing changeset B */ - int *pnOut, /* OUT: Number of bytes in output changeset */ - void **ppOut /* OUT: Buffer containing output changeset */ -); - - -/* -** Changegroup handle. -*/ -typedef struct sqlite3_changegroup sqlite3_changegroup; - -/* -** CAPI3REF: Combine two or more changesets into a single changeset. -** -** An sqlite3_changegroup object is used to combine two or more changesets -** (or patchsets) into a single changeset (or patchset). A single changegroup -** object may combine changesets or patchsets, but not both. The output is -** always in the same format as the input. -** -** If successful, this function returns SQLITE_OK and populates (*pp) with -** a pointer to a new sqlite3_changegroup object before returning. The caller -** should eventually free the returned object using a call to -** sqlite3changegroup_delete(). If an error occurs, an SQLite error code -** (i.e. SQLITE_NOMEM) is returned and *pp is set to NULL. -** -** The usual usage pattern for an sqlite3_changegroup object is as follows: -** -**
    -**
-**   • It is created using a call to sqlite3changegroup_new().
-**
-**   • Zero or more changesets (or patchsets) are added to the object
-**     by calling sqlite3changegroup_add().
-**
-**   • The result of combining all input changesets together is obtained
-**     by the application via a call to sqlite3changegroup_output().
-**
-**   • The object is deleted using a call to sqlite3changegroup_delete().
-**     (An illustrative sketch of this pattern appears after the table
-**     below.)
-** -** Any number of calls to add() and output() may be made between the calls to -** new() and delete(), and in any order. -** -** As well as the regular sqlite3changegroup_add() and -** sqlite3changegroup_output() functions, also available are the streaming -** versions sqlite3changegroup_add_strm() and sqlite3changegroup_output_strm(). -*/ -int sqlite3changegroup_new(sqlite3_changegroup **pp); - -/* -** Add all changes within the changeset (or patchset) in buffer pData (size -** nData bytes) to the changegroup. -** -** If the buffer contains a patchset, then all prior calls to this function -** on the same changegroup object must also have specified patchsets. Or, if -** the buffer contains a changeset, so must have the earlier calls to this -** function. Otherwise, SQLITE_ERROR is returned and no changes are added -** to the changegroup. -** -** Rows within the changeset and changegroup are identified by the values in -** their PRIMARY KEY columns. A change in the changeset is considered to -** apply to the same row as a change already present in the changegroup if -** the two rows have the same primary key. -** -** Changes to rows that that do not already appear in the changegroup are -** simply copied into it. Or, if both the new changeset and the changegroup -** contain changes that apply to a single row, the final contents of the -** changegroup depends on the type of each change, as follows: -** -** -** -** -**
-**   Existing Change | New Change | Output Change
-**   ----------------+------------+---------------------------------------
-**   INSERT          | INSERT     | The new change is ignored. This case
-**                   |            | does not occur if the new changeset was
-**                   |            | recorded immediately after the changesets
-**                   |            | already added to the changegroup.
-**   INSERT          | UPDATE     | The INSERT change remains in the
-**                   |            | changegroup. The values in the INSERT
-**                   |            | change are modified as if the row was
-**                   |            | inserted by the existing change and then
-**                   |            | updated according to the new change.
-**   INSERT          | DELETE     | The existing INSERT is removed from the
-**                   |            | changegroup. The DELETE is not added.
-**   UPDATE          | INSERT     | The new change is ignored. This case
-**                   |            | does not occur if the new changeset was
-**                   |            | recorded immediately after the changesets
-**                   |            | already added to the changegroup.
-**   UPDATE          | UPDATE     | The existing UPDATE remains within the
-**                   |            | changegroup. It is amended so that the
-**                   |            | accompanying values are as if the row was
-**                   |            | updated once by the existing change and
-**                   |            | then again by the new change.
-**   UPDATE          | DELETE     | The existing UPDATE is replaced by the
-**                   |            | new DELETE within the changegroup.
-**   DELETE          | INSERT     | If one or more of the column values in
-**                   |            | the row inserted by the new change differ
-**                   |            | from those in the row deleted by the
-**                   |            | existing change, the existing DELETE is
-**                   |            | replaced by an UPDATE within the
-**                   |            | changegroup. Otherwise, if the inserted
-**                   |            | row is exactly the same as the deleted
-**                   |            | row, the existing DELETE is simply
-**                   |            | discarded.
-**   DELETE          | UPDATE     | The new change is ignored. This case
-**                   |            | does not occur if the new changeset was
-**                   |            | recorded immediately after the changesets
-**                   |            | already added to the changegroup.
-**   DELETE          | DELETE     | The new change is ignored. This case
-**                   |            | does not occur if the new changeset was
-**                   |            | recorded immediately after the changesets
-**                   |            | already added to the changegroup.
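The new()/add()/output()/delete() pattern listed above, as a brief sketch; only the sqlite3changegroup_* calls come from this header, and the wrapper function is hypothetical:

    #include <sqlite3.h>

    /* Combine changesets A and B; on success the caller must
    ** sqlite3_free(*ppOut). */
    static int combine(int nA, void *pA, int nB, void *pB,
                       int *pnOut, void **ppOut){
      sqlite3_changegroup *pGrp = 0;
      int rc = sqlite3changegroup_new(&pGrp);
      if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nA, pA);
      if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nB, pB);
      if( rc==SQLITE_OK ) rc = sqlite3changegroup_output(pGrp, pnOut, ppOut);
      if( pGrp ) sqlite3changegroup_delete(pGrp);
      return rc;
    }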
-** -** If the new changeset contains changes to a table that is already present -** in the changegroup, then the number of columns and the position of the -** primary key columns for the table must be consistent. If this is not the -** case, this function fails with SQLITE_SCHEMA. If the input changeset -** appears to be corrupt and the corruption is detected, SQLITE_CORRUPT is -** returned. Or, if an out-of-memory condition occurs during processing, this -** function returns SQLITE_NOMEM. In all cases, if an error occurs the -** final contents of the changegroup is undefined. -** -** If no error occurs, SQLITE_OK is returned. -*/ -int sqlite3changegroup_add(sqlite3_changegroup*, int nData, void *pData); - -/* -** Obtain a buffer containing a changeset (or patchset) representing the -** current contents of the changegroup. If the inputs to the changegroup -** were themselves changesets, the output is a changeset. Or, if the -** inputs were patchsets, the output is also a patchset. -** -** As with the output of the sqlite3session_changeset() and -** sqlite3session_patchset() functions, all changes related to a single -** table are grouped together in the output of this function. Tables appear -** in the same order as for the very first changeset added to the changegroup. -** If the second or subsequent changesets added to the changegroup contain -** changes for tables that do not appear in the first changeset, they are -** appended onto the end of the output changeset, again in the order in -** which they are first encountered. -** -** If an error occurs, an SQLite error code is returned and the output -** variables (*pnData) and (*ppData) are set to 0. Otherwise, SQLITE_OK -** is returned and the output variables are set to the size of and a -** pointer to the output buffer, respectively. In this case it is the -** responsibility of the caller to eventually free the buffer using a -** call to sqlite3_free(). -*/ -int sqlite3changegroup_output( - sqlite3_changegroup*, - int *pnData, /* OUT: Size of output buffer in bytes */ - void **ppData /* OUT: Pointer to output buffer */ -); - -/* -** Delete a changegroup object. -*/ -void sqlite3changegroup_delete(sqlite3_changegroup*); - -/* -** CAPI3REF: Apply A Changeset To A Database -** -** Apply a changeset to a database. This function attempts to update the -** "main" database attached to handle db with the changes found in the -** changeset passed via the second and third arguments. -** -** The fourth argument (xFilter) passed to this function is the "filter -** callback". If it is not NULL, then for each table affected by at least one -** change in the changeset, the filter callback is invoked with -** the table name as the second argument, and a copy of the context pointer -** passed as the sixth argument to this function as the first. If the "filter -** callback" returns zero, then no attempt is made to apply any changes to -** the table. Otherwise, if the return value is non-zero or the xFilter -** argument to this function is NULL, all changes related to the table are -** attempted. -** -** For each table that is not excluded by the filter callback, this function -** tests that the target database contains a compatible table. A table is -** considered compatible if all of the following are true: -** -**
    -**
-**   • The table has the same name as the name recorded in the
-**     changeset, and
-**   • The table has the same number of columns as recorded in the
-**     changeset, and
-**   • The table has primary key columns in the same position as
-**     recorded in the changeset.
-** -** If there is no compatible table, it is not an error, but none of the -** changes associated with the table are applied. A warning message is issued -** via the sqlite3_log() mechanism with the error code SQLITE_SCHEMA. At most -** one such warning is issued for each table in the changeset. -** -** For each change for which there is a compatible table, an attempt is made -** to modify the table contents according to the UPDATE, INSERT or DELETE -** change. If a change cannot be applied cleanly, the conflict handler -** function passed as the fifth argument to sqlite3changeset_apply() may be -** invoked. A description of exactly when the conflict handler is invoked for -** each type of change is below. -** -** Unlike the xFilter argument, xConflict may not be passed NULL. The results -** of passing anything other than a valid function pointer as the xConflict -** argument are undefined. -** -** Each time the conflict handler function is invoked, it must return one -** of [SQLITE_CHANGESET_OMIT], [SQLITE_CHANGESET_ABORT] or -** [SQLITE_CHANGESET_REPLACE]. SQLITE_CHANGESET_REPLACE may only be returned -** if the second argument passed to the conflict handler is either -** SQLITE_CHANGESET_DATA or SQLITE_CHANGESET_CONFLICT. If the conflict-handler -** returns an illegal value, any changes already made are rolled back and -** the call to sqlite3changeset_apply() returns SQLITE_MISUSE. Different -** actions are taken by sqlite3changeset_apply() depending on the value -** returned by each invocation of the conflict-handler function. Refer to -** the documentation for the three -** [SQLITE_CHANGESET_OMIT|available return values] for details. -** -**
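A sketch of an sqlite3changeset_apply() call with both callbacks. The table name "internal_log" and the remote-wins policy are invented for illustration; only the sqlite3changeset_* names come from the header:

    #include <sqlite3.h>
    #include <string.h>

    /* Skip one (hypothetical) table entirely. */
    static int xFilter(void *pCtx, const char *zTab){
      (void)pCtx;
      return strcmp(zTab, "internal_log")!=0;
    }

    /* Let the incoming changeset win on DATA conflicts, drop the rest. */
    static int xConflict(void *pCtx, int eConflict, sqlite3_changeset_iter *p){
      (void)pCtx; (void)p;
      if( eConflict==SQLITE_CHANGESET_DATA ) return SQLITE_CHANGESET_REPLACE;
      return SQLITE_CHANGESET_OMIT;
    }

    static int apply_changes(sqlite3 *db, int nChangeset, void *pChangeset){
      return sqlite3changeset_apply(db, nChangeset, pChangeset,
                                    xFilter, xConflict, 0);
    }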
-**
-** DELETE Changes
-** For each DELETE change, this function checks if the target database -** contains a row with the same primary key value (or values) as the -** original row values stored in the changeset. If it does, and the values -** stored in all non-primary key columns also match the values stored in -** the changeset the row is deleted from the target database. -** -** If a row with matching primary key values is found, but one or more of -** the non-primary key fields contains a value different from the original -** row value stored in the changeset, the conflict-handler function is -** invoked with [SQLITE_CHANGESET_DATA] as the second argument. -** -** If no row with matching primary key values is found in the database, -** the conflict-handler function is invoked with [SQLITE_CHANGESET_NOTFOUND] -** passed as the second argument. -** -** If the DELETE operation is attempted, but SQLite returns SQLITE_CONSTRAINT -** (which can only happen if a foreign key constraint is violated), the -** conflict-handler function is invoked with [SQLITE_CHANGESET_CONSTRAINT] -** passed as the second argument. This includes the case where the DELETE -** operation is attempted because an earlier call to the conflict handler -** function returned [SQLITE_CHANGESET_REPLACE]. -** -**
-** INSERT Changes
-** For each INSERT change, an attempt is made to insert the new row into -** the database. -** -** If the attempt to insert the row fails because the database already -** contains a row with the same primary key values, the conflict handler -** function is invoked with the second argument set to -** [SQLITE_CHANGESET_CONFLICT]. -** -** If the attempt to insert the row fails because of some other constraint -** violation (e.g. NOT NULL or UNIQUE), the conflict handler function is -** invoked with the second argument set to [SQLITE_CHANGESET_CONSTRAINT]. -** This includes the case where the INSERT operation is re-attempted because -** an earlier call to the conflict handler function returned -** [SQLITE_CHANGESET_REPLACE]. -** -**
-** UPDATE Changes
-** For each UPDATE change, this function checks if the target database -** contains a row with the same primary key value (or values) as the -** original row values stored in the changeset. If it does, and the values -** stored in all non-primary key columns also match the values stored in -** the changeset the row is updated within the target database. -** -** If a row with matching primary key values is found, but one or more of -** the non-primary key fields contains a value different from an original -** row value stored in the changeset, the conflict-handler function is -** invoked with [SQLITE_CHANGESET_DATA] as the second argument. Since -** UPDATE changes only contain values for non-primary key fields that are -** to be modified, only those fields need to match the original values to -** avoid the SQLITE_CHANGESET_DATA conflict-handler callback. -** -** If no row with matching primary key values is found in the database, -** the conflict-handler function is invoked with [SQLITE_CHANGESET_NOTFOUND] -** passed as the second argument. -** -** If the UPDATE operation is attempted, but SQLite returns -** SQLITE_CONSTRAINT, the conflict-handler function is invoked with -** [SQLITE_CHANGESET_CONSTRAINT] passed as the second argument. -** This includes the case where the UPDATE operation is attempted after -** an earlier call to the conflict handler function returned -** [SQLITE_CHANGESET_REPLACE]. -**
-** -** It is safe to execute SQL statements, including those that write to the -** table that the callback related to, from within the xConflict callback. -** This can be used to further customize the applications conflict -** resolution strategy. -** -** All changes made by this function are enclosed in a savepoint transaction. -** If any other error (aside from a constraint failure when attempting to -** write to the target database) occurs, then the savepoint transaction is -** rolled back, restoring the target database to its original state, and an -** SQLite error code returned. -*/ -int sqlite3changeset_apply( - sqlite3 *db, /* Apply change to "main" db of this handle */ - int nChangeset, /* Size of changeset in bytes */ - void *pChangeset, /* Changeset blob */ - int(*xFilter)( - void *pCtx, /* Copy of sixth arg to _apply() */ - const char *zTab /* Table name */ - ), - int(*xConflict)( - void *pCtx, /* Copy of sixth arg to _apply() */ - int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ - sqlite3_changeset_iter *p /* Handle describing change and conflict */ - ), - void *pCtx /* First argument passed to xConflict */ -); - -/* -** CAPI3REF: Constants Passed To The Conflict Handler -** -** Values that may be passed as the second argument to a conflict-handler. -** -**
-**
-** SQLITE_CHANGESET_DATA
-** The conflict handler is invoked with CHANGESET_DATA as the second argument -** when processing a DELETE or UPDATE change if a row with the required -** PRIMARY KEY fields is present in the database, but one or more other -** (non primary-key) fields modified by the update do not contain the -** expected "before" values. -** -** The conflicting row, in this case, is the database row with the matching -** primary key. -** -**
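Inside a conflict handler, the conflicting row described above can be inspected with sqlite3changeset_conflict(); a sketch, with the replace-on-conflict policy invented for illustration:

    /* Handle SQLITE_CHANGESET_DATA by examining column 0 of the
    ** conflicting database row before deciding. */
    static int dataHandler(void *pCtx, int eConflict, sqlite3_changeset_iter *p){
      (void)pCtx;
      if( eConflict==SQLITE_CHANGESET_DATA ){
        sqlite3_value *pVal = 0;
        if( sqlite3changeset_conflict(p, 0, &pVal)==SQLITE_OK && pVal ){
          /* pVal is a protected sqlite3_value for the conflicting row */
        }
        return SQLITE_CHANGESET_REPLACE;   /* let the changeset win */
      }
      return SQLITE_CHANGESET_OMIT;
    }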
-** SQLITE_CHANGESET_NOTFOUND
-** The conflict handler is invoked with CHANGESET_NOTFOUND as the second -** argument when processing a DELETE or UPDATE change if a row with the -** required PRIMARY KEY fields is not present in the database. -** -** There is no conflicting row in this case. The results of invoking the -** sqlite3changeset_conflict() API are undefined. -** -**
-** SQLITE_CHANGESET_CONFLICT
-** CHANGESET_CONFLICT is passed as the second argument to the conflict -** handler while processing an INSERT change if the operation would result -** in duplicate primary key values. -** -** The conflicting row in this case is the database row with the matching -** primary key. -** -**
-** SQLITE_CHANGESET_FOREIGN_KEY
-** If foreign key handling is enabled, and applying a changeset leaves the -** database in a state containing foreign key violations, the conflict -** handler is invoked with CHANGESET_FOREIGN_KEY as the second argument -** exactly once before the changeset is committed. If the conflict handler -** returns CHANGESET_OMIT, the changes, including those that caused the -** foreign key constraint violation, are committed. Or, if it returns -** CHANGESET_ABORT, the changeset is rolled back. -** -** No current or conflicting row information is provided. The only function -** it is possible to call on the supplied sqlite3_changeset_iter handle -** is sqlite3changeset_fk_conflicts(). -** -**
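A sketch of the foreign-key case just described; the tolerate-up-to-two-violations policy is a made-up example, not a recommendation:

    /* Commit despite a small number of FK violations, else roll back. */
    static int fkHandler(void *pCtx, int eConflict, sqlite3_changeset_iter *p){
      (void)pCtx;
      if( eConflict==SQLITE_CHANGESET_FOREIGN_KEY ){
        int nFk = 0;
        sqlite3changeset_fk_conflicts(p, &nFk);
        return nFk<=2 ? SQLITE_CHANGESET_OMIT : SQLITE_CHANGESET_ABORT;
      }
      return SQLITE_CHANGESET_OMIT;
    }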
-** SQLITE_CHANGESET_CONSTRAINT
-** If any other constraint violation occurs while applying a change (i.e. -** a UNIQUE, CHECK or NOT NULL constraint), the conflict handler is -** invoked with CHANGESET_CONSTRAINT as the second argument. -** -** There is no conflicting row in this case. The results of invoking the -** sqlite3changeset_conflict() API are undefined. -** -**
-*/ -#define SQLITE_CHANGESET_DATA 1 -#define SQLITE_CHANGESET_NOTFOUND 2 -#define SQLITE_CHANGESET_CONFLICT 3 -#define SQLITE_CHANGESET_CONSTRAINT 4 -#define SQLITE_CHANGESET_FOREIGN_KEY 5 - -/* -** CAPI3REF: Constants Returned By The Conflict Handler -** -** A conflict handler callback must return one of the following three values. -** -**
-**
-** SQLITE_CHANGESET_OMIT
-** If a conflict handler returns this value no special action is taken. The -** change that caused the conflict is not applied. The session module -** continues to the next change in the changeset. -** -**
-** SQLITE_CHANGESET_REPLACE
-** This value may only be returned if the second argument to the conflict -** handler was SQLITE_CHANGESET_DATA or SQLITE_CHANGESET_CONFLICT. If this -** is not the case, any changes applied so far are rolled back and the -** call to sqlite3changeset_apply() returns SQLITE_MISUSE. -** -** If CHANGESET_REPLACE is returned by an SQLITE_CHANGESET_DATA conflict -** handler, then the conflicting row is either updated or deleted, depending -** on the type of change. -** -** If CHANGESET_REPLACE is returned by an SQLITE_CHANGESET_CONFLICT conflict -** handler, then the conflicting row is removed from the database and a -** second attempt to apply the change is made. If this second attempt fails, -** the original row is restored to the database before continuing. -** -**
-** SQLITE_CHANGESET_ABORT
-** If this value is returned, any changes applied so far are rolled back -** and the call to sqlite3changeset_apply() returns SQLITE_ABORT. -**
-*/ -#define SQLITE_CHANGESET_OMIT 0 -#define SQLITE_CHANGESET_REPLACE 1 -#define SQLITE_CHANGESET_ABORT 2 - -/* -** CAPI3REF: Streaming Versions of API functions. -** -** The six streaming API xxx_strm() functions serve similar purposes to the -** corresponding non-streaming API functions: -** -** -** -**
-**   Streaming function              Non-streaming equivalent
-**   sqlite3changeset_apply_strm     [sqlite3changeset_apply]
-**   sqlite3changeset_concat_strm    [sqlite3changeset_concat]
-**   sqlite3changeset_invert_strm    [sqlite3changeset_invert]
-**   sqlite3changeset_start_strm     [sqlite3changeset_start]
-**   sqlite3session_changeset_strm   [sqlite3session_changeset]
-**   sqlite3session_patchset_strm    [sqlite3session_patchset]
-**
-** Non-streaming functions that accept changesets (or patchsets) as input
-** require that the entire changeset be stored in a single buffer in memory.
-** Similarly, those that return a changeset or patchset do so by returning
-** a pointer to a single large buffer allocated using sqlite3_malloc().
-** Normally this is convenient. However, if an application running in a
-** low-memory environment is required to handle very large changesets, the
-** large contiguous memory allocations required can become onerous.
-**
-** In order to avoid this problem, instead of a single large buffer, input
-** is passed to the streaming API functions by way of a callback function
-** that the sessions module invokes to incrementally request input data as
-** it is required. In all cases, a pair of API function parameters such as
-**
-**        int nChangeset,
-**        void *pChangeset,
-**  
-** Is replaced by:
-**
-**        int (*xInput)(void *pIn, void *pData, int *pnData),
-**        void *pIn,
-**  
-** -** Each time the xInput callback is invoked by the sessions module, the first -** argument passed is a copy of the supplied pIn context pointer. The second -** argument, pData, points to a buffer (*pnData) bytes in size. Assuming no -** error occurs the xInput method should copy up to (*pnData) bytes of data -** into the buffer and set (*pnData) to the actual number of bytes copied -** before returning SQLITE_OK. If the input is completely exhausted, (*pnData) -** should be set to zero to indicate this. Or, if an error occurs, an SQLite -** error code should be returned. In all cases, if an xInput callback returns -** an error, all processing is abandoned and the streaming API function -** returns a copy of the error code to the caller. -** -** In the case of sqlite3changeset_start_strm(), the xInput callback may be -** invoked by the sessions module at any point during the lifetime of the -** iterator. If such an xInput callback returns an error, the iterator enters -** an error state, whereby all subsequent calls to iterator functions -** immediately fail with the same error code as returned by xInput. -** -** Similarly, streaming API functions that return changesets (or patchsets) -** return them in chunks by way of a callback function instead of via a -** pointer to a single large buffer. In this case, a pair of parameters such -** as: -** -**
-**        int *pnChangeset,
-**        void **ppChangeset,
-**  
-** Is replaced by:
-**
-**        int (*xOutput)(void *pOut, const void *pData, int nData),
-**        void *pOut
-**  
-** -** The xOutput callback is invoked zero or more times to return data to -** the application. The first parameter passed to each call is a copy of the -** pOut pointer supplied by the application. The second parameter, pData, -** points to a buffer nData bytes in size containing the chunk of output -** data being returned. If the xOutput callback successfully processes the -** supplied data, it should return SQLITE_OK to indicate success. Otherwise, -** it should return some other SQLite error code. In this case processing -** is immediately abandoned and the streaming API function returns a copy -** of the xOutput error code to the application. -** -** The sessions module never invokes an xOutput callback with the third -** parameter set to a value less than or equal to zero. Other than this, -** no guarantees are made as to the size of the chunks of data returned. -*/ -int sqlite3changeset_apply_strm( - sqlite3 *db, /* Apply change to "main" db of this handle */ - int (*xInput)(void *pIn, void *pData, int *pnData), /* Input function */ - void *pIn, /* First arg for xInput */ - int(*xFilter)( - void *pCtx, /* Copy of sixth arg to _apply() */ - const char *zTab /* Table name */ - ), - int(*xConflict)( - void *pCtx, /* Copy of sixth arg to _apply() */ - int eConflict, /* DATA, MISSING, CONFLICT, CONSTRAINT */ - sqlite3_changeset_iter *p /* Handle describing change and conflict */ - ), - void *pCtx /* First argument passed to xConflict */ -); -int sqlite3changeset_concat_strm( - int (*xInputA)(void *pIn, void *pData, int *pnData), - void *pInA, - int (*xInputB)(void *pIn, void *pData, int *pnData), - void *pInB, - int (*xOutput)(void *pOut, const void *pData, int nData), - void *pOut -); -int sqlite3changeset_invert_strm( - int (*xInput)(void *pIn, void *pData, int *pnData), - void *pIn, - int (*xOutput)(void *pOut, const void *pData, int nData), - void *pOut -); -int sqlite3changeset_start_strm( - sqlite3_changeset_iter **pp, - int (*xInput)(void *pIn, void *pData, int *pnData), - void *pIn -); -int sqlite3session_changeset_strm( - sqlite3_session *pSession, - int (*xOutput)(void *pOut, const void *pData, int nData), - void *pOut -); -int sqlite3session_patchset_strm( - sqlite3_session *pSession, - int (*xOutput)(void *pOut, const void *pData, int nData), - void *pOut -); -int sqlite3changegroup_add_strm(sqlite3_changegroup*, - int (*xInput)(void *pIn, void *pData, int *pnData), - void *pIn -); -int sqlite3changegroup_output_strm(sqlite3_changegroup*, - int (*xOutput)(void *pOut, const void *pData, int nData), - void *pOut -); - - -/* -** Make sure we can call this stuff from C++. -*/ -#ifdef __cplusplus -} -#endif - -#endif /* !defined(__SQLITESESSION_H_) && defined(SQLITE_ENABLE_SESSION) */ - -/******** End of sqlite3session.h *********/ -/******** Begin file fts5.h *********/ -/* -** 2014 May 31 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** Interfaces to extend FTS5. Using the interfaces defined in this file, -** FTS5 may be extended with: -** -** * custom tokenizers, and -** * custom auxiliary functions. 
-*/ - - -#ifndef _FTS5_H -#define _FTS5_H - - -#ifdef __cplusplus -extern "C" { -#endif - -/************************************************************************* -** CUSTOM AUXILIARY FUNCTIONS -** -** Virtual table implementations may overload SQL functions by implementing -** the sqlite3_module.xFindFunction() method. -*/ - -typedef struct Fts5ExtensionApi Fts5ExtensionApi; -typedef struct Fts5Context Fts5Context; -typedef struct Fts5PhraseIter Fts5PhraseIter; - -typedef void (*fts5_extension_function)( - const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ - Fts5Context *pFts, /* First arg to pass to pApi functions */ - sqlite3_context *pCtx, /* Context for returning result/error */ - int nVal, /* Number of values in apVal[] array */ - sqlite3_value **apVal /* Array of trailing arguments */ -); - -struct Fts5PhraseIter { - const unsigned char *a; - const unsigned char *b; -}; - -/* -** EXTENSION API FUNCTIONS -** -** xUserData(pFts): -** Return a copy of the context pointer the extension function was -** registered with. -** -** xColumnTotalSize(pFts, iCol, pnToken): -** If parameter iCol is less than zero, set output variable *pnToken -** to the total number of tokens in the FTS5 table. Or, if iCol is -** non-negative but less than the number of columns in the table, return -** the total number of tokens in column iCol, considering all rows in -** the FTS5 table. -** -** If parameter iCol is greater than or equal to the number of columns -** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. -** an OOM condition or IO error), an appropriate SQLite error code is -** returned. -** -** xColumnCount(pFts): -** Return the number of columns in the table. -** -** xColumnSize(pFts, iCol, pnToken): -** If parameter iCol is less than zero, set output variable *pnToken -** to the total number of tokens in the current row. Or, if iCol is -** non-negative but less than the number of columns in the table, set -** *pnToken to the number of tokens in column iCol of the current row. -** -** If parameter iCol is greater than or equal to the number of columns -** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. -** an OOM condition or IO error), an appropriate SQLite error code is -** returned. -** -** This function may be quite inefficient if used with an FTS5 table -** created with the "columnsize=0" option. -** -** xColumnText: -** This function attempts to retrieve the text of column iCol of the -** current document. If successful, (*pz) is set to point to a buffer -** containing the text in utf-8 encoding, (*pn) is set to the size in bytes -** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, -** if an error occurs, an SQLite error code is returned and the final values -** of (*pz) and (*pn) are undefined. -** -** xPhraseCount: -** Returns the number of phrases in the current query expression. -** -** xPhraseSize: -** Returns the number of tokens in phrase iPhrase of the query. Phrases -** are numbered starting from zero. -** -** xInstCount: -** Set *pnInst to the total number of occurrences of all phrases within -** the query within the current row. Return SQLITE_OK if successful, or -** an error code (i.e. SQLITE_NOMEM) if an error occurs. -** -** This API can be quite slow if used with an FTS5 table created with the -** "detail=none" or "detail=column" option. If the FTS5 table is created -** with either "detail=none" or "detail=column" and "content=" option -** (i.e. 
if it is a contentless table), then this API always returns 0.
-**
-** xInst:
-**   Query for the details of phrase match iIdx within the current row.
-**   Phrase matches are numbered starting from zero, so the iIdx argument
-**   should be greater than or equal to zero and smaller than the value
-**   output by xInstCount().
-**
-**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
-**   to the column in which it occurs and *piOff the token offset of the
-**   first token of the phrase. The exception is if the table was created
-**   with the offsets=0 option specified. In this case *piOff is always
-**   set to -1.
-**
-**   Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)
-**   if an error occurs.
-**
-**   This API can be quite slow if used with an FTS5 table created with the
-**   "detail=none" or "detail=column" option.
-**
-** xRowid:
-**   Returns the rowid of the current row.
-**
-** xTokenize:
-**   Tokenize text using the tokenizer belonging to the FTS5 table.
-**
-** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
-**   This API function is used to query the FTS table for phrase iPhrase
-**   of the current query. Specifically, a query equivalent to:
-**
-**       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
-**
-**   with $p set to a phrase equivalent to the phrase iPhrase of the
-**   current query is executed. Any column filter that applies to
-**   phrase iPhrase of the current query is included in $p. For each
-**   row visited, the callback function passed as the fourth argument
-**   is invoked. The context and API objects passed to the callback
-**   function may be used to access the properties of each matched row.
-**   Invoking Api.xUserData() returns a copy of the pointer passed as
-**   the third argument to pUserData.
-**
-**   If the callback function returns any value other than SQLITE_OK, the
-**   query is abandoned and the xQueryPhrase function returns immediately.
-**   If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
-**   Otherwise, the error code is propagated upwards.
-**
-**   If the query runs to completion without incident, SQLITE_OK is returned.
-**   Or, if some error occurs before the query completes or is aborted by
-**   the callback, an SQLite error code is returned.
-**
-**
-** xSetAuxdata(pFts5, pAux, xDelete)
-**
-**   Save the pointer passed as the second argument as the extension
-**   function's "auxiliary data". The pointer may then be retrieved by the
-**   current or any future invocation of the same fts5 extension function
-**   made as part of the same MATCH query using the xGetAuxdata() API.
-**
-**   Each extension function is allocated a single auxiliary data slot for
-**   each FTS query (MATCH expression). If the extension function is invoked
-**   more than once for a single FTS query, then all invocations share a
-**   single auxiliary data context.
-**
-**   If there is already an auxiliary data pointer when this function is
-**   invoked, then it is replaced by the new pointer. If an xDelete callback
-**   was specified along with the original pointer, it is invoked at this
-**   point.
-**
-**   The xDelete callback, if one is specified, is also invoked on the
-**   auxiliary data pointer after the FTS5 query has finished.
-**
-**   If an error (e.g. an OOM condition) occurs within this function, the
-**   auxiliary data is set to NULL and an error code returned. If the
-**   xDelete parameter was not NULL, it is invoked on the auxiliary data
-**   pointer before returning.
-** -** -** xGetAuxdata(pFts5, bClear) -** -** Returns the current auxiliary data pointer for the fts5 extension -** function. See the xSetAuxdata() method for details. -** -** If the bClear argument is non-zero, then the auxiliary data is cleared -** (set to NULL) before this function returns. In this case the xDelete, -** if any, is not invoked. -** -** -** xRowCount(pFts5, pnRow) -** -** This function is used to retrieve the total number of rows in the table. -** In other words, the same value that would be returned by: -** -** SELECT count(*) FROM ftstable; -** -** xPhraseFirst() -** This function is used, along with type Fts5PhraseIter and the xPhraseNext -** method, to iterate through all instances of a single query phrase within -** the current row. This is the same information as is accessible via the -** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient -** to use, this API may be faster under some circumstances. To iterate -** through instances of phrase iPhrase, use the following code: -** -** Fts5PhraseIter iter; -** int iCol, iOff; -** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); -** iCol>=0; -** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) -** ){ -** // An instance of phrase iPhrase at offset iOff of column iCol -** } -** -** The Fts5PhraseIter structure is defined above. Applications should not -** modify this structure directly - it should only be used as shown above -** with the xPhraseFirst() and xPhraseNext() API methods (and by -** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). -** -** This API can be quite slow if used with an FTS5 table created with the -** "detail=none" or "detail=column" option. If the FTS5 table is created -** with either "detail=none" or "detail=column" and "content=" option -** (i.e. if it is a contentless table), then this API always iterates -** through an empty set (all calls to xPhraseFirst() set iCol to -1). -** -** xPhraseNext() -** See xPhraseFirst above. -** -** xPhraseFirstColumn() -** This function and xPhraseNextColumn() are similar to the xPhraseFirst() -** and xPhraseNext() APIs described above. The difference is that instead -** of iterating through all instances of a phrase in the current row, these -** APIs are used to iterate through the set of columns in the current row -** that contain one or more instances of a specified phrase. For example: -** -** Fts5PhraseIter iter; -** int iCol; -** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); -** iCol>=0; -** pApi->xPhraseNextColumn(pFts, &iter, &iCol) -** ){ -** // Column iCol contains at least one instance of phrase iPhrase -** } -** -** This API can be quite slow if used with an FTS5 table created with the -** "detail=none" option. If the FTS5 table is created with either -** "detail=none" "content=" option (i.e. if it is a contentless table), -** then this API always iterates through an empty set (all calls to -** xPhraseFirstColumn() set iCol to -1). -** -** The information accessed using this API and its companion -** xPhraseFirstColumn() may also be obtained using xPhraseFirst/xPhraseNext -** (or xInst/xInstCount). The chief advantage of this API is that it is -** significantly more efficient than those alternatives when used with -** "detail=column" tables. -** -** xPhraseNextColumn() -** See xPhraseFirstColumn above. 
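As a rough illustration of how an auxiliary function might combine the APIs documented above, the sketch below counts the phrase instances that fall in column 0 of the current row via xInstCount()/xInst(). The name matchcount_col0 is invented for this example; it is not part of the header:

```c
/* Hypothetical auxiliary function: counts how many phrase instances in
** the current row occur in column 0, using the xInstCount/xInst APIs
** documented above. An illustrative sketch only. */
static void matchcount_col0(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int nInst = 0;
  int nSeen = 0;
  int i;
  int rc = pApi->xInstCount(pFts, &nInst);
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int iPhrase, iCol, iOff;
    rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
    if( rc==SQLITE_OK && iCol==0 ) nSeen++;   /* match in first column */
  }
  if( rc==SQLITE_OK ){
    sqlite3_result_int(pCtx, nSeen);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}
```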
-*/
-struct Fts5ExtensionApi {
-  int iVersion;                   /* Currently always set to 3 */
-
-  void *(*xUserData)(Fts5Context*);
-
-  int (*xColumnCount)(Fts5Context*);
-  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
-  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
-
-  int (*xTokenize)(Fts5Context*,
-    const char *pText, int nText, /* Text to tokenize */
-    void *pCtx,                   /* Context passed to xToken() */
-    int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
-  );
-
-  int (*xPhraseCount)(Fts5Context*);
-  int (*xPhraseSize)(Fts5Context*, int iPhrase);
-
-  int (*xInstCount)(Fts5Context*, int *pnInst);
-  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
-
-  sqlite3_int64 (*xRowid)(Fts5Context*);
-  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
-  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
-
-  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
-    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
-  );
-  int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
-  void *(*xGetAuxdata)(Fts5Context*, int bClear);
-
-  int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
-  void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
-
-  int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
-  void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
-};
-
-/*
-** CUSTOM AUXILIARY FUNCTIONS
-*************************************************************************/
-
-/*************************************************************************
-** CUSTOM TOKENIZERS
-**
-** Applications may also register custom tokenizer types. A tokenizer
-** is registered by providing fts5 with a populated instance of the
-** following structure. All structure methods must be defined; setting
-** any member of the fts5_tokenizer struct to NULL leads to undefined
-** behaviour. The structure methods are expected to function as follows:
-**
-** xCreate:
-**   This function is used to allocate and initialize a tokenizer instance.
-**   A tokenizer instance is required to actually tokenize text.
-**
-**   The first argument passed to this function is a copy of the (void*)
-**   pointer provided by the application when the fts5_tokenizer object
-**   was registered with FTS5 (the third argument to xCreateTokenizer()).
-**   The second and third arguments are an array of nul-terminated strings
-**   containing the tokenizer arguments, if any, specified following the
-**   tokenizer name as part of the CREATE VIRTUAL TABLE statement used
-**   to create the FTS5 table.
-**
-**   The final argument is an output variable. If successful, (*ppOut)
-**   should be set to point to the new tokenizer handle and SQLITE_OK
-**   returned. If an error occurs, some value other than SQLITE_OK should
-**   be returned. In this case, fts5 assumes that the final value of *ppOut
-**   is undefined.
-**
-** xDelete:
-**   This function is invoked to delete a tokenizer handle previously
-**   allocated using xCreate(). Fts5 guarantees that this function will
-**   be invoked exactly once for each successful call to xCreate().
-**
-** xTokenize:
-**   This function is expected to tokenize the nText byte string indicated
-**   by argument pText. pText may or may not be nul-terminated. The first
-**   argument passed to this function is a pointer to an Fts5Tokenizer object
-**   returned by an earlier call to xCreate().
-** -** The second argument indicates the reason that FTS5 is requesting -** tokenization of the supplied text. This is always one of the following -** four values: -** -**
  • FTS5_TOKENIZE_DOCUMENT - A document is being inserted into -** or removed from the FTS table. The tokenizer is being invoked to -** determine the set of tokens to add to (or delete from) the -** FTS index. -** -**
  • FTS5_TOKENIZE_QUERY - A MATCH query is being executed -** against the FTS index. The tokenizer is being called to tokenize -** a bareword or quoted string specified as part of the query. -** -**
  • (FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX) - Same as -** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is -** followed by a "*" character, indicating that the last token -** returned by the tokenizer will be treated as a token prefix. -** -**
  • FTS5_TOKENIZE_AUX - The tokenizer is being invoked to -** satisfy an fts5_api.xTokenize() request made by an auxiliary -** function. Or an fts5_api.xColumnSize() request made by the same -** on a columnsize=0 database. -**
-** -** For each token in the input string, the supplied callback xToken() must -** be invoked. The first argument to it should be a copy of the pointer -** passed as the second argument to xTokenize(). The third and fourth -** arguments are a pointer to a buffer containing the token text, and the -** size of the token in bytes. The 4th and 5th arguments are the byte offsets -** of the first byte of and first byte immediately following the text from -** which the token is derived within the input. -** -** The second argument passed to the xToken() callback ("tflags") should -** normally be set to 0. The exception is if the tokenizer supports -** synonyms. In this case see the discussion below for details. -** -** FTS5 assumes the xToken() callback is invoked for each token in the -** order that they occur within the input text. -** -** If an xToken() callback returns any value other than SQLITE_OK, then -** the tokenization should be abandoned and the xTokenize() method should -** immediately return a copy of the xToken() return value. Or, if the -** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, -** if an error occurs with the xTokenize() implementation itself, it -** may abandon the tokenization and return any error code other than -** SQLITE_OK or SQLITE_DONE. -** -** SYNONYM SUPPORT -** -** Custom tokenizers may also support synonyms. Consider a case in which a -** user wishes to query for a phrase such as "first place". Using the -** built-in tokenizers, the FTS5 query 'first + place' will match instances -** of "first place" within the document set, but not alternative forms -** such as "1st place". In some applications, it would be better to match -** all instances of "first place" or "1st place" regardless of which form -** the user specified in the MATCH query text. -** -** There are several ways to approach this in FTS5: -** -**
  1. By mapping all synonyms to a single token. In this case, using
-**      the above example, this means that the tokenizer returns the
-**      same token for inputs "first" and "1st". Say that token is in
-**      fact "first", so that when the user inserts the document "I won
-**      1st place" entries are added to the index for tokens "i", "won",
-**      "first" and "place". If the user then queries for '1st + place',
-**      the tokenizer substitutes "first" for "1st" and the query works
-**      as expected.
-**
-**
  2. By querying the index for all synonyms of each query term
-**      separately. In this case, when tokenizing query text, the
-**      tokenizer may provide multiple synonyms for a single term
-**      within the document. FTS5 then queries the index for each
-**      synonym individually. For example, faced with the query:
-**
-**
-**   ... MATCH 'first place'
-**
-**      the tokenizer offers both "1st" and "first" as synonyms for the
-**      first token in the MATCH query and FTS5 effectively runs a query
-**      similar to:
-**
-**
-**   ... MATCH '(first OR 1st) place'
-**
-**      except that, for the purposes of auxiliary functions, the query
-**      still appears to contain just two phrases - "(first OR 1st)"
-**      being treated as a single phrase.
-**
-**
  3. By adding multiple synonyms for a single term to the FTS index.
-**      Using this method, when tokenizing document text, the tokenizer
-**      provides multiple synonyms for each token. So that when a
-**      document such as "I won first place" is tokenized, entries are
-**      added to the FTS index for "i", "won", "first", "1st" and
-**      "place".
-**
-**      This way, even if the tokenizer does not provide synonyms
-**      when tokenizing query text (it should not - to do so would be
-**      inefficient), it doesn't matter if the user queries for
-**      'first + place' or '1st + place', as there are entries in the
-**      FTS index corresponding to both forms of the first token.
-**
-**
-** Whether it is parsing document or query text, any call to xToken that
-** specifies a tflags argument with the FTS5_TOKEN_COLOCATED bit set
-** is considered to supply a synonym for the previous token. For example,
-** when parsing the document "I won first place", a tokenizer that supports
-** synonyms would call xToken() 5 times, as follows:
-**
-**
-**       xToken(pCtx, 0, "i",                      1,  0,  1);
-**       xToken(pCtx, 0, "won",                    3,  2,  5);
-**       xToken(pCtx, 0, "first",                  5,  6, 11);
-**       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
-**       xToken(pCtx, 0, "place",                  5, 12, 17);
-**
-**
-** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
-** xToken() is called. Multiple synonyms may be specified for a single token
-** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
-** There is no limit to the number of synonyms that may be provided for a
-** single token.
-**
-** In many cases, method (1) above is the best approach. It does not add
-** extra data to the FTS index or require FTS5 to query for multiple terms,
-** so it is efficient in terms of disk space and query speed. However, it
-** does not support prefix queries very well. If, as suggested above, the
-** token "first" is substituted for "1st" by the tokenizer, then the query:
-**
-**
-**   ... MATCH '1s*'
-**
-** will not match documents that contain the token "1st" (as the tokenizer
-** will probably not map "1s" to any prefix of "first").
-**
-** For full prefix support, method (3) may be preferred. In this case,
-** because the index contains entries for both "first" and "1st", prefix
-** queries such as 'fi*' or '1s*' will match correctly. However, because
-** extra entries are added to the FTS index, this method uses more space
-** within the database.
-**
-** Method (2) offers a midpoint between (1) and (3). Using this method,
-** a query such as '1s*' will match documents that contain the literal
-** token "1st", but not "first" (assuming the tokenizer is not able to
-** provide synonyms for prefixes). However, a non-prefix query like '1st'
-** will match against "1st" and "first". This method does not require
-** extra disk space, as no extra entries are added to the FTS index.
-** On the other hand, it may require more CPU cycles to run MATCH queries,
-** as separate queries of the FTS index are required for each synonym.
-**
-** When using methods (2) or (3), it is important that the tokenizer only
-** provide synonyms when tokenizing document text (method (3)) or query
-** text (method (2)), not both. Doing so will not cause any errors, but is
-** inefficient.
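A minimal sketch of method (3), assuming a hypothetical find_synonym() lookup: a helper that a custom xTokenize() implementation could call to emit each token and, when indexing document text only, a synonym as a colocated entry. All names here are invented for illustration and are not part of the header:

```c
#include <string.h>
#include "fts5.h"

/* Hypothetical lookup: returns a nul-terminated synonym for the given
** token, or NULL if there is none. Invented for this example. */
extern const char *find_synonym(const char *pTok, int nTok);

/* Emit one token, plus any synonym as a colocated entry. Intended to be
** called from a custom fts5_tokenizer.xTokenize() implementation, which
** receives both the flags and the xToken callback shown here. */
static int emit_with_synonyms(
  void *pCtx,                     /* 2nd argument passed to xTokenize() */
  int flags,                      /* Mask of FTS5_TOKENIZE_* flags */
  const char *pTok, int nTok,     /* Token text and size in bytes */
  int iStart, int iEnd,           /* Byte offsets within the input */
  int (*xToken)(void*, int, const char*, int, int, int)
){
  int rc = xToken(pCtx, 0, pTok, nTok, iStart, iEnd);
  /* Method (3): add synonyms only when indexing document text. */
  if( rc==SQLITE_OK && (flags & FTS5_TOKENIZE_DOCUMENT) ){
    const char *zSyn = find_synonym(pTok, nTok);
    if( zSyn ){
      rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, zSyn,
                  (int)strlen(zSyn), iStart, iEnd);
    }
  }
  return rc;
}
```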
-*/ -typedef struct Fts5Tokenizer Fts5Tokenizer; -typedef struct fts5_tokenizer fts5_tokenizer; -struct fts5_tokenizer { - int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); - void (*xDelete)(Fts5Tokenizer*); - int (*xTokenize)(Fts5Tokenizer*, - void *pCtx, - int flags, /* Mask of FTS5_TOKENIZE_* flags */ - const char *pText, int nText, - int (*xToken)( - void *pCtx, /* Copy of 2nd argument to xTokenize() */ - int tflags, /* Mask of FTS5_TOKEN_* flags */ - const char *pToken, /* Pointer to buffer containing token */ - int nToken, /* Size of token in bytes */ - int iStart, /* Byte offset of token within input text */ - int iEnd /* Byte offset of end of token within input text */ - ) - ); -}; - -/* Flags that may be passed as the third argument to xTokenize() */ -#define FTS5_TOKENIZE_QUERY 0x0001 -#define FTS5_TOKENIZE_PREFIX 0x0002 -#define FTS5_TOKENIZE_DOCUMENT 0x0004 -#define FTS5_TOKENIZE_AUX 0x0008 - -/* Flags that may be passed by the tokenizer implementation back to FTS5 -** as the third argument to the supplied xToken callback. */ -#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */ - -/* -** END OF CUSTOM TOKENIZERS -*************************************************************************/ - -/************************************************************************* -** FTS5 EXTENSION REGISTRATION API -*/ -typedef struct fts5_api fts5_api; -struct fts5_api { - int iVersion; /* Currently always set to 2 */ - - /* Create a new tokenizer */ - int (*xCreateTokenizer)( - fts5_api *pApi, - const char *zName, - void *pContext, - fts5_tokenizer *pTokenizer, - void (*xDestroy)(void*) - ); - - /* Find an existing tokenizer */ - int (*xFindTokenizer)( - fts5_api *pApi, - const char *zName, - void **ppContext, - fts5_tokenizer *pTokenizer - ); - - /* Create a new auxiliary function */ - int (*xCreateFunction)( - fts5_api *pApi, - const char *zName, - void *pContext, - fts5_extension_function xFunction, - void (*xDestroy)(void*) - ); -}; - -/* -** END OF REGISTRATION API -*************************************************************************/ - -#ifdef __cplusplus -} /* end of the 'extern "C"' block */ -#endif - -#endif /* _FTS5_H */ - - -/******** End of fts5.h *********/ diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3_32.def b/vendor/pyLibrary/vendor/sqlite/sqlite3_32.def deleted file mode 100644 index 0ab28af..0000000 --- a/vendor/pyLibrary/vendor/sqlite/sqlite3_32.def +++ /dev/null @@ -1,237 +0,0 @@ -EXPORTS -sqlite3_aggregate_context -sqlite3_aggregate_count -sqlite3_auto_extension -sqlite3_backup_finish -sqlite3_backup_init -sqlite3_backup_pagecount -sqlite3_backup_remaining -sqlite3_backup_step -sqlite3_bind_blob -sqlite3_bind_blob64 -sqlite3_bind_double -sqlite3_bind_int -sqlite3_bind_int64 -sqlite3_bind_null -sqlite3_bind_parameter_count -sqlite3_bind_parameter_index -sqlite3_bind_parameter_name -sqlite3_bind_text -sqlite3_bind_text16 -sqlite3_bind_text64 -sqlite3_bind_value -sqlite3_bind_zeroblob -sqlite3_bind_zeroblob64 -sqlite3_blob_bytes -sqlite3_blob_close -sqlite3_blob_open -sqlite3_blob_read -sqlite3_blob_reopen -sqlite3_blob_write -sqlite3_busy_handler -sqlite3_busy_timeout -sqlite3_cancel_auto_extension -sqlite3_changes -sqlite3_clear_bindings -sqlite3_close -sqlite3_close_v2 -sqlite3_collation_needed -sqlite3_collation_needed16 -sqlite3_column_blob -sqlite3_column_bytes -sqlite3_column_bytes16 -sqlite3_column_count -sqlite3_column_database_name -sqlite3_column_database_name16 -sqlite3_column_decltype 
-sqlite3_column_decltype16 -sqlite3_column_double -sqlite3_column_int -sqlite3_column_int64 -sqlite3_column_name -sqlite3_column_name16 -sqlite3_column_origin_name -sqlite3_column_origin_name16 -sqlite3_column_table_name -sqlite3_column_table_name16 -sqlite3_column_text -sqlite3_column_text16 -sqlite3_column_type -sqlite3_column_value -sqlite3_commit_hook -sqlite3_compileoption_get -sqlite3_compileoption_used -sqlite3_complete -sqlite3_complete16 -sqlite3_config -sqlite3_context_db_handle -sqlite3_create_collation -sqlite3_create_collation_v2 -sqlite3_create_collation16 -sqlite3_create_function -sqlite3_create_function_v2 -sqlite3_create_function16 -sqlite3_create_module -sqlite3_create_module_v2 -sqlite3_data_count -sqlite3_data_directory -sqlite3_db_cacheflush -sqlite3_db_config -sqlite3_db_filename -sqlite3_db_handle -sqlite3_db_mutex -sqlite3_db_readonly -sqlite3_db_release_memory -sqlite3_db_status -sqlite3_declare_vtab -sqlite3_enable_load_extension -sqlite3_enable_shared_cache -sqlite3_errcode -sqlite3_errmsg -sqlite3_errmsg16 -sqlite3_errstr -sqlite3_exec -sqlite3_expired -sqlite3_extended_errcode -sqlite3_extended_result_codes -sqlite3_file_control -sqlite3_finalize -sqlite3_free -sqlite3_free_table -sqlite3_fts5_may_be_corrupt -sqlite3_get_autocommit -sqlite3_get_auxdata -sqlite3_get_table -sqlite3_global_recover -sqlite3_initialize -sqlite3_interrupt -sqlite3_last_insert_rowid -sqlite3_libversion -sqlite3_libversion_number -sqlite3_limit -sqlite3_load_extension -sqlite3_log -sqlite3_malloc -sqlite3_malloc64 -sqlite3_memory_alarm -sqlite3_memory_highwater -sqlite3_memory_used -sqlite3_mprintf -sqlite3_msize -sqlite3_mutex_alloc -sqlite3_mutex_enter -sqlite3_mutex_free -sqlite3_mutex_leave -sqlite3_mutex_try -sqlite3_next_stmt -sqlite3_open -sqlite3_open_v2 -sqlite3_open16 -sqlite3_os_end -sqlite3_os_init -sqlite3_overload_function -sqlite3_prepare -sqlite3_prepare_v2 -sqlite3_prepare16 -sqlite3_prepare16_v2 -sqlite3_profile -sqlite3_progress_handler -sqlite3_randomness -sqlite3_realloc -sqlite3_realloc64 -sqlite3_release_memory -sqlite3_reset -sqlite3_reset_auto_extension -sqlite3_result_blob -sqlite3_result_blob64 -sqlite3_result_double -sqlite3_result_error -sqlite3_result_error_code -sqlite3_result_error_nomem -sqlite3_result_error_toobig -sqlite3_result_error16 -sqlite3_result_int -sqlite3_result_int64 -sqlite3_result_null -sqlite3_result_subtype -sqlite3_result_text -sqlite3_result_text16 -sqlite3_result_text16be -sqlite3_result_text16le -sqlite3_result_text64 -sqlite3_result_value -sqlite3_result_zeroblob -sqlite3_result_zeroblob64 -sqlite3_rollback_hook -sqlite3_rtree_geometry_callback -sqlite3_rtree_query_callback -sqlite3_set_authorizer -sqlite3_set_auxdata -sqlite3_shutdown -sqlite3_sleep -sqlite3_snprintf -sqlite3_soft_heap_limit -sqlite3_soft_heap_limit64 -sqlite3_sourceid -sqlite3_sql -sqlite3_status -sqlite3_status64 -sqlite3_step -sqlite3_stmt_busy -sqlite3_stmt_readonly -sqlite3_stmt_status -sqlite3_strglob -sqlite3_stricmp -sqlite3_strlike -sqlite3_strnicmp -sqlite3_system_errno -sqlite3_table_column_metadata -sqlite3_temp_directory -sqlite3_test_control -sqlite3_thread_cleanup -sqlite3_threadsafe -sqlite3_total_changes -sqlite3_trace -sqlite3_transfer_bindings -sqlite3_update_hook -sqlite3_uri_boolean -sqlite3_uri_int64 -sqlite3_uri_parameter -sqlite3_user_data -sqlite3_value_blob -sqlite3_value_bytes -sqlite3_value_bytes16 -sqlite3_value_double -sqlite3_value_dup -sqlite3_value_free -sqlite3_value_int -sqlite3_value_int64 -sqlite3_value_numeric_type 
-sqlite3_value_subtype -sqlite3_value_text -sqlite3_value_text16 -sqlite3_value_text16be -sqlite3_value_text16le -sqlite3_value_type -sqlite3_version -sqlite3_vfs_find -sqlite3_vfs_register -sqlite3_vfs_unregister -sqlite3_vmprintf -sqlite3_vsnprintf -sqlite3_vtab_config -sqlite3_vtab_on_conflict -sqlite3_wal_autocheckpoint -sqlite3_wal_checkpoint -sqlite3_wal_checkpoint_v2 -sqlite3_wal_hook -sqlite3_win32_is_nt -sqlite3_win32_mbcs_to_utf8 -sqlite3_win32_mbcs_to_utf8_v2 -sqlite3_win32_set_directory -sqlite3_win32_sleep -sqlite3_win32_unicode_to_utf8 -sqlite3_win32_utf8_to_mbcs -sqlite3_win32_utf8_to_mbcs_v2 -sqlite3_win32_utf8_to_unicode -sqlite3_win32_write_debug diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll b/vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll deleted file mode 100644 index 9ef02de..0000000 Binary files a/vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll and /dev/null differ diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3_64.def b/vendor/pyLibrary/vendor/sqlite/sqlite3_64.def deleted file mode 100644 index f024e1d..0000000 --- a/vendor/pyLibrary/vendor/sqlite/sqlite3_64.def +++ /dev/null @@ -1,248 +0,0 @@ -EXPORTS -sqlite3_aggregate_context -sqlite3_aggregate_count -sqlite3_auto_extension -sqlite3_backup_finish -sqlite3_backup_init -sqlite3_backup_pagecount -sqlite3_backup_remaining -sqlite3_backup_step -sqlite3_bind_blob -sqlite3_bind_blob64 -sqlite3_bind_double -sqlite3_bind_int -sqlite3_bind_int64 -sqlite3_bind_null -sqlite3_bind_parameter_count -sqlite3_bind_parameter_index -sqlite3_bind_parameter_name -sqlite3_bind_pointer -sqlite3_bind_text -sqlite3_bind_text16 -sqlite3_bind_text64 -sqlite3_bind_value -sqlite3_bind_zeroblob -sqlite3_bind_zeroblob64 -sqlite3_blob_bytes -sqlite3_blob_close -sqlite3_blob_open -sqlite3_blob_read -sqlite3_blob_reopen -sqlite3_blob_write -sqlite3_busy_handler -sqlite3_busy_timeout -sqlite3_cancel_auto_extension -sqlite3_changes -sqlite3_clear_bindings -sqlite3_close -sqlite3_close_v2 -sqlite3_collation_needed -sqlite3_collation_needed16 -sqlite3_column_blob -sqlite3_column_bytes -sqlite3_column_bytes16 -sqlite3_column_count -sqlite3_column_database_name -sqlite3_column_database_name16 -sqlite3_column_decltype -sqlite3_column_decltype16 -sqlite3_column_double -sqlite3_column_int -sqlite3_column_int64 -sqlite3_column_name -sqlite3_column_name16 -sqlite3_column_origin_name -sqlite3_column_origin_name16 -sqlite3_column_table_name -sqlite3_column_table_name16 -sqlite3_column_text -sqlite3_column_text16 -sqlite3_column_type -sqlite3_column_value -sqlite3_commit_hook -sqlite3_compileoption_get -sqlite3_compileoption_used -sqlite3_complete -sqlite3_complete16 -sqlite3_config -sqlite3_context_db_handle -sqlite3_create_collation -sqlite3_create_collation_v2 -sqlite3_create_collation16 -sqlite3_create_function -sqlite3_create_function_v2 -sqlite3_create_function16 -sqlite3_create_module -sqlite3_create_module_v2 -sqlite3_data_count -sqlite3_data_directory -sqlite3_db_cacheflush -sqlite3_db_config -sqlite3_db_filename -sqlite3_db_handle -sqlite3_db_mutex -sqlite3_db_readonly -sqlite3_db_release_memory -sqlite3_db_status -sqlite3_declare_vtab -sqlite3_enable_load_extension -sqlite3_enable_shared_cache -sqlite3_errcode -sqlite3_errmsg -sqlite3_errmsg16 -sqlite3_errstr -sqlite3_exec -sqlite3_expanded_sql -sqlite3_expired -sqlite3_extended_errcode -sqlite3_extended_result_codes -sqlite3_file_control -sqlite3_finalize -sqlite3_free -sqlite3_free_table -sqlite3_fts5_may_be_corrupt -sqlite3_get_autocommit -sqlite3_get_auxdata -sqlite3_get_table 
-sqlite3_global_recover -sqlite3_initialize -sqlite3_interrupt -sqlite3_last_insert_rowid -sqlite3_libversion -sqlite3_libversion_number -sqlite3_limit -sqlite3_load_extension -sqlite3_log -sqlite3_malloc -sqlite3_malloc64 -sqlite3_memory_alarm -sqlite3_memory_highwater -sqlite3_memory_used -sqlite3_mprintf -sqlite3_msize -sqlite3_mutex_alloc -sqlite3_mutex_enter -sqlite3_mutex_free -sqlite3_mutex_leave -sqlite3_mutex_try -sqlite3_next_stmt -sqlite3_open -sqlite3_open_v2 -sqlite3_open16 -sqlite3_os_end -sqlite3_os_init -sqlite3_overload_function -sqlite3_prepare -sqlite3_prepare_v2 -sqlite3_prepare_v3 -sqlite3_prepare16 -sqlite3_prepare16_v2 -sqlite3_prepare16_v3 -sqlite3_profile -sqlite3_progress_handler -sqlite3_randomness -sqlite3_realloc -sqlite3_realloc64 -sqlite3_release_memory -sqlite3_reset -sqlite3_reset_auto_extension -sqlite3_result_blob -sqlite3_result_blob64 -sqlite3_result_double -sqlite3_result_error -sqlite3_result_error_code -sqlite3_result_error_nomem -sqlite3_result_error_toobig -sqlite3_result_error16 -sqlite3_result_int -sqlite3_result_int64 -sqlite3_result_null -sqlite3_result_pointer -sqlite3_result_subtype -sqlite3_result_text -sqlite3_result_text16 -sqlite3_result_text16be -sqlite3_result_text16le -sqlite3_result_text64 -sqlite3_result_value -sqlite3_result_zeroblob -sqlite3_result_zeroblob64 -sqlite3_rollback_hook -sqlite3_rtree_geometry_callback -sqlite3_rtree_query_callback -sqlite3_set_authorizer -sqlite3_set_auxdata -sqlite3_set_last_insert_rowid -sqlite3_shutdown -sqlite3_sleep -sqlite3_snprintf -sqlite3_soft_heap_limit -sqlite3_soft_heap_limit64 -sqlite3_sourceid -sqlite3_sql -sqlite3_status -sqlite3_status64 -sqlite3_step -sqlite3_stmt_busy -sqlite3_stmt_readonly -sqlite3_stmt_status -sqlite3_strglob -sqlite3_stricmp -sqlite3_strlike -sqlite3_strnicmp -sqlite3_system_errno -sqlite3_table_column_metadata -sqlite3_temp_directory -sqlite3_test_control -sqlite3_thread_cleanup -sqlite3_threadsafe -sqlite3_total_changes -sqlite3_trace -sqlite3_trace_v2 -sqlite3_transfer_bindings -sqlite3_update_hook -sqlite3_uri_boolean -sqlite3_uri_int64 -sqlite3_uri_parameter -sqlite3_user_data -sqlite3_value_blob -sqlite3_value_bytes -sqlite3_value_bytes16 -sqlite3_value_double -sqlite3_value_dup -sqlite3_value_free -sqlite3_value_int -sqlite3_value_int64 -sqlite3_value_nochange -sqlite3_value_numeric_type -sqlite3_value_pointer -sqlite3_value_subtype -sqlite3_value_text -sqlite3_value_text16 -sqlite3_value_text16be -sqlite3_value_text16le -sqlite3_value_type -sqlite3_version -sqlite3_vfs_find -sqlite3_vfs_register -sqlite3_vfs_unregister -sqlite3_vmprintf -sqlite3_vsnprintf -sqlite3_vtab_collation -sqlite3_vtab_config -sqlite3_vtab_nochange -sqlite3_vtab_on_conflict -sqlite3_wal_autocheckpoint -sqlite3_wal_checkpoint -sqlite3_wal_checkpoint_v2 -sqlite3_wal_hook -sqlite3_win32_is_nt -sqlite3_win32_mbcs_to_utf8 -sqlite3_win32_mbcs_to_utf8_v2 -sqlite3_win32_set_directory -sqlite3_win32_sleep -sqlite3_win32_unicode_to_utf8 -sqlite3_win32_utf8_to_mbcs -sqlite3_win32_utf8_to_mbcs_v2 -sqlite3_win32_utf8_to_unicode -sqlite3_win32_write_debug diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll b/vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll deleted file mode 100644 index e43c0ed..0000000 Binary files a/vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll and /dev/null differ diff --git a/vendor/pyLibrary/vendor/sqlite/sqlite3ext.h b/vendor/pyLibrary/vendor/sqlite/sqlite3ext.h deleted file mode 100644 index dd1ea4b..0000000 --- a/vendor/pyLibrary/vendor/sqlite/sqlite3ext.h +++ 
/dev/null @@ -1,527 +0,0 @@ -/* -** 2006 June 7 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This header file defines the SQLite interface for use by -** shared libraries that want to be imported as extensions into -** an SQLite instance. Shared libraries that intend to be loaded -** as extensions by SQLite should #include this file instead of -** sqlite3.h. -*/ -#ifndef _SQLITE3EXT_H_ -#define _SQLITE3EXT_H_ -#include "sqlite3.h" - -typedef struct sqlite3_api_routines sqlite3_api_routines; - -/* -** The following structure holds pointers to all of the SQLite API -** routines. -** -** WARNING: In order to maintain backwards compatibility, add new -** interfaces to the end of this structure only. If you insert new -** interfaces in the middle of this structure, then older different -** versions of SQLite will not be able to load each other's shared -** libraries! -*/ -struct sqlite3_api_routines { - void * (*aggregate_context)(sqlite3_context*,int nBytes); - int (*aggregate_count)(sqlite3_context*); - int (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*)); - int (*bind_double)(sqlite3_stmt*,int,double); - int (*bind_int)(sqlite3_stmt*,int,int); - int (*bind_int64)(sqlite3_stmt*,int,sqlite_int64); - int (*bind_null)(sqlite3_stmt*,int); - int (*bind_parameter_count)(sqlite3_stmt*); - int (*bind_parameter_index)(sqlite3_stmt*,const char*zName); - const char * (*bind_parameter_name)(sqlite3_stmt*,int); - int (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*)); - int (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*)); - int (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*); - int (*busy_handler)(sqlite3*,int(*)(void*,int),void*); - int (*busy_timeout)(sqlite3*,int ms); - int (*changes)(sqlite3*); - int (*close)(sqlite3*); - int (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*, - int eTextRep,const char*)); - int (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*, - int eTextRep,const void*)); - const void * (*column_blob)(sqlite3_stmt*,int iCol); - int (*column_bytes)(sqlite3_stmt*,int iCol); - int (*column_bytes16)(sqlite3_stmt*,int iCol); - int (*column_count)(sqlite3_stmt*pStmt); - const char * (*column_database_name)(sqlite3_stmt*,int); - const void * (*column_database_name16)(sqlite3_stmt*,int); - const char * (*column_decltype)(sqlite3_stmt*,int i); - const void * (*column_decltype16)(sqlite3_stmt*,int); - double (*column_double)(sqlite3_stmt*,int iCol); - int (*column_int)(sqlite3_stmt*,int iCol); - sqlite_int64 (*column_int64)(sqlite3_stmt*,int iCol); - const char * (*column_name)(sqlite3_stmt*,int); - const void * (*column_name16)(sqlite3_stmt*,int); - const char * (*column_origin_name)(sqlite3_stmt*,int); - const void * (*column_origin_name16)(sqlite3_stmt*,int); - const char * (*column_table_name)(sqlite3_stmt*,int); - const void * (*column_table_name16)(sqlite3_stmt*,int); - const unsigned char * (*column_text)(sqlite3_stmt*,int iCol); - const void * (*column_text16)(sqlite3_stmt*,int iCol); - int (*column_type)(sqlite3_stmt*,int iCol); - sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol); - void * (*commit_hook)(sqlite3*,int(*)(void*),void*); - int (*complete)(const char*sql); - int (*complete16)(const void*sql); 
- int (*create_collation)(sqlite3*,const char*,int,void*, - int(*)(void*,int,const void*,int,const void*)); - int (*create_collation16)(sqlite3*,const void*,int,void*, - int(*)(void*,int,const void*,int,const void*)); - int (*create_function)(sqlite3*,const char*,int,int,void*, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*)); - int (*create_function16)(sqlite3*,const void*,int,int,void*, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*)); - int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); - int (*data_count)(sqlite3_stmt*pStmt); - sqlite3 * (*db_handle)(sqlite3_stmt*); - int (*declare_vtab)(sqlite3*,const char*); - int (*enable_shared_cache)(int); - int (*errcode)(sqlite3*db); - const char * (*errmsg)(sqlite3*); - const void * (*errmsg16)(sqlite3*); - int (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**); - int (*expired)(sqlite3_stmt*); - int (*finalize)(sqlite3_stmt*pStmt); - void (*free)(void*); - void (*free_table)(char**result); - int (*get_autocommit)(sqlite3*); - void * (*get_auxdata)(sqlite3_context*,int); - int (*get_table)(sqlite3*,const char*,char***,int*,int*,char**); - int (*global_recover)(void); - void (*interruptx)(sqlite3*); - sqlite_int64 (*last_insert_rowid)(sqlite3*); - const char * (*libversion)(void); - int (*libversion_number)(void); - void *(*malloc)(int); - char * (*mprintf)(const char*,...); - int (*open)(const char*,sqlite3**); - int (*open16)(const void*,sqlite3**); - int (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); - int (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); - void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*); - void (*progress_handler)(sqlite3*,int,int(*)(void*),void*); - void *(*realloc)(void*,int); - int (*reset)(sqlite3_stmt*pStmt); - void (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*)); - void (*result_double)(sqlite3_context*,double); - void (*result_error)(sqlite3_context*,const char*,int); - void (*result_error16)(sqlite3_context*,const void*,int); - void (*result_int)(sqlite3_context*,int); - void (*result_int64)(sqlite3_context*,sqlite_int64); - void (*result_null)(sqlite3_context*); - void (*result_text)(sqlite3_context*,const char*,int,void(*)(void*)); - void (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*)); - void (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*)); - void (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*)); - void (*result_value)(sqlite3_context*,sqlite3_value*); - void * (*rollback_hook)(sqlite3*,void(*)(void*),void*); - int (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*, - const char*,const char*),void*); - void (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*)); - char * (*snprintf)(int,char*,const char*,...); - int (*step)(sqlite3_stmt*); - int (*table_column_metadata)(sqlite3*,const char*,const char*,const char*, - char const**,char const**,int*,int*,int*); - void (*thread_cleanup)(void); - int (*total_changes)(sqlite3*); - void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*); - int (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*); - void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*, - sqlite_int64),void*); - void * (*user_data)(sqlite3_context*); - const void * (*value_blob)(sqlite3_value*); - int 
(*value_bytes)(sqlite3_value*); - int (*value_bytes16)(sqlite3_value*); - double (*value_double)(sqlite3_value*); - int (*value_int)(sqlite3_value*); - sqlite_int64 (*value_int64)(sqlite3_value*); - int (*value_numeric_type)(sqlite3_value*); - const unsigned char * (*value_text)(sqlite3_value*); - const void * (*value_text16)(sqlite3_value*); - const void * (*value_text16be)(sqlite3_value*); - const void * (*value_text16le)(sqlite3_value*); - int (*value_type)(sqlite3_value*); - char *(*vmprintf)(const char*,va_list); - /* Added ??? */ - int (*overload_function)(sqlite3*, const char *zFuncName, int nArg); - /* Added by 3.3.13 */ - int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); - int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); - int (*clear_bindings)(sqlite3_stmt*); - /* Added by 3.4.1 */ - int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*, - void (*xDestroy)(void *)); - /* Added by 3.5.0 */ - int (*bind_zeroblob)(sqlite3_stmt*,int,int); - int (*blob_bytes)(sqlite3_blob*); - int (*blob_close)(sqlite3_blob*); - int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64, - int,sqlite3_blob**); - int (*blob_read)(sqlite3_blob*,void*,int,int); - int (*blob_write)(sqlite3_blob*,const void*,int,int); - int (*create_collation_v2)(sqlite3*,const char*,int,void*, - int(*)(void*,int,const void*,int,const void*), - void(*)(void*)); - int (*file_control)(sqlite3*,const char*,int,void*); - sqlite3_int64 (*memory_highwater)(int); - sqlite3_int64 (*memory_used)(void); - sqlite3_mutex *(*mutex_alloc)(int); - void (*mutex_enter)(sqlite3_mutex*); - void (*mutex_free)(sqlite3_mutex*); - void (*mutex_leave)(sqlite3_mutex*); - int (*mutex_try)(sqlite3_mutex*); - int (*open_v2)(const char*,sqlite3**,int,const char*); - int (*release_memory)(int); - void (*result_error_nomem)(sqlite3_context*); - void (*result_error_toobig)(sqlite3_context*); - int (*sleep)(int); - void (*soft_heap_limit)(int); - sqlite3_vfs *(*vfs_find)(const char*); - int (*vfs_register)(sqlite3_vfs*,int); - int (*vfs_unregister)(sqlite3_vfs*); - int (*xthreadsafe)(void); - void (*result_zeroblob)(sqlite3_context*,int); - void (*result_error_code)(sqlite3_context*,int); - int (*test_control)(int, ...); - void (*randomness)(int,void*); - sqlite3 *(*context_db_handle)(sqlite3_context*); - int (*extended_result_codes)(sqlite3*,int); - int (*limit)(sqlite3*,int,int); - sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*); - const char *(*sql)(sqlite3_stmt*); - int (*status)(int,int*,int*,int); - int (*backup_finish)(sqlite3_backup*); - sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*); - int (*backup_pagecount)(sqlite3_backup*); - int (*backup_remaining)(sqlite3_backup*); - int (*backup_step)(sqlite3_backup*,int); - const char *(*compileoption_get)(int); - int (*compileoption_used)(const char*); - int (*create_function_v2)(sqlite3*,const char*,int,int,void*, - void (*xFunc)(sqlite3_context*,int,sqlite3_value**), - void (*xStep)(sqlite3_context*,int,sqlite3_value**), - void (*xFinal)(sqlite3_context*), - void(*xDestroy)(void*)); - int (*db_config)(sqlite3*,int,...); - sqlite3_mutex *(*db_mutex)(sqlite3*); - int (*db_status)(sqlite3*,int,int*,int*,int); - int (*extended_errcode)(sqlite3*); - void (*log)(int,const char*,...); - sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64); - const char *(*sourceid)(void); - int (*stmt_status)(sqlite3_stmt*,int,int); - int (*strnicmp)(const char*,const char*,int); - int 
(*unlock_notify)(sqlite3*,void(*)(void**,int),void*); - int (*wal_autocheckpoint)(sqlite3*,int); - int (*wal_checkpoint)(sqlite3*,const char*); - void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*); - int (*blob_reopen)(sqlite3_blob*,sqlite3_int64); - int (*vtab_config)(sqlite3*,int op,...); - int (*vtab_on_conflict)(sqlite3*); - /* Version 3.7.16 and later */ - int (*close_v2)(sqlite3*); - const char *(*db_filename)(sqlite3*,const char*); - int (*db_readonly)(sqlite3*,const char*); - int (*db_release_memory)(sqlite3*); - const char *(*errstr)(int); - int (*stmt_busy)(sqlite3_stmt*); - int (*stmt_readonly)(sqlite3_stmt*); - int (*stricmp)(const char*,const char*); - int (*uri_boolean)(const char*,const char*,int); - sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64); - const char *(*uri_parameter)(const char*,const char*); - char *(*vsnprintf)(int,char*,const char*,va_list); - int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*); - /* Version 3.8.7 and later */ - int (*auto_extension)(void(*)(void)); - int (*bind_blob64)(sqlite3_stmt*,int,const void*,sqlite3_uint64, - void(*)(void*)); - int (*bind_text64)(sqlite3_stmt*,int,const char*,sqlite3_uint64, - void(*)(void*),unsigned char); - int (*cancel_auto_extension)(void(*)(void)); - int (*load_extension)(sqlite3*,const char*,const char*,char**); - void *(*malloc64)(sqlite3_uint64); - sqlite3_uint64 (*msize)(void*); - void *(*realloc64)(void*,sqlite3_uint64); - void (*reset_auto_extension)(void); - void (*result_blob64)(sqlite3_context*,const void*,sqlite3_uint64, - void(*)(void*)); - void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, - void(*)(void*), unsigned char); - int (*strglob)(const char*,const char*); - /* Version 3.8.11 and later */ - sqlite3_value *(*value_dup)(const sqlite3_value*); - void (*value_free)(sqlite3_value*); - int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); - int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); -}; - -/* -** The following macros redefine the API routines so that they are -** redirected through the global sqlite3_api structure. -** -** This header file is also used by the loadext.c source file -** (part of the main SQLite library - not an extension) so that -** it can get access to the sqlite3_api_routines structure -** definition. But the main library does not want to redefine -** the API. So the redefinition macros are only valid if the -** SQLITE_CORE macros is undefined. 
-*/ -#ifndef SQLITE_CORE -#define sqlite3_aggregate_context sqlite3_api->aggregate_context -#ifndef SQLITE_OMIT_DEPRECATED -#define sqlite3_aggregate_count sqlite3_api->aggregate_count -#endif -#define sqlite3_bind_blob sqlite3_api->bind_blob -#define sqlite3_bind_double sqlite3_api->bind_double -#define sqlite3_bind_int sqlite3_api->bind_int -#define sqlite3_bind_int64 sqlite3_api->bind_int64 -#define sqlite3_bind_null sqlite3_api->bind_null -#define sqlite3_bind_parameter_count sqlite3_api->bind_parameter_count -#define sqlite3_bind_parameter_index sqlite3_api->bind_parameter_index -#define sqlite3_bind_parameter_name sqlite3_api->bind_parameter_name -#define sqlite3_bind_text sqlite3_api->bind_text -#define sqlite3_bind_text16 sqlite3_api->bind_text16 -#define sqlite3_bind_value sqlite3_api->bind_value -#define sqlite3_busy_handler sqlite3_api->busy_handler -#define sqlite3_busy_timeout sqlite3_api->busy_timeout -#define sqlite3_changes sqlite3_api->changes -#define sqlite3_close sqlite3_api->close -#define sqlite3_collation_needed sqlite3_api->collation_needed -#define sqlite3_collation_needed16 sqlite3_api->collation_needed16 -#define sqlite3_column_blob sqlite3_api->column_blob -#define sqlite3_column_bytes sqlite3_api->column_bytes -#define sqlite3_column_bytes16 sqlite3_api->column_bytes16 -#define sqlite3_column_count sqlite3_api->column_count -#define sqlite3_column_database_name sqlite3_api->column_database_name -#define sqlite3_column_database_name16 sqlite3_api->column_database_name16 -#define sqlite3_column_decltype sqlite3_api->column_decltype -#define sqlite3_column_decltype16 sqlite3_api->column_decltype16 -#define sqlite3_column_double sqlite3_api->column_double -#define sqlite3_column_int sqlite3_api->column_int -#define sqlite3_column_int64 sqlite3_api->column_int64 -#define sqlite3_column_name sqlite3_api->column_name -#define sqlite3_column_name16 sqlite3_api->column_name16 -#define sqlite3_column_origin_name sqlite3_api->column_origin_name -#define sqlite3_column_origin_name16 sqlite3_api->column_origin_name16 -#define sqlite3_column_table_name sqlite3_api->column_table_name -#define sqlite3_column_table_name16 sqlite3_api->column_table_name16 -#define sqlite3_column_text sqlite3_api->column_text -#define sqlite3_column_text16 sqlite3_api->column_text16 -#define sqlite3_column_type sqlite3_api->column_type -#define sqlite3_column_value sqlite3_api->column_value -#define sqlite3_commit_hook sqlite3_api->commit_hook -#define sqlite3_complete sqlite3_api->complete -#define sqlite3_complete16 sqlite3_api->complete16 -#define sqlite3_create_collation sqlite3_api->create_collation -#define sqlite3_create_collation16 sqlite3_api->create_collation16 -#define sqlite3_create_function sqlite3_api->create_function -#define sqlite3_create_function16 sqlite3_api->create_function16 -#define sqlite3_create_module sqlite3_api->create_module -#define sqlite3_create_module_v2 sqlite3_api->create_module_v2 -#define sqlite3_data_count sqlite3_api->data_count -#define sqlite3_db_handle sqlite3_api->db_handle -#define sqlite3_declare_vtab sqlite3_api->declare_vtab -#define sqlite3_enable_shared_cache sqlite3_api->enable_shared_cache -#define sqlite3_errcode sqlite3_api->errcode -#define sqlite3_errmsg sqlite3_api->errmsg -#define sqlite3_errmsg16 sqlite3_api->errmsg16 -#define sqlite3_exec sqlite3_api->exec -#ifndef SQLITE_OMIT_DEPRECATED -#define sqlite3_expired sqlite3_api->expired -#endif -#define sqlite3_finalize sqlite3_api->finalize -#define sqlite3_free sqlite3_api->free -#define 
sqlite3_free_table sqlite3_api->free_table -#define sqlite3_get_autocommit sqlite3_api->get_autocommit -#define sqlite3_get_auxdata sqlite3_api->get_auxdata -#define sqlite3_get_table sqlite3_api->get_table -#ifndef SQLITE_OMIT_DEPRECATED -#define sqlite3_global_recover sqlite3_api->global_recover -#endif -#define sqlite3_interrupt sqlite3_api->interruptx -#define sqlite3_last_insert_rowid sqlite3_api->last_insert_rowid -#define sqlite3_libversion sqlite3_api->libversion -#define sqlite3_libversion_number sqlite3_api->libversion_number -#define sqlite3_malloc sqlite3_api->malloc -#define sqlite3_mprintf sqlite3_api->mprintf -#define sqlite3_open sqlite3_api->open -#define sqlite3_open16 sqlite3_api->open16 -#define sqlite3_prepare sqlite3_api->prepare -#define sqlite3_prepare16 sqlite3_api->prepare16 -#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 -#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 -#define sqlite3_profile sqlite3_api->profile -#define sqlite3_progress_handler sqlite3_api->progress_handler -#define sqlite3_realloc sqlite3_api->realloc -#define sqlite3_reset sqlite3_api->reset -#define sqlite3_result_blob sqlite3_api->result_blob -#define sqlite3_result_double sqlite3_api->result_double -#define sqlite3_result_error sqlite3_api->result_error -#define sqlite3_result_error16 sqlite3_api->result_error16 -#define sqlite3_result_int sqlite3_api->result_int -#define sqlite3_result_int64 sqlite3_api->result_int64 -#define sqlite3_result_null sqlite3_api->result_null -#define sqlite3_result_text sqlite3_api->result_text -#define sqlite3_result_text16 sqlite3_api->result_text16 -#define sqlite3_result_text16be sqlite3_api->result_text16be -#define sqlite3_result_text16le sqlite3_api->result_text16le -#define sqlite3_result_value sqlite3_api->result_value -#define sqlite3_rollback_hook sqlite3_api->rollback_hook -#define sqlite3_set_authorizer sqlite3_api->set_authorizer -#define sqlite3_set_auxdata sqlite3_api->set_auxdata -#define sqlite3_snprintf sqlite3_api->snprintf -#define sqlite3_step sqlite3_api->step -#define sqlite3_table_column_metadata sqlite3_api->table_column_metadata -#define sqlite3_thread_cleanup sqlite3_api->thread_cleanup -#define sqlite3_total_changes sqlite3_api->total_changes -#define sqlite3_trace sqlite3_api->trace -#ifndef SQLITE_OMIT_DEPRECATED -#define sqlite3_transfer_bindings sqlite3_api->transfer_bindings -#endif -#define sqlite3_update_hook sqlite3_api->update_hook -#define sqlite3_user_data sqlite3_api->user_data -#define sqlite3_value_blob sqlite3_api->value_blob -#define sqlite3_value_bytes sqlite3_api->value_bytes -#define sqlite3_value_bytes16 sqlite3_api->value_bytes16 -#define sqlite3_value_double sqlite3_api->value_double -#define sqlite3_value_int sqlite3_api->value_int -#define sqlite3_value_int64 sqlite3_api->value_int64 -#define sqlite3_value_numeric_type sqlite3_api->value_numeric_type -#define sqlite3_value_text sqlite3_api->value_text -#define sqlite3_value_text16 sqlite3_api->value_text16 -#define sqlite3_value_text16be sqlite3_api->value_text16be -#define sqlite3_value_text16le sqlite3_api->value_text16le -#define sqlite3_value_type sqlite3_api->value_type -#define sqlite3_vmprintf sqlite3_api->vmprintf -#define sqlite3_overload_function sqlite3_api->overload_function -#define sqlite3_prepare_v2 sqlite3_api->prepare_v2 -#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 -#define sqlite3_clear_bindings sqlite3_api->clear_bindings -#define sqlite3_bind_zeroblob sqlite3_api->bind_zeroblob -#define sqlite3_blob_bytes 
sqlite3_api->blob_bytes -#define sqlite3_blob_close sqlite3_api->blob_close -#define sqlite3_blob_open sqlite3_api->blob_open -#define sqlite3_blob_read sqlite3_api->blob_read -#define sqlite3_blob_write sqlite3_api->blob_write -#define sqlite3_create_collation_v2 sqlite3_api->create_collation_v2 -#define sqlite3_file_control sqlite3_api->file_control -#define sqlite3_memory_highwater sqlite3_api->memory_highwater -#define sqlite3_memory_used sqlite3_api->memory_used -#define sqlite3_mutex_alloc sqlite3_api->mutex_alloc -#define sqlite3_mutex_enter sqlite3_api->mutex_enter -#define sqlite3_mutex_free sqlite3_api->mutex_free -#define sqlite3_mutex_leave sqlite3_api->mutex_leave -#define sqlite3_mutex_try sqlite3_api->mutex_try -#define sqlite3_open_v2 sqlite3_api->open_v2 -#define sqlite3_release_memory sqlite3_api->release_memory -#define sqlite3_result_error_nomem sqlite3_api->result_error_nomem -#define sqlite3_result_error_toobig sqlite3_api->result_error_toobig -#define sqlite3_sleep sqlite3_api->sleep -#define sqlite3_soft_heap_limit sqlite3_api->soft_heap_limit -#define sqlite3_vfs_find sqlite3_api->vfs_find -#define sqlite3_vfs_register sqlite3_api->vfs_register -#define sqlite3_vfs_unregister sqlite3_api->vfs_unregister -#define sqlite3_threadsafe sqlite3_api->xthreadsafe -#define sqlite3_result_zeroblob sqlite3_api->result_zeroblob -#define sqlite3_result_error_code sqlite3_api->result_error_code -#define sqlite3_test_control sqlite3_api->test_control -#define sqlite3_randomness sqlite3_api->randomness -#define sqlite3_context_db_handle sqlite3_api->context_db_handle -#define sqlite3_extended_result_codes sqlite3_api->extended_result_codes -#define sqlite3_limit sqlite3_api->limit -#define sqlite3_next_stmt sqlite3_api->next_stmt -#define sqlite3_sql sqlite3_api->sql -#define sqlite3_status sqlite3_api->status -#define sqlite3_backup_finish sqlite3_api->backup_finish -#define sqlite3_backup_init sqlite3_api->backup_init -#define sqlite3_backup_pagecount sqlite3_api->backup_pagecount -#define sqlite3_backup_remaining sqlite3_api->backup_remaining -#define sqlite3_backup_step sqlite3_api->backup_step -#define sqlite3_compileoption_get sqlite3_api->compileoption_get -#define sqlite3_compileoption_used sqlite3_api->compileoption_used -#define sqlite3_create_function_v2 sqlite3_api->create_function_v2 -#define sqlite3_db_config sqlite3_api->db_config -#define sqlite3_db_mutex sqlite3_api->db_mutex -#define sqlite3_db_status sqlite3_api->db_status -#define sqlite3_extended_errcode sqlite3_api->extended_errcode -#define sqlite3_log sqlite3_api->log -#define sqlite3_soft_heap_limit64 sqlite3_api->soft_heap_limit64 -#define sqlite3_sourceid sqlite3_api->sourceid -#define sqlite3_stmt_status sqlite3_api->stmt_status -#define sqlite3_strnicmp sqlite3_api->strnicmp -#define sqlite3_unlock_notify sqlite3_api->unlock_notify -#define sqlite3_wal_autocheckpoint sqlite3_api->wal_autocheckpoint -#define sqlite3_wal_checkpoint sqlite3_api->wal_checkpoint -#define sqlite3_wal_hook sqlite3_api->wal_hook -#define sqlite3_blob_reopen sqlite3_api->blob_reopen -#define sqlite3_vtab_config sqlite3_api->vtab_config -#define sqlite3_vtab_on_conflict sqlite3_api->vtab_on_conflict -/* Version 3.7.16 and later */ -#define sqlite3_close_v2 sqlite3_api->close_v2 -#define sqlite3_db_filename sqlite3_api->db_filename -#define sqlite3_db_readonly sqlite3_api->db_readonly -#define sqlite3_db_release_memory sqlite3_api->db_release_memory -#define sqlite3_errstr sqlite3_api->errstr -#define sqlite3_stmt_busy 
sqlite3_api->stmt_busy -#define sqlite3_stmt_readonly sqlite3_api->stmt_readonly -#define sqlite3_stricmp sqlite3_api->stricmp -#define sqlite3_uri_boolean sqlite3_api->uri_boolean -#define sqlite3_uri_int64 sqlite3_api->uri_int64 -#define sqlite3_uri_parameter sqlite3_api->uri_parameter -#define sqlite3_uri_vsnprintf sqlite3_api->vsnprintf -#define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2 -/* Version 3.8.7 and later */ -#define sqlite3_auto_extension sqlite3_api->auto_extension -#define sqlite3_bind_blob64 sqlite3_api->bind_blob64 -#define sqlite3_bind_text64 sqlite3_api->bind_text64 -#define sqlite3_cancel_auto_extension sqlite3_api->cancel_auto_extension -#define sqlite3_load_extension sqlite3_api->load_extension -#define sqlite3_malloc64 sqlite3_api->malloc64 -#define sqlite3_msize sqlite3_api->msize -#define sqlite3_realloc64 sqlite3_api->realloc64 -#define sqlite3_reset_auto_extension sqlite3_api->reset_auto_extension -#define sqlite3_result_blob64 sqlite3_api->result_blob64 -#define sqlite3_result_text64 sqlite3_api->result_text64 -#define sqlite3_strglob sqlite3_api->strglob -/* Version 3.8.11 and later */ -#define sqlite3_value_dup sqlite3_api->value_dup -#define sqlite3_value_free sqlite3_api->value_free -#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 -#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 -#endif /* SQLITE_CORE */ - -#ifndef SQLITE_CORE - /* This case when the file really is being compiled as a loadable - ** extension */ -# define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; -# define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; -# define SQLITE_EXTENSION_INIT3 \ - extern const sqlite3_api_routines *sqlite3_api; -#else - /* This case when the file is being statically linked into the - ** application */ -# define SQLITE_EXTENSION_INIT1 /*no-op*/ -# define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ -# define SQLITE_EXTENSION_INIT3 /*no-op*/ -#endif - -#endif /* _SQLITE3EXT_H_ */ \ No newline at end of file
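For context, the SQLITE_EXTENSION_INIT macros above are exercised by loadable extensions roughly as follows. This is a generic skeleton, not code from this repository; sqlite3_myext_init and the extension name "myext" are placeholders:

```c
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1          /* defines the global sqlite3_api pointer */

#ifdef _WIN32
__declspec(dllexport)
#endif
/* SQLite derives this entry point name from the shared library's
** file name ("myext" here). */
int sqlite3_myext_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi); /* route sqlite3_* calls through pApi */
  /* After this point, calls such as sqlite3_create_function() expand to
  ** sqlite3_api->create_function(), per the #define list above. */
  (void)db; (void)pzErrMsg;
  return SQLITE_OK;
}
```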