···33 |
44 2 | def foo():
55 | ^ the Osprey rule language is a simplified subset of python
66- | not all Python is valid osprey rule syntax
66+ | not all Python is valid osprey rule syntax
···88--> main.sml:3:0
99 |
1010 3 | _Bar = 2
1111- | ^ this variable is not used anywhere, and thus has no effect. either delete or comment it out
1111+ | ^ this variable is not used anywhere, and thus has no effect. either delete or comment it out
···22--> main.sml:1:11
33 |
44 1 | Foo: str = JsonData()
55- | ^ the following keyword arguments were not provided: [`path`]
55+ | ^ the following keyword arguments were not provided: [`path`]
···22--> main.sml:1:11
33 |
44 1 | Foo: str = JsonData()
55- | ^ the following keyword arguments were not provided: [`path`]
55+ | ^ the following keyword arguments were not provided: [`path`]
···55 | ^ this must must not be used as a statement within the source root, e.g.:
66 | - `Foo = HasResult(...)` is OK
77 | - `SomeOtherFunction(argument=HasResult(...))` is OK
88- | - `HasResult(...)` is not OK
88+ | - `HasResult(...)` is not OK
···44 1 | Qux = HasNoResult()
55 | ^ this cannot appear to the right of a `=`, e.g.:
66 | - `HasNoResult(...)` is OK
77- | - `Qux = HasNoResult(...)` is not OK
77+ | - `Qux = HasNoResult(...)` is not OK
···55 | ^ this function has no result, and thus its result cannot be used:
66 | - `HasNoResult(...)` is OK, however:
77 | - `Foo = Bar + HasNoResult(...)` and
88- | - `Bar(qux=HasNoResult(...))` are not OK
88+ | - `Bar(qux=HasNoResult(...))` are not OK
···22--> main.sml:1:5
33 |
44 1 | Foo: List = JsonData(path='$.foo')
55- | ^ `List` is generic, try `List[T]` where `T` is a type like `str` or `int`
55+ | ^ `List` is generic, try `List[T]` where `T` is a type like `str` or `int`
···22--> main.sml:1:5
33 |
44 1 | Foo: Optional = JsonData(path='$.foo')
55- | ^ `Optional` is generic, try `Optional[T]` where `T` is a type like `str` or `int`
55+ | ^ `Optional` is generic, try `Optional[T]` where `T` is a type like `str` or `int`
···22--> main.sml:1:5
33 |
44 1 | Foo: Union = JsonData(path='$.foo')
55- | ^ `Union` is generic, try `Union[T]` where `T` is a type like `str` or `int`
55+ | ^ `Union` is generic, try `Union[T]` where `T` is a type like `str` or `int`
···22--> main.sml:1:6
33 |
44 1 | Foo = JsonData()
55- | ^ the following keyword arguments were not provided: [`path`]
55+ | ^ the following keyword arguments were not provided: [`path`]
···88--> config.yaml:4:15
99 |
1010 4 | int_field: hello
1111- | ^ value is not a valid integer (type=type_error.integer)
1111+ | ^ value is not a valid integer (type=type_error.integer)
···88--> config.yaml:4:0
99 |
1010 4 | unknown_top_level_key: "hello"
1111- | ^ make sure there is a config model registered for the `unknown_top_level_key` key
1111+ | ^ make sure there is a config model registered for the `unknown_top_level_key` key
+46-3
osprey_worker/src/osprey/engine/conftest.py
···55from contextlib import contextmanager
66from datetime import datetime
77from textwrap import dedent
88-from typing import TYPE_CHECKING, Callable, ContextManager, Dict, Iterator, Optional, Set, Type, TypeVar, Union
88+from typing import (
99+ TYPE_CHECKING,
1010+ Any,
1111+ Callable,
1212+ ContextManager,
1313+ Dict,
1414+ Generator,
1515+ Iterator,
1616+ Optional,
1717+ Set,
1818+ Type,
1919+ TypeVar,
2020+ Union,
2121+)
9221023import gevent.pool
1124import pytest
···2134from osprey.engine.executor.udf_execution_helpers import UDFHelpers
2235from osprey.engine.stdlib import get_config_registry
2336from osprey.engine.udf.registry import UDFRegistry
3737+from osprey.worker.lib.singletons import CONFIG
2438from typing_extensions import Protocol
25392640if TYPE_CHECKING:
2741 from _pytest.config import Config
4242+ from _pytest.config.argparsing import Parser
2843 from _pytest.fixtures import FixtureRequest
29444545+4646+@pytest.fixture(autouse=True) # autouse = True means automatically use for each test
4747+def config_setup() -> Generator[Any, None, None]:
4848+ CONFIG.instance().configure_from_env()
4949+ # yield is used here to basically split this function into two parts:
5050+ # all code before `yield` is the setup code (run before each test), and
5151+ # all code after `yield` is the teardown code (run after each test)
5252+ yield # this line is where the testing happens
5353+ # teardown code
5454+ CONFIG.instance().unconfigure_for_tests()
5555+5656+3057SourcesDict = Union[Sources, str, Dict[str, str]]
3158CheckOutputFunction = Callable[[str], bool]
3259···98125 raise Exception('Cannot call check_output more than once per test run.')
99126100127 did_call = True
101101- if request.config.option.write_outputs:
128128+ if getattr(request.config.option, 'write_outputs', False):
102129 put_file_contents(output)
103130 return True
104131 else:
···108135 f'\n\tExpected test output: {get_output_file()}'
109136 )
110137 except AssertionError as e:
111111- if request.config.option.write_first_failed_output:
138138+ if getattr(request.config.option, 'write_first_failed_output', False):
112139 request.config.option.write_first_failed_output = False
113140 put_file_contents(output)
114141 raise AssertionError(
···266293 action_time: Optional[datetime] = None,
267294 ) -> ExecutionResult:
268295 sources = into_sources(sources_dict)
296296+297297+ # Ensure standard AST validators are registered before validation/execution
298298+ try:
299299+ from osprey.worker.adaptor.plugin_manager import bootstrap_ast_validators
300300+301301+ bootstrap_ast_validators()
302302+ except Exception:
303303+ # If plugin bootstrap is unavailable in this context, continue; tests using run_validation will supply validators
304304+ pass
305305+269306 config_validator = get_config_registry().get_validator()
270307 validator_registry = ValidatorRegistry.get_instance().instance_with_additional_validators(config_validator)
271308 try:
···368405 assert check_output(e.value.rendered())
369406370407 return check_failure
408408+409409+410410+def pytest_addoption(parser: 'Parser') -> None:
411411+ parser.addoption(
412412+ '--write-outputs', action='store_true', help='write checked validator outputs instead of checking them'
413413+ )
371414372415373416def pytest_configure(config: 'Config') -> None:
osprey_worker/src/osprey/engine/query_language/tests/test_ast_druid_translator/test_parses_query_with_unary_operator[not A == B or (C == D and F [gt]= 2)].txt
···1010--> (2) main.sml:1:16
1111 |
1212 1 | Query = True == False
1313- | ^ this is a literal value
1313+ | ^ this is a literal value
+1-1
osprey_worker/src/osprey/engine/query_language/tests/test_parse_query_to_validated_ast/test_binary_comparison_must_contain_identifier[True and False and True].txt
···1414--> (3) main.sml:1:27
1515 |
1616 1 | Query = True and False and True
1717- | ^ this is a literal value
1717+ | ^ this is a literal value
+1-1
osprey_worker/src/osprey/engine/query_language/tests/test_parse_query_to_validated_ast/test_binary_comparison_must_contain_identifier[True and True].txt
···1010--> (2) main.sml:1:17
1111 |
1212 1 | Query = True and True
1313- | ^ this is a literal value
1313+ | ^ this is a literal value
···88--> main.sml:1:45
99 |
1010 1 | Query = A == B or (C == D and F >= 2); Foo = Bar
1111- | ^ `Bar` does not exist in any rule files. typo, perhaps?
1111+ | ^ `Bar` does not exist in any rule files. typo, perhaps?
···22--> main.sml:1:8
33 |
44 1 | Query = 1
55- | ^ you have provided a `Number`, try something like `ActionName != True`
55+ | ^ you have provided a `Number`, try something like `ActionName != True`
···1818--> (4) main.sml:3:0
1919 |
2020 3 | Bar = Entity(type="User", id=123)
2121- | ^ variable `Bar` with incompatible type `int` originally defined here
2121+ | ^ variable `Bar` with incompatible type `int` originally defined here
+1-1
osprey_worker/src/osprey/engine/query_language/tests/test_parse_query_to_validated_ast/test_unary_operator_query_validation_failure[UserName == 1 and not 2].txt
···1616--> (1) main.sml:1:30
1717 |
1818 1 | Query = UserName == 1 and not 2
1919- | ^ this is a literal value
1919+ | ^ this is a literal value
···22--> main.sml:1:8
33 |
44 1 | Query = UserNaem == 'jake'
55- | ^ unknown name `UserNaem`, did you mean `UserName`?
55+ | ^ unknown name `UserNaem`, did you mean `UserName`?
···22--> main.sml:1:8
33 |
44 1 | Query = KLJHaflLasfkL == 'jake'
55- | ^ `KLJHaflLasfkL` does not exist in any rule files. typo, perhaps?
55+ | ^ `KLJHaflLasfkL` does not exist in any rule files. typo, perhaps?
···22--> main.sml:1:16
33 |
44 1 | RegexMatch(item='Jake', regex='^foo$')
55- | ^ argument `item` must be a variable
55+ | ^ argument `item` must be a variable
···22--> main.sml:1:25
33 |
44 1 | RegexMatch(item=A, regex='[')
55- | ^ error: unterminated character set at position 0
55+ | ^ error: unterminated character set at position 0
···11-error:
11+error:
22 experiment bucket size precision is too high,
33 the precision can be at most to the hundredth decimal place
44-55---> main.sml:4:54
44+55+--> main.sml:3:65
66 |
77- 4 | entity=E1, buckets=['control', 'b'], bucket_sizes=[10, 2.512], version=2,
88- | ^
77+ 3 | A = Experiment(entity=E1, buckets=['control', 'b'], bucket_sizes=[10, 2.512], version=2, revision=0)
88+ | ^
···11-error:
11+error:
22 75 is over the
33 current max of 50.
44 Either reduce the bucket size or reduce the number of buckets
55-66---> main.sml:4:54
55+66+--> main.sml:3:65
77 |
88- 4 | entity=E1, buckets=['control', 'b'], bucket_sizes=[75, 25], version=1,
99- | ^
1010- | max percentage size of each bucket is 100/(# of buckets) rounded
1111- | down to the nearest hundreth decimal place
1212- |
88+ 3 | A = Experiment(entity=E1, buckets=['control', 'b'], bucket_sizes=[75, 25], version=1, revision=1)
99+ | ^
1010+ | max percentage size of each bucket is 100/(# of buckets) rounded
1111+ | down to the nearest hundreth decimal place
1212+ |
···11-error:
11+error:
22 33.34 is over the
33 current max of 33.33.
44 Either reduce the bucket size or reduce the number of buckets
55-66---> main.sml:4:59
55+66+--> main.sml:3:70
77 |
88- 4 | entity=E1, buckets=['control', 'b', 'c'], bucket_sizes=[33.34, 33.33, 33.33], version=1,
99- | ^
1010- | max percentage size of each bucket is 100/(# of buckets) rounded
1111- | down to the nearest hundreth decimal place
1212- |
88+ 3 | A = Experiment(entity=E1, buckets=['control', 'b', 'c'], bucket_sizes=[33.34, 33.33, 33.33], version=1, revision=1)
99+ | ^
1010+ | max percentage size of each bucket is 100/(# of buckets) rounded
1111+ | down to the nearest hundreth decimal place
1212+ |
···114 errors occurred while validating:
22-[1/4] error:
22+[1/4] error:
33 16.67 is over the
44 current max of 16.66.
55 Either reduce the bucket size or reduce the number of buckets
66-77---> main.sml:4:74
66+77+--> main.sml:3:85
88 |
99- 4 | entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1,
1010- | ^
1111- | max percentage size of each bucket is 100/(# of buckets) rounded
1212- | down to the nearest hundreth decimal place
1313- |
1414-[2/4] error:
99+ 3 | A = Experiment(entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1, revision=1)
1010+ | ^
1111+ | max percentage size of each bucket is 100/(# of buckets) rounded
1212+ | down to the nearest hundreth decimal place
1313+ |
1414+[2/4] error:
1515 16.67 is over the
1616 current max of 16.66.
1717 Either reduce the bucket size or reduce the number of buckets
1818-1919---> main.sml:4:74
1818+1919+--> main.sml:3:85
2020 |
2121- 4 | entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1,
2222- | ^
2323- | max percentage size of each bucket is 100/(# of buckets) rounded
2424- | down to the nearest hundreth decimal place
2525- |
2626-[3/4] error:
2121+ 3 | A = Experiment(entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1, revision=1)
2222+ | ^
2323+ | max percentage size of each bucket is 100/(# of buckets) rounded
2424+ | down to the nearest hundreth decimal place
2525+ |
2626+[3/4] error:
2727 16.67 is over the
2828 current max of 16.66.
2929 Either reduce the bucket size or reduce the number of buckets
3030-3131---> main.sml:4:74
3030+3131+--> main.sml:3:85
3232 |
3333- 4 | entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1,
3434- | ^
3535- | max percentage size of each bucket is 100/(# of buckets) rounded
3636- | down to the nearest hundreth decimal place
3737- |
3838-[4/4] error:
3333+ 3 | A = Experiment(entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1, revision=1)
3434+ | ^
3535+ | max percentage size of each bucket is 100/(# of buckets) rounded
3636+ | down to the nearest hundreth decimal place
3737+ |
3838+[4/4] error:
3939 16.67 is over the
4040 current max of 16.66.
4141 Either reduce the bucket size or reduce the number of buckets
4242-4343---> main.sml:4:74
4242+4343+--> main.sml:3:85
4444 |
4545- 4 | entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1,
4646- | ^
4747- | max percentage size of each bucket is 100/(# of buckets) rounded
4848- | down to the nearest hundreth decimal place
4949- |
4545+ 3 | A = Experiment(entity=E1, buckets=['control', 'b', 'c', 'd', 'e', 'f'], bucket_sizes=[16.66, 16.67, 16.67, 16.67, 16.66, 16.67], version=1, revision=1)
4646+ | ^
4747+ | max percentage size of each bucket is 100/(# of buckets) rounded
4848+ | down to the nearest hundreth decimal place
4949+ |
···66--> (1) config.yaml:1:1
77 |
88 1 | {"labels": {"my_label": {"valid_for": ["MyEntity"]}}}
99- | ^ add the label to the config here
99+ | ^ add the label to the config here
···22--> main.sml:1:20
33 |
44 1 | r = RandomInt(start=10,end=0)
55- | ^ the `start` value must be less than the `end` value
55+ | ^ the `start` value must be less than the `end` value
···22--> main.sml:1:20
33 |
44 1 | r = RandomInt(start=0,end=0)
55- | ^ the `start` value must be less than the `end` value
55+ | ^ the `start` value must be less than the `end` value
···33 |
44 3 | Require(rule=Foo)
55 | ^ you can only use literal strings, for example:
66- | `Require(rule="hello.sml")` or `Require(rule=f"actions/{ActionName}.sml")`
66+ | `Require(rule="hello.sml")` or `Require(rule=f"actions/{ActionName}.sml")`
···66--> (1) main.sml:3:14
77 |
88 3 | FooUnwrapped: str = ResolveOptional(optional_value=Foo)
99- | ^ expected `str` due to this
99+ | ^ expected `str` due to this
···44 3 | R1 = Rule(when_all=[True], description='Interpolated: {X}')
55 | ^ this string contains what looks like variable interpolation, but is not an f-string
66 | found: `X`
77- | consider prefixing with `f`
77+ | consider prefixing with `f`
···1717 |
1818 8 | _Local = Rule(when_all=[True], description='')
1919 | ^ this rule is being stored in the local variable `_Local`
2020- | rules must be in non-local features (eg that don't start with `_`)
2020+ | rules must be in non-local features (eg that don't start with `_`)
···22--> main.sml:3:0
33 |
44 3 | CallFoo = StringClean(s=Foo)
55- | ^ name 'CallFoo' is not marked as non-extractable
55+ | ^ name 'CallFoo' is not marked as non-extractable
···22--> main.sml:3:0
33 |
44 3 | CallFoo = StringClean(s=Foo)
55- | ^ name 'CallFoo' is not marked as non-extractable
55+ | ^ name 'CallFoo' is not marked as non-extractable
···88--> main.sml:3:18
99 |
1010 3 | FormattedFoo = f'{Foo}'
1111- | ^ name 'Foo' is marked as non-extractable
1111+ | ^ name 'Foo' is marked as non-extractable
···88--> main.sml:3:18
99 |
1010 3 | FormattedFoo = f'{Foo}'
1111- | ^ name 'Foo' is marked as non-extractable
1111+ | ^ name 'Foo' is marked as non-extractable
···22--> main.sml:3:0
33 |
44 3 | ListFoo = [Foo]
55- | ^ name 'ListFoo' is not marked as non-extractable
55+ | ^ name 'ListFoo' is not marked as non-extractable
···22--> main.sml:3:0
33 |
44 3 | ListFoo = [Foo]
55- | ^ name 'ListFoo' is not marked as non-extractable
55+ | ^ name 'ListFoo' is not marked as non-extractable
···22--> main.sml:1:68
33 |
44 1 | GetSnowflakeBucket(snowflake=119144447702335491,granularity_seconds=0)
55- | ^ granularity_seconds can not be less than 3600 (1 hour)
55+ | ^ granularity_seconds can not be less than 3600 (1 hour)
···22--> main.sml:1:76
33 |
44 1 | GetTimedeltaBucket(timedelta=TimeDelta(seconds=1136715),granularity_seconds=0)
55- | ^ granularity_seconds can not be less than 3600 (1 hour)
55+ | ^ granularity_seconds can not be less than 3600 (1 hour)
···22--> main.sml:1:60
33 |
44 1 | GetTimestampBucket(timestamp=1448476649,granularity_seconds=0)
55- | ^ granularity_seconds can not be less than 3600 (1 hour)
55+ | ^ granularity_seconds can not be less than 3600 (1 hour)
···11-generic arguments type must have at least one generic item: UDF `ArgumentDoesNotUseTypeUdf` generic arguments type `ArgumentDoesNotUseTypeArgs[~A]` has no generic items
11+generic arguments type must have at least one generic item: UDF `ArgumentDoesNotUseTypeUdf` generic arguments type `ArgumentDoesNotUseTypeArgs[~A]` has no generic items
···11-Osprey generics must inherit from `OspreyInvariantGeneric`: generic UDF type `ArgumentGenericWhenUdfIsNotUdf` does not inherit from `OspreyInvariantGeneric`
11+Osprey generics must inherit from `OspreyInvariantGeneric`: generic UDF type `ArgumentGenericWhenUdfIsNotUdf` does not inherit from `OspreyInvariantGeneric`
···11-Osprey generics must have exactly one parameter: generic UDF type `ArgumentHasDifferentTypeVarUdf` has 2 parameters
11+Osprey generics must have exactly one parameter: generic UDF type `ArgumentHasDifferentTypeVarUdf` has 2 parameters
···11-Osprey generics must inherit from `OspreyInvariantGeneric`: generic arguments type `ArgumentInheritsWrongGenericArgs` does not inherit from `OspreyInvariantGeneric`
11+Osprey generics must inherit from `OspreyInvariantGeneric`: generic arguments type `ArgumentInheritsWrongGenericArgs` does not inherit from `OspreyInvariantGeneric`
···11-cannot get type candidates for `Tuple[str, ...]`, is an unknown generic
11+cannot get type candidates for `Tuple[str, ...]`, is an unknown generic
···11-cannot get type candidates for `Tuple[str, ...]`, is an unknown generic
11+cannot get type candidates for `Tuple[str, ...]`, is an unknown generic
···11-generic UDF return type must have exactly one type parameter: UDF `DoesNotReturnGenericUdf` return type `None` is not generic
11+generic UDF return type must have exactly one type parameter: UDF `DoesNotReturnGenericUdf` return type `None` is not generic
···11-Osprey generics must inherit from `OspreyInvariantGeneric`: generic UDF type `InheritsWrongGenericsUdf` does not inherit from `OspreyInvariantGeneric`
11+Osprey generics must inherit from `OspreyInvariantGeneric`: generic UDF type `InheritsWrongGenericsUdf` does not inherit from `OspreyInvariantGeneric`
···11-Osprey generics must have exactly one parameter: generic UDF type `ReturnsDifferentTypeVarDirectUdf` has 2 parameters
11+Osprey generics must have exactly one parameter: generic UDF type `ReturnsDifferentTypeVarDirectUdf` has 2 parameters
-14
osprey_worker/src/osprey/worker/sinks/__init__.py
···11-# ruff: noqa: E402
22-"""TODO: move logic to another file
33-44-__init__.py often gets imported and run before other modules,
55-so it's dangerous to import other modules here, potentially prior to gevent patching
66-77-Therefore we patch at the top of this file -- it's generally fine to double patch
88-but not ideal as clients should be responsible for patching
99-"""
1010-1111-from osprey.worker.lib.patcher import patch_all
1212-1313-patch_all()
1414-151from enum import StrEnum, auto
162173