From b1afd99c9e4ba5f4fc3bb3ee3fbce5fe64cbc201 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Sun, 26 Dec 2021 15:44:30 +0800 Subject: [PATCH] [python] Fix task relation wrong hashable function (#7628) * [python] Fix task relation wrong hashable function Currently our :class:`TaskRelation` have wrong __hash__ function, would mistake a object as same object. It failed our dependence and process definition. this patch fix it by adding `_KEY_ATTR` to overwrite TaskRelation's __eq__ and correct __hash__ func close: #7627 * Fix code format --- .../src/pydolphinscheduler/constants.py | 1 + .../src/pydolphinscheduler/core/task.py | 12 ++++- .../tests/core/test_task.py | 46 ++++++++++++++++++- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py index 940c74920..84becbcb4 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py @@ -104,6 +104,7 @@ class Delimiter(str): DASH = "/" COLON = ":" UNDERSCORE = "_" + DIRECTION = "->" class Time(str): diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py index 39cca9c0b..8a90efcd7 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py @@ -21,6 +21,7 @@ import logging from typing import Dict, List, Optional, Sequence, Set, Tuple, Union from pydolphinscheduler.constants import ( + Delimiter, ProcessDefinitionDefault, TaskFlag, TaskPriority, @@ -37,6 +38,13 @@ from pydolphinscheduler.java_gateway import launch_gateway class TaskRelation(Base): """TaskRelation object, describe the relation of exactly two tasks.""" + # Add attr `_KEY_ATTR` to overwrite :func:`__eq__`, it is make set + # `Task.process_definition._task_relations` work correctly. + _KEY_ATTR = { + "pre_task_code", + "post_task_code", + } + _DEFINE_ATTR = { "pre_task_code", "post_task_code", @@ -61,7 +69,7 @@ class TaskRelation(Base): self.post_task_code = post_task_code def __hash__(self): - return hash(f"{self.post_task_code}, {self.post_task_code}") + return hash(f"{self.pre_task_code} {Delimiter.DIRECTION} {self.post_task_code}") class Task(Base): @@ -219,6 +227,7 @@ class Task(Base): task_relation = TaskRelation( pre_task_code=task.code, post_task_code=self.code, + name=f"{task.name} {Delimiter.DIRECTION} {self.name}", ) self.process_definition._task_relations.add(task_relation) else: @@ -229,6 +238,7 @@ class Task(Base): task_relation = TaskRelation( pre_task_code=self.code, post_task_code=task.code, + name=f"{self.name} {Delimiter.DIRECTION} {task.name}", ) self.process_definition._task_relations.add(task_relation) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py index b551f072b..6af731b5f 100644 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py +++ b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py @@ -24,6 +24,9 @@ import pytest from pydolphinscheduler.core.task import Task, TaskRelation from tests.testing.task import Task as testTask +TEST_TASK_RELATION_SET = set() +TEST_TASK_RELATION_SIZE = 0 + @pytest.mark.parametrize( "attr, expect", @@ -66,6 +69,45 @@ def test_property_task_params(attr, expect): assert expect == task.task_params +@pytest.mark.parametrize( + "pre_code, post_code, expect", + [ + (123, 456, hash("123 -> 456")), + (12345678, 987654321, hash("12345678 -> 987654321")), + ], +) +def test_task_relation_hash_func(pre_code, post_code, expect): + """Test TaskRelation magic function :func:`__hash__`.""" + task_param = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) + assert hash(task_param) == expect + + +@pytest.mark.parametrize( + "pre_code, post_code, size_add", + [ + (123, 456, 1), + (123, 456, 0), + (456, 456, 1), + (123, 123, 1), + (456, 123, 1), + (0, 456, 1), + (123, 0, 1), + ], +) +def test_task_relation_add_to_set(pre_code, post_code, size_add): + """Test TaskRelation with different pre_code and post_code add to set behavior. + + Here we use global variable to keep set of :class:`TaskRelation` instance and the number we expect + of the size when we add a new task relation to exists set. + """ + task_relation = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) + TEST_TASK_RELATION_SET.add(task_relation) + # hint python interpreter use global variable instead of local's + global TEST_TASK_RELATION_SIZE + TEST_TASK_RELATION_SIZE += size_add + assert len(TEST_TASK_RELATION_SET) == TEST_TASK_RELATION_SIZE + + def test_task_relation_to_dict(): """Test TaskRelation object function to_dict.""" pre_task_code = 123 @@ -79,10 +121,10 @@ def test_task_relation_to_dict(): "conditionType": 0, "conditionParams": {}, } - task_param = TaskRelation( + task_relation = TaskRelation( pre_task_code=pre_task_code, post_task_code=post_task_code ) - assert task_param.get_define() == expect + assert task_relation.get_define() == expect def test_task_get_define():