From 5a2f6b64a5299166565e973d759ecb44f5a32612 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 9 Sep 2021 18:07:07 -0500 Subject: [PATCH 01/16] fix issue 151 - cascades to part tables should be from parent tables only. --- datajoint/table.py | 94 ++++++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index bed38a693..03bb5815d 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -46,16 +46,17 @@ class _RenameMap(tuple): class Table(QueryExpression): """ - Table is an abstract class that represents a base relation, i.e. a table in the schema. + Table is an abstract class that represents a table in the schema. + It implements insert and delete methods and inherits query functionality. To make it a concrete class, override the abstract properties specifying the connection, table name, database, and definition. - A Relation implements insert and delete methods in addition to inherited relational operators. """ _table_name = None # must be defined in subclass _log_ = None # placeholder for the Log table object - # These properties must be set by the schema decorator (schemas.py) at class level or by FreeTable at instance level + # These properties must be set by the schema decorator (schemas.py) at class level + # or by FreeTable at instance level database = None declaration_context = None @@ -65,7 +66,8 @@ def table_name(self): @property def definition(self): - raise NotImplementedError('Subclasses of Table must implement the `definition` property') + raise NotImplementedError( + 'Subclasses of Table must implement the `definition` property') def declare(self, context=None): """ @@ -95,7 +97,8 @@ def alter(self, prompt=True, context=None): """ if self.connection.in_transaction: raise DataJointError( - 'Cannot update table declaration inside a transaction, e.g. from inside a populate/make call') + 'Cannot update table declaration inside a transaction, ' + 'e.g. from inside a populate/make call') if context is None: frame = inspect.currentframe().f_back context = dict(frame.f_globals, **frame.f_locals) @@ -117,7 +120,8 @@ def alter(self, prompt=True, context=None): # skip if no create privilege pass else: - self.__class__._heading = Heading(table_info=self.heading.table_info) # reset heading + # reset heading + self.__class__._heading = Heading(table_info=self.heading.table_info) if prompt: print('Table altered') self._log('Altered ' + self.full_table_name) @@ -226,9 +230,11 @@ def external(self): def update1(self, row): """ update1 updates one existing entry in the table. - Caution: Updates are not part of the DataJoint data manipulation model. For strict data integrity, - use delete and insert. - :param row: a dict containing the primary key and the attributes to update. + Caution: In DataJoint the primary modes for data manipulation is to insert and delete + entire records since referential integrity works on the level of records, not fields. + Therefore, updates are reserved for corrective operations outside of main workflow. + Use UPDATE methods sparingly with full awareness of potential violations of assumptions. + :param row: a dict containing the primary key values and the attributes to update. Setting an attribute value to None will reset it to the default value (if any) The primary key attributes must always be provided. Examples: @@ -241,7 +247,8 @@ def update1(self, row): if not set(row).issuperset(self.primary_key): raise DataJointError('The argument of update1 must supply all primary key values.') try: - raise DataJointError('Attribute `%s` not found.' % next(k for k in row if k not in self.heading.names)) + raise DataJointError('Attribute `%s` not found.' % + next(k for k in row if k not in self.heading.names)) except StopIteration: pass # ok if len(self.restriction): @@ -250,7 +257,8 @@ def update1(self, row): if len(self & key) != 1: raise DataJointError('Update entry must exist.') # UPDATE query - row = [self.__make_placeholder(k, v) for k, v in row.items() if k not in self.primary_key] + row = [self.__make_placeholder(k, v) for k, v in row.items() + if k not in self.primary_key] query = "UPDATE {table} SET {assignments} WHERE {where}".format( table=self.full_table_name, assignments=",".join('`%s`=%s' % r[:2] for r in row), @@ -259,22 +267,25 @@ def update1(self, row): def insert1(self, row, **kwargs): """ - Insert one data record or one Mapping (like a dict). - :param row: a numpy record, a dict-like object, or an ordered sequence to be inserted as one row. + Insert one data record into the table.. + :param row: a numpy record, a dict-like object, or an ordered sequence to be inserted + as one row. For kwargs, see insert() """ self.insert((row,), **kwargs) - def insert(self, rows, replace=False, skip_duplicates=False, ignore_extra_fields=False, allow_direct_insert=None): + def insert(self, rows, replace=False, skip_duplicates=False, ignore_extra_fields=False, + allow_direct_insert=None): """ Insert a collection of rows. - :param rows: An iterable where an element is a numpy record, a dict-like object, a pandas.DataFrame, a sequence, - or a query expression with the same heading as table self. + :param rows: An iterable where an element is a numpy record, a dict-like object, a + pandas.DataFrame, a sequence, or a query expression with the same heading as self. :param replace: If True, replaces the existing tuple. :param skip_duplicates: If True, silently skip duplicate inserts. :param ignore_extra_fields: If False, fields that are not in the heading raise error. :param allow_direct_insert: applies only in auto-populated tables. - If False (default), insert are allowed only from inside the make callback. + If False (default), insert are allowed only from inside + the make callback. Example:: >>> relation.insert([ >>> dict(subject_id=7, species="mouse", date_of_birth="2014-09-01"), @@ -288,20 +299,22 @@ def insert(self, rows, replace=False, skip_duplicates=False, ignore_extra_fields ).to_records(index=False) # prohibit direct inserts into auto-populated tables - if not allow_direct_insert and not getattr(self, '_allow_insert', True): # allow_insert is only used in AutoPopulate + if not allow_direct_insert and not getattr(self, '_allow_insert', True): raise DataJointError( - 'Inserts into an auto-populated table can only done inside its make method during a populate call.' + 'Inserts into an auto-populated table can only done inside ' + 'its make method during a populate call.' ' To override, set keyword argument allow_direct_insert=True.') - if inspect.isclass(rows) and issubclass(rows, QueryExpression): # instantiate if a class - rows = rows() + if inspect.isclass(rows) and issubclass(rows, QueryExpression): + rows = rows() # instantiate if a class if isinstance(rows, QueryExpression): # insert from select if not ignore_extra_fields: try: raise DataJointError( - "Attribute %s not found. To ignore extra attributes in insert, set ignore_extra_fields=True." % - next(name for name in rows.heading if name not in self.heading)) + "Attribute %s not found. To ignore extra attributes in insert, " + "set ignore_extra_fields=True." % next( + name for name in rows.heading if name not in self.heading)) except StopIteration: pass fields = list(name for name in rows.heading if name in self.heading) @@ -369,10 +382,21 @@ def _delete_cascade(self): *[_.strip('`') for _ in match['child'].split('`.`')] )).fetchall()))) match['parent'] = match['parent'][0] - # restrict child by self if - # 1. if self's restriction attributes are not in child's primary key - # 2. if child renames any attributes - # otherwise restrict child by self's restriction. + + # If child is a part table of another table, do not recurse (See issue #151) + if '__' in match['child'] and (not match['child'].strip('`').startswith( + self.full_table_name).strip('`') + '__'): + raise DataJointError( + 'Attempt to delete from part table {part} before deleting from ' + 'its master. Delete from {master} first.'.format( + part=match['child'], + master=match['child'].split('__')[0] + '`' + )) + + # Restrict child by self if + # 1. if self's restriction attributes are not in child's primary key + # 2. if child renames any attributes + # Otherwise restrict child by self's restriction. child = FreeTable(self.connection, match['child']) if set(self.restriction_attributes) <= set(child.primary_key) and \ match['fk_attrs'] == match['pk_attrs']: @@ -396,6 +420,8 @@ def delete(self, transaction=True, safemode=None): Deletes the contents of the table and its dependent tables, recursively. :param transaction: if True, use the entire delete becomes an atomic transaction. + This is the default and recommended behavior. Set to False if this delete is nested + within another transaction. :param safemode: If True, prohibit nested transactions and prompt to confirm. Default is dj.config['safemode']. :return: number of deleted rows (excluding those from dependent tables) @@ -460,7 +486,7 @@ def drop(self): User is prompted for confirmation if config['safemode'] is set to True. """ if self.restriction: - raise DataJointError('A relation with an applied restriction condition cannot be dropped.' + raise DataJointError('A table with an applied restriction cannot be dropped.' ' Call drop() on the unrestricted Table.') self.connection.dependencies.load() do_drop = True @@ -555,12 +581,14 @@ def describe(self, context=None, printout=True): def _update(self, attrname, value=None): """ - This is a deprecated function to be removed in datajoint 0.14. Use .update1 instead. + This is a deprecated function to be removed in datajoint 0.14. + Use .update1 instead. - Updates a field in an existing tuple. This is not a datajoyous operation and should not be used - routinely. Relational database maintain referential integrity on the level of a tuple. Therefore, - the UPDATE operator can violate referential integrity. The datajoyous way to update information is - to delete the entire tuple and insert the entire update tuple. + Updates a field in one existing tuple. self must be restricted to exactly one entry. + In DataJoint the principal way of updating data is to delete and re-insert the + entire record and updates are reserved for corrective actions. + This is because referential integrity is observed on the level of entire + records rather than individual attributes. Safety constraints: 1. self must be restricted to exactly one tuple From 4a2e1af7e748a73e4464b0223c20b6fc63c2f77e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 9 Sep 2021 19:04:52 -0500 Subject: [PATCH 02/16] Fix issue #374 --- CHANGELOG.md | 1 + datajoint/table.py | 27 +++++++++++++++++---------- datajoint/utils.py | 17 +++++++++++++++++ docs-parts/intro/Releases_lang1.rst | 1 + 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671c0f0d9..0623f4cce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### 0.13.3 -- TBD * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 +* Bugfix - Deletes and drops can cascade to part from master only. (#151 and #374) PR #957 ### 0.13.2 -- May 7, 2021 * Update `setuptools_certificate` dependency to new name `otumat` diff --git a/datajoint/table.py b/datajoint/table.py index 03bb5815d..051fab5e4 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -14,7 +14,7 @@ from .condition import make_condition from .expression import QueryExpression from . import blob -from .utils import user_choice +from .utils import user_choice, get_master from .heading import Heading from .errors import (DuplicateError, AccessError, DataJointError, UnknownAttributeError, IntegrityError) @@ -383,15 +383,13 @@ def _delete_cascade(self): )).fetchall()))) match['parent'] = match['parent'][0] - # If child is a part table of another table, do not recurse (See issue #151) - if '__' in match['child'] and (not match['child'].strip('`').startswith( - self.full_table_name).strip('`') + '__'): + # Avoid deleting from child before master (See issue #151) + master = get_master(match['child']) + if master and self.full_table_name != master: raise DataJointError( 'Attempt to delete from part table {part} before deleting from ' 'its master. Delete from {master} first.'.format( - part=match['child'], - master=match['child'].split('__')[0] + '`' - )) + part=match['child'], master=master)) # Restrict child by self if # 1. if self's restriction attributes are not in child's primary key @@ -490,8 +488,17 @@ def drop(self): ' Call drop() on the unrestricted Table.') self.connection.dependencies.load() do_drop = True - tables = [table for table in self.connection.dependencies.descendants(self.full_table_name) - if not table.isdigit()] + tables = [table for table in self.connection.dependencies.descendants( + self.full_table_name) if not table.isdigit()] + + # avoid dropping part tables without their masters: See issue #374 + for part in tables: + master = get_master(part) + if master and master not in tables: + raise DataJointError( + 'Attempt to drop part table {part} before dropping ' + 'its master. Drop {master} first.'.format(part=part, master=master)) + if config['safemode']: for table in tables: print(table, '(%d tuples)' % len(FreeTable(self.connection, table))) @@ -692,7 +699,7 @@ def check_fields(fields): if field not in self.heading: raise KeyError(u'`{0:s}` is not in the table heading'.format(field)) elif set(field_list) != set(fields).intersection(self.heading.names): - raise DataJointError('Attempt to insert rows with different fields') + raise DataJointError('Attempt to insert rows with different fields.') if isinstance(row, np.void): # np.array check_fields(row.dtype.fields) diff --git a/datajoint/utils.py b/datajoint/utils.py index 42b18222b..6f26ad199 100644 --- a/datajoint/utils.py +++ b/datajoint/utils.py @@ -31,6 +31,23 @@ def user_choice(prompt, choices=("yes", "no"), default=None): return response +def get_master(full_table_name): + """ + If the table name is that of a part table, then return what the master table name would be. + :param full_table_name: + :return: Supposed master full table name or empty string if not a part table name. + + This follows DataJoint's table naming convention where a master and a part must be in the + same schema and the part table is prefixed with the mater table name + '__'. + + Example: + `ephys`.`session` -- master + `ephys`.`session__recording` -- part + """ + master, *part = full_table_name.split('__') + return master + '`' if part else "" + + def to_camel_case(s): """ Convert names with under score (_) separation into camel case names. diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 5090d0d89..15c402ce3 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -2,6 +2,7 @@ ---------------------- * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 +* Bugfix - Deletes and drops can cascade to part from master only. (#151 and #374) PR #957 0.13.2 -- May 7, 2021 ---------------------- From bc12b113f6eddc701f5768bfbfb2c7114df024b9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 10 Sep 2021 09:29:35 -0500 Subject: [PATCH 03/16] use regex to identify part tables in utils.get_master --- LNX-docker-compose.yml | 2 +- datajoint/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index d69677ccc..bee52b187 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -32,7 +32,7 @@ services: interval: 1s fakeservices.datajoint.io: <<: *net - image: datajoint/nginx:v0.0.17 + image: datajoint/nginx:v0.0.18 environment: - ADD_db_TYPE=DATABASE - ADD_db_ENDPOINT=db:3306 diff --git a/datajoint/utils.py b/datajoint/utils.py index 6f26ad199..e718d5481 100644 --- a/datajoint/utils.py +++ b/datajoint/utils.py @@ -44,8 +44,8 @@ def get_master(full_table_name): `ephys`.`session` -- master `ephys`.`session__recording` -- part """ - master, *part = full_table_name.split('__') - return master + '`' if part else "" + match = re.match(r'(?P`\w+`.`\w+)__(?P\w+)`', full_table_name) + return match['master'] + '`' if match else '' def to_camel_case(s): From 5929f95817f5db2804baf436b97fdf98e95d8a69 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 10 Sep 2021 12:53:21 -0500 Subject: [PATCH 04/16] resolve merge conflicts --- CHANGELOG.md | 3 --- docs-parts/intro/Releases_lang1.rst | 3 --- 2 files changed, 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8045d5507..88206f433 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,8 @@ ### 0.13.3 -- TBD * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 -<<<<<<< HEAD * Bugfix - Deletes and drops can cascade to part from master only. (#151 and #374) PR #957 -======= * Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955 ->>>>>>> 61dab59fe59a408b018bf2a6330ec0ce07c2ad42 ### 0.13.2 -- May 7, 2021 * Update `setuptools_certificate` dependency to new name `otumat` diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 37a61874c..9e15e148f 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -2,11 +2,8 @@ ---------------------- * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 -<<<<<<< HEAD * Bugfix - Deletes and drops can cascade to part from master only. (#151 and #374) PR #957 -======= * Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955 ->>>>>>> 61dab59fe59a408b018bf2a6330ec0ce07c2ad42 0.13.2 -- May 7, 2021 ---------------------- From d8d8936dace7f25033bc7f4d96adc979bf1edb38 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 10 Sep 2021 14:10:10 -0500 Subject: [PATCH 05/16] add tests for issues #151 and #374 --- tests/schema_simple.py | 51 ++++++++++++++++++++++++++++++++++ tests/test_cascading_delete.py | 34 ++++++++++++++++++++--- tests/test_schema.py | 3 +- 3 files changed, 83 insertions(+), 5 deletions(-) diff --git a/tests/schema_simple.py b/tests/schema_simple.py index c4ec45e00..9a0231186 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -4,6 +4,10 @@ import random import datajoint as dj import itertools +import hashlib +import uuid +import faker + from . import PREFIX, CONN_INFO import numpy as np @@ -151,6 +155,53 @@ class DataB(dj.Lookup): contents = list(zip(range(5), range(5, 10))) +@schema +class Website(dj.Lookup): + definition = """ + url_hash : uuid + --- + url : varchar(1000) + """ + + def insert1_url(self, url): + hashed = hashlib.sha1() + hashed.update(url.encode()) + url_hash = uuid.UUID(bytes=hashed.digest()[:16]) + self.insert1(dict(url=url, url_hash=url_hash), skip_duplicates=True) + return url_hash + + +@schema +class Profile(dj.Manual): + definition = """ + ssn : char(11) + --- + name : varchar(70) + residence : varchar(255) + blood_group : enum('A+', 'A-', 'AB+', 'AB-', 'B+', 'B-', 'O+', 'O-') + username : varchar(120) + birthdate : date + job : varchar(120) + sex : enum('M', 'F') + """ + + class Website(dj.Part): + definition = """ + -> master + -> Website + """ + + def populate_random(self, n=10): + fake = faker.Faker() + for _ in range(n): + profile = fake.profile() + with self.connection.transaction: + self.insert1(profile, ignore_extra_fields=True) + for url in profile['website']: + self.Website().insert1( + dict(ssn=profile['ssn'], url_hash=Website().insert1_url(url))) + + @schema class TTestUpdate(dj.Lookup): definition = """ diff --git a/tests/test_cascading_delete.py b/tests/test_cascading_delete.py index 6988d432a..b6a324c8b 100644 --- a/tests/test_cascading_delete.py +++ b/tests/test_cascading_delete.py @@ -1,6 +1,6 @@ -from nose.tools import assert_false, assert_true, assert_equal +from nose.tools import assert_false, assert_true, assert_equal, raises import datajoint as dj -from .schema_simple import A, B, D, E, L +from .schema_simple import A, B, D, E, L, Website, Profile from .schema import ComplexChild, ComplexParent @@ -27,8 +27,19 @@ def test_delete_tree(): @staticmethod def test_stepwise_delete(): - assert_false(dj.config['safemode'], 'safemode must be off for testing') #TODO: just turn it off instead of warning - assert_true(L() and A() and B() and B.C(), 'schema population failed as a precondition to test') + assert_false(dj.config['safemode'], 'safemode must be off for testing') + assert_true(L() and A() and B() and B.C(), + 'schema population failed as a precondition to test') + B.C().delete(force=True) + assert_false(B.C(), 'failed to delete child tables') + B().delete() + assert_false(B(), 'failed to delete the parent table following child table deletion') + + @staticmethod + def test_stepwise_delete(): + assert_false(dj.config['safemode'], 'safemode must be off for testing') + assert_true(L() and A() and B() and B.C(), + 'schema population failed as a precondition to test') B.C().delete(force=True) assert_false(B.C(), 'failed to delete child tables') B().delete() @@ -96,3 +107,18 @@ def test_delete_complex_keys(): (ComplexParent & restriction).delete() assert len(ComplexParent & restriction) == 0, 'Parent record was not deleted' assert len(ComplexChild & restriction) == 0, 'Child record was not deleted' + + def test_delete_master(self): + Profile().populate_random() + Profile().delete() + + @raises(dj.DataJointError) + def test_delete_parts(self): + """test issue #151""" + Profile().populate_random() + Website().delete() + + @raises(dj.DataJointError) + def test_drop_part(self): + """test issue #374""" + Website().drop() diff --git a/tests/test_schema.py b/tests/test_schema.py index 42d4e0c0e..1ff41efdf 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -124,7 +124,8 @@ def test_list_tables(): # https://github.com/datajoint/datajoint-python/issues/838 assert(set(['reserved_word', '#l', '#a', '__d', '__b', '__b__c', '__e', '__e__f', '#outfit_launch', '#outfit_launch__outfit_piece', '#i_j', '#j_i', - '#t_test_update', '#data_a', '#data_b', 'f', '#argmax_test' + '#t_test_update', '#data_a', '#data_b', 'f', '#argmax_test', + '#website', 'profile', 'profile__website' ]) == set(schema_simple.list_tables())) From d1dd9102941614454e54b7e37f8a6cc2240f2720 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 14 Sep 2021 12:44:59 -0500 Subject: [PATCH 06/16] add type hinting in `utils.get_master` Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datajoint/utils.py b/datajoint/utils.py index e718d5481..057e7e820 100644 --- a/datajoint/utils.py +++ b/datajoint/utils.py @@ -31,18 +31,20 @@ def user_choice(prompt, choices=("yes", "no"), default=None): return response -def get_master(full_table_name): +def get_master(full_table_name: str) -> str: """ If the table name is that of a part table, then return what the master table name would be. - :param full_table_name: - :return: Supposed master full table name or empty string if not a part table name. - This follows DataJoint's table naming convention where a master and a part must be in the - same schema and the part table is prefixed with the mater table name + '__'. + same schema and the part table is prefixed with the master table name + ``__``. Example: `ephys`.`session` -- master `ephys`.`session__recording` -- part + + :param full_table_name: Full table name including part. + :type full_table_name: str + :return: Supposed master full table name or empty string if not a part table name. + :rtype: str """ match = re.match(r'(?P`\w+`.`\w+)__(?P\w+)`', full_table_name) return match['master'] + '`' if match else '' From bd16e0fe6f7a87c845ff247796d0d01925fa834e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 14 Sep 2021 12:59:10 -0500 Subject: [PATCH 07/16] minor cleanup in tests --- tests/test_cascading_delete.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tests/test_cascading_delete.py b/tests/test_cascading_delete.py index b6a324c8b..ebace43b7 100644 --- a/tests/test_cascading_delete.py +++ b/tests/test_cascading_delete.py @@ -35,20 +35,11 @@ def test_stepwise_delete(): B().delete() assert_false(B(), 'failed to delete the parent table following child table deletion') - @staticmethod - def test_stepwise_delete(): - assert_false(dj.config['safemode'], 'safemode must be off for testing') - assert_true(L() and A() and B() and B.C(), - 'schema population failed as a precondition to test') - B.C().delete(force=True) - assert_false(B.C(), 'failed to delete child tables') - B().delete() - assert_false(B(), 'failed to delete the parent table following child table deletion') - @staticmethod def test_delete_tree_restricted(): assert_false(dj.config['safemode'], 'safemode must be off for testing') - assert_true(L() and A() and B() and B.C() and D() and E() and E.F(), 'schema is not populated') + assert_true(L() and A() and B() and B.C() and D() and E() and E.F(), + 'schema is not populated') cond = 'cond_in_a' rel = A() & cond rest = dict( From 17d3bb49dc3aa8c391830b90feef14b48d38cfc1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 14 Sep 2021 13:01:53 -0500 Subject: [PATCH 08/16] switch from nosetest to pytest asserts in test_stepwise_delete --- tests/test_cascading_delete.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_cascading_delete.py b/tests/test_cascading_delete.py index ebace43b7..362685bad 100644 --- a/tests/test_cascading_delete.py +++ b/tests/test_cascading_delete.py @@ -27,13 +27,12 @@ def test_delete_tree(): @staticmethod def test_stepwise_delete(): - assert_false(dj.config['safemode'], 'safemode must be off for testing') - assert_true(L() and A() and B() and B.C(), - 'schema population failed as a precondition to test') + assert not dj.config['safemode'], 'safemode must be off for testing' + assert L() and A() and B() and B.C(), 'schema population failed' B.C().delete(force=True) - assert_false(B.C(), 'failed to delete child tables') + assert not B.C(), 'failed to delete child tables' B().delete() - assert_false(B(), 'failed to delete the parent table following child table deletion') + assert not B(), 'failed to delete from the parent table following child table deletion' @staticmethod def test_delete_tree_restricted(): From f50533592a5aa24591962be29dee3c915f123de1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 14 Sep 2021 13:30:51 -0500 Subject: [PATCH 09/16] add random seed to faker in tests.schema_simple to make tests deterministic --- test_requirements.txt | 2 +- tests/schema_simple.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 6f13c7c6d..0ed24a620 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,4 +1,4 @@ nose nose-cov coveralls -Faker +faker diff --git a/tests/schema_simple.py b/tests/schema_simple.py index 9a0231186..721207936 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -15,6 +15,8 @@ schema = dj.Schema(PREFIX + '_relational', locals(), connection=dj.conn(**CONN_INFO)) +faker.Faker.seed(0) # make deterministic + @schema class IJ(dj.Lookup): From 58a294b2225fc756ffd260e9017ed21598b716b9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 15 Sep 2021 10:14:56 -0500 Subject: [PATCH 10/16] set RNG seed for faker in tests --- tests/schema_simple.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/schema_simple.py b/tests/schema_simple.py index 721207936..73553d286 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -15,8 +15,6 @@ schema = dj.Schema(PREFIX + '_relational', locals(), connection=dj.conn(**CONN_INFO)) -faker.Faker.seed(0) # make deterministic - @schema class IJ(dj.Lookup): @@ -195,6 +193,7 @@ class Website(dj.Part): def populate_random(self, n=10): fake = faker.Faker() + faker.Faker.seed(0) # make tests deterministic for _ in range(n): profile = fake.profile() with self.connection.transaction: From 67dda6c4aab90cc75a4b24707c6214c8fbc3d004 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 15 Sep 2021 12:58:38 -0500 Subject: [PATCH 11/16] docstring improvement in datajoint/table.py Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index b8f54077b..336e8505c 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -267,10 +267,10 @@ def update1(self, row): def insert1(self, row, **kwargs): """ - Insert one data record into the table.. + Insert one data record into the table. For ``kwargs``, see ``insert()``. + :param row: a numpy record, a dict-like object, or an ordered sequence to be inserted as one row. - For kwargs, see insert() """ self.insert((row,), **kwargs) From a56d63487e10d23fe1c3406eb3ae9ce1f130afe6 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 15 Sep 2021 12:59:00 -0500 Subject: [PATCH 12/16] fix grammar in docstring Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/table.py b/datajoint/table.py index 336e8505c..5e56f989f 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -301,7 +301,7 @@ def insert(self, rows, replace=False, skip_duplicates=False, ignore_extra_fields # prohibit direct inserts into auto-populated tables if not allow_direct_insert and not getattr(self, '_allow_insert', True): raise DataJointError( - 'Inserts into an auto-populated table can only done inside ' + 'Inserts into an auto-populated table can only be done inside ' 'its make method during a populate call.' ' To override, set keyword argument allow_direct_insert=True.') From 54d4e4603d5573e6dce3a2bc7f9f123fac52e282 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 15 Sep 2021 13:01:13 -0500 Subject: [PATCH 13/16] Apply suggestions from code review docstring improvements in datajoint.table. Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/table.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index 5e56f989f..587de6aef 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -230,11 +230,12 @@ def external(self): def update1(self, row): """ update1 updates one existing entry in the table. - Caution: In DataJoint the primary modes for data manipulation is to insert and delete + Caution: In DataJoint the primary modes for data manipulation is to ``insert`` and ``delete`` entire records since referential integrity works on the level of records, not fields. Therefore, updates are reserved for corrective operations outside of main workflow. Use UPDATE methods sparingly with full awareness of potential violations of assumptions. - :param row: a dict containing the primary key values and the attributes to update. + + :param row: a ``dict`` containing the primary key values and the attributes to update. Setting an attribute value to None will reset it to the default value (if any) The primary key attributes must always be provided. Examples: @@ -284,8 +285,7 @@ def insert(self, rows, replace=False, skip_duplicates=False, ignore_extra_fields :param skip_duplicates: If True, silently skip duplicate inserts. :param ignore_extra_fields: If False, fields that are not in the heading raise error. :param allow_direct_insert: applies only in auto-populated tables. - If False (default), insert are allowed only from inside - the make callback. + If False (default), insert are allowed only from inside the make callback. Example:: >>> relation.insert([ >>> dict(subject_id=7, species="mouse", date_of_birth="2014-09-01"), @@ -596,7 +596,7 @@ def describe(self, context=None, printout=True): def _update(self, attrname, value=None): """ This is a deprecated function to be removed in datajoint 0.14. - Use .update1 instead. + Use ``.update1`` instead. Updates a field in one existing tuple. self must be restricted to exactly one entry. In DataJoint the principal way of updating data is to delete and re-insert the From e674d38473c1d378076f63811f047c95b4c5465a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 19 Jan 2022 17:53:10 -0600 Subject: [PATCH 14/16] set random seed for faker tests --- CHANGELOG.md | 4 ++-- docs-parts/intro/Releases_lang1.rst | 4 ++-- tests/schema_simple.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71e96647a..afe62eeac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ * Add - Expose proxy feature for S3 external stores (#961) PR #962 * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 -* Bugfix - Deletes and drops must include the master of each part. (#151 and #374) PR #957 +* Bugfix - Deletes and drops must include the master of each part. (#151, #374) PR #957 * Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956 * Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955 * Bugfix - Fix regression issue with `DISTINCT` clause and `GROUP_BY` (#914) PR #963 @@ -134,7 +134,7 @@ * Fix #628 - incompatibility with pyparsing 2.4.1 ### 0.11.1 -- Nov 15, 2018 -* Fix ordering of attributes in proj (#483 and #516) +* Fix ordering of attributes in proj (#483, #516) * Prohibit direct insert into auto-populated tables (#511) ### 0.11.0 -- Oct 25, 2018 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index fc6a7091d..55cefd108 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -6,7 +6,7 @@ * Add - Expose proxy feature for S3 external stores (#961) PR #962 * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 -* Bugfix - Deletes and drops must include the master of each part. (#151 and #374) PR #957 +* Bugfix - Deletes and drops must include the master of each part. (#151, #374) PR #957 * Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956 * Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955 * Bugfix - Fix sql code generation to comply with sql mode ``ONLY_FULL_GROUP_BY`` (#916) PR #965 @@ -141,7 +141,7 @@ 0.11.1 -- Nov 15, 2018 ---------------------- -* Fix ordering of attributes in proj (#483 and #516) +* Fix ordering of attributes in proj (#483, #516) * Prohibit direct insert into auto-populated tables (#511) 0.11.0 -- Oct 25, 2018 diff --git a/tests/schema_simple.py b/tests/schema_simple.py index 73553d286..9263fa01a 100644 --- a/tests/schema_simple.py +++ b/tests/schema_simple.py @@ -192,6 +192,7 @@ class Website(dj.Part): """ def populate_random(self, n=10): + faker.Faker.seed(0) fake = faker.Faker() faker.Faker.seed(0) # make tests deterministic for _ in range(n): From ad5cf904a511ce83dd5649641789226a9394c491 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 20 Jan 2022 10:28:29 -0600 Subject: [PATCH 15/16] style improvments --- datajoint/autopopulate.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 3aa9e78a8..baf2284ff 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -63,7 +63,8 @@ def _rename_attributes(table, props): if props['aliased'] else table.proj()) if self._key_source is None: - parents = self.target.parents(primary=True, as_objects=True, foreign_key_info=True) + parents = self.target.parents( + primary=True, as_objects=True, foreign_key_info=True) if not parents: raise DataJointError('A table must have dependencies ' 'from its primary key for auto-populate to work') @@ -74,17 +75,19 @@ def _rename_attributes(table, props): def make(self, key): """ - Derived classes must implement method `make` that fetches data from tables that are - above them in the dependency hierarchy, restricting by the given key, computes dependent - attributes, and inserts the new tuples into self. + Derived classes must implement method `make` that fetches data from tables + above them in the dependency hierarchy, restricting by the given key, + computes secondary attributes, and inserts the new tuples into self. """ - raise NotImplementedError('Subclasses of AutoPopulate must implement the method `make`') + raise NotImplementedError( + 'Subclasses of AutoPopulate must implement the method `make`') @property def target(self): """ :return: table to be populated. - In the typical case, dj.AutoPopulate is mixed into a dj.Table class by inheritance and the target is self. + In the typical case, dj.AutoPopulate is mixed into a dj.Table class by + inheritance and the target is self. """ return self @@ -111,11 +114,14 @@ def _jobs_to_do(self, restrictions): if not isinstance(todo, QueryExpression): raise DataJointError('Invalid key_source value') - # check if target lacks any attributes from the primary key of key_source + try: + # check if target lacks any attributes from the primary key of key_source raise DataJointError( - 'The populate target lacks attribute %s from the primary key of key_source' % next( - name for name in todo.heading.primary_key if name not in self.target.heading)) + 'The populate target lacks attribute %s ' + 'from the primary key of key_source' % next( + name for name in todo.heading.primary_key + if name not in self.target.heading)) except StopIteration: pass return (todo & AndList(restrictions)).proj() @@ -126,7 +132,8 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object """ table.populate() calls table.make(key) for every primary key in self.key_source for which there is not already a tuple in table. - :param restrictions: a list of restrictions each restrict (table.key_source - target.proj()) + :param restrictions: a list of restrictions each restrict + (table.key_source - target.proj()) :param suppress_errors: if True, do not terminate execution. :param return_exception_objects: return error objects instead of just error messages :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion @@ -259,5 +266,6 @@ def progress(self, *restrictions, display=True): print('%-20s' % self.__class__.__name__, 'Completed %d of %d (%2.1f%%) %s' % ( total - remaining, total, 100 - 100 * remaining / (total+1e-12), - datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')), flush=True) + datetime.datetime.strftime(datetime.datetime.now(), + '%Y-%m-%d %H:%M:%S')), flush=True) return remaining, total From 741f451cab28176bed5d0dd35a4a4e33be0a7235 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 20 Jan 2022 12:31:48 -0600 Subject: [PATCH 16/16] convert some nosetest asserts into python asserts in tests --- tests/test_cascading_delete.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/tests/test_cascading_delete.py b/tests/test_cascading_delete.py index 362685bad..6e730590a 100644 --- a/tests/test_cascading_delete.py +++ b/tests/test_cascading_delete.py @@ -32,13 +32,14 @@ def test_stepwise_delete(): B.C().delete(force=True) assert not B.C(), 'failed to delete child tables' B().delete() - assert not B(), 'failed to delete from the parent table following child table deletion' + assert not B(), \ + 'failed to delete from the parent table following child table deletion' @staticmethod def test_delete_tree_restricted(): - assert_false(dj.config['safemode'], 'safemode must be off for testing') - assert_true(L() and A() and B() and B.C() and D() and E() and E.F(), - 'schema is not populated') + assert not dj.config['safemode'], 'safemode must be off for testing' + assert L() and A() and B() and B.C() and D() and E() and E.F(), \ + 'schema is not populated' cond = 'cond_in_a' rel = A() & cond rest = dict( @@ -49,19 +50,14 @@ def test_delete_tree_restricted(): E=len(E()-rel), F=len(E.F()-rel)) rel.delete() - assert_false(rel or - (B() & rel) or - (B.C() & rel) or - (D() & rel) or - (E() & rel) or - (E.F() & rel), - 'incomplete delete') - assert_equal(len(A()), rest['A'], 'invalid delete restriction') - assert_equal(len(B()), rest['B'], 'invalid delete restriction') - assert_equal(len(B.C()), rest['C'], 'invalid delete restriction') - assert_equal(len(D()), rest['D'], 'invalid delete restriction') - assert_equal(len(E()), rest['E'], 'invalid delete restriction') - assert_equal(len(E.F()), rest['F'], 'invalid delete restriction') + assert not (rel or B() & rel or B.C() & rel or D() & rel or E() & rel + or (E.F() & rel)), 'incomplete delete' + assert len(A()) == rest['A'], 'invalid delete restriction' + assert len(B()) == rest['B'], 'invalid delete restriction' + assert len(B.C()) == rest['C'], 'invalid delete restriction' + assert len(D()) == rest['D'], 'invalid delete restriction' + assert len(E()) == rest['E'], 'invalid delete restriction' + assert len(E.F()) == rest['F'], 'invalid delete restriction' @staticmethod def test_delete_lookup():