From 8208f722100a0a0324f88ddc9f917ed63dffe2d2 Mon Sep 17 00:00:00 2001 From: oodiete Date: Thu, 31 Oct 2019 17:13:21 -0400 Subject: [PATCH 1/5] remove doc_type and small refactoring to support elasticsearch 7 --- .gitignore | 4 +++- image_match/elasticsearch_driver.py | 17 ++++++++++------- image_match/goldberg.py | 4 ++-- setup.py | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index c3506e2..2587fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ __pycache__/ # Distribution / packaging .Python env/ +.idea +.venv build/ develop-eggs/ dist/ @@ -57,4 +59,4 @@ docs/_build/ target/ image_match/web/static/tmp/ -*.jpg \ No newline at end of file +*.jpg diff --git a/image_match/elasticsearch_driver.py b/image_match/elasticsearch_driver.py index 1f105cc..714222e 100644 --- a/image_match/elasticsearch_driver.py +++ b/image_match/elasticsearch_driver.py @@ -64,11 +64,12 @@ def search_single_record(self, rec, pre_filter=None): if pre_filter is not None: body['query']['bool']['filter'] = pre_filter - res = self.es.search(index=self.index, - doc_type=self.doc_type, - body=body, - size=self.size, - timeout=self.timeout)['hits']['hits'] + res = self.es.search( + index=self.index, + body=body, + size=self.size, + timeout=self.timeout + )['hits']['hits'] sigs = np.array([x['_source']['signature'] for x in res]) @@ -91,7 +92,9 @@ def search_single_record(self, rec, pre_filter=None): def insert_single_record(self, rec, refresh_after=False): rec['timestamp'] = datetime.now() - self.es.index(index=self.index, doc_type=self.doc_type, body=rec, refresh=refresh_after) + self.es.index( + index=self.index, body=rec, refresh=refresh_after + ) def delete_duplicates(self, path): """Delete all but one entries in elasticsearch whose `path` value is equivalent to that of path. @@ -108,4 +111,4 @@ def delete_duplicates(self, path): if item['_source']['path'] == path] if len(matching_paths) > 0: for id_tag in matching_paths[1:]: - self.es.delete(index=self.index, doc_type=self.doc_type, id=id_tag) + self.es.delete(index=self.index, id=id_tag) diff --git a/image_match/goldberg.py b/image_match/goldberg.py index da93ff8..e053b1c 100644 --- a/image_match/goldberg.py +++ b/image_match/goldberg.py @@ -236,14 +236,14 @@ def preprocess_image(image_or_path, bytestream=False, handle_mpo=False): return rgb2gray(np.asarray(img, dtype=np.uint8)) elif type(image_or_path) in string_types or \ type(image_or_path) is text_type: - return imread(image_or_path, as_grey=True) + return imread(image_or_path, as_gray=True) elif type(image_or_path) is bytes: try: img = Image.open(image_or_path) arr = np.array(img.convert('RGB')) except IOError: # try again due to PIL weirdness - return imread(image_or_path, as_grey=True) + return imread(image_or_path, as_gray=True) if handle_mpo: # take the first images from the MPO if arr.shape == (2,) and isinstance(arr[1].tolist(), MpoImageFile): diff --git a/setup.py b/setup.py index e05c1f1..29c97db 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ def find_version(*file_paths): ], install_requires=[ 'scikit-image>=0.14', - 'elasticsearch>=5.0.0,<6.0.0', + 'elasticsearch>=7.0.0,<8.0.0', 'six>=1.11.0', ], tests_require=tests_require, From c939aebe37b8dcfdb93ea071f2cb2fc531e6a0c9 Mon Sep 17 00:00:00 2001 From: oodiete Date: Thu, 31 Oct 2019 17:45:39 -0400 Subject: [PATCH 2/5] update test --- tests/test_goldberg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_goldberg.py b/tests/test_goldberg.py index 83857dc..7ba52d2 100644 --- a/tests/test_goldberg.py +++ b/tests/test_goldberg.py @@ -74,4 +74,4 @@ def test_difference(): sig1 = gis.generate_signature('test.jpg') sig2 = gis.generate_signature(test_diff_img_url) dist = gis.normalized_distance(sig1, sig2) - assert dist == 0.42263283502672722 + assert dist == 0.424549547059671 From 94829b674c1d4764517fb692b8c55a5747d1f310 Mon Sep 17 00:00:00 2001 From: Obaro Odiete <37550560+oodiete@users.noreply.github.com> Date: Sun, 3 Nov 2019 14:49:34 -0500 Subject: [PATCH 3/5] setup circleci (#1) * setup circleci * deploy package to gemfury --- .circleci/config.yml | 54 +++++++++++++++++++ .gitignore | 1 + MANIFEST.in | 1 + build.info | 1 + circle/build_pip_package.sh | 32 +++++++++++ image_match/__init__.py | 1 - image_match/elasticsearch_driver.py | 4 +- setup.py | 8 +-- tests/test_elasticsearch_driver.py | 6 +-- ...elasticsearch_driver_metadata_as_nested.py | 7 +-- 10 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 MANIFEST.in create mode 100644 build.info create mode 100644 circle/build_pip_package.sh diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..1422544 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,54 @@ +version: 2 +executorType: docker +jobs: + test: + docker: + - image: python:3.6.4-stretch + - image: elasticsearch:7.1.0 + environment: + - cluster.name: elasticsearch + - xpack.security.enabled: false + - transport.host: localhost + - network.host: 127.0.0.1 + - http.port: 9200 + - discovery.type: single-node + working_directory: ~/code + steps: + - checkout + - run: + name: Install Dependencies + command: pip install -e .[test] + - run: + name: Test + command: pytest + deploy: + docker: + - image: docker:18.06.1-git + working_directory: ~/code + steps: + - checkout + - setup_remote_docker: + docker_layer_caching: true + - run: + name: Install cli tools + command: | + apk --update add py-pip curl + pip install --upgrade pip + - run: + name: Build and push pip package + command: | + source circle/build_pip_package.sh +workflows: + version: 2 + test_deploy: + jobs: + - test: + context: 360platform + - deploy: + context: 360platform + filters: + branches: + only: + - master + requires: + - test diff --git a/.gitignore b/.gitignore index 2587fbd..0b6a143 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ var/ *.egg-info/ .installed.cfg *.egg +*.tar.gz # PyInstaller # Usually these files are written by a python script from a template diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ac36a5b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include build.info \ No newline at end of file diff --git a/build.info b/build.info new file mode 100644 index 0000000..afced14 --- /dev/null +++ b/build.info @@ -0,0 +1 @@ +__version__ = '2.0.0' diff --git a/circle/build_pip_package.sh b/circle/build_pip_package.sh new file mode 100644 index 0000000..be235de --- /dev/null +++ b/circle/build_pip_package.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -e + +MAJOR=2 +MINOR=0 +PATCH=0 + +rm -f image-match-*.tar.gz +pip download --no-deps --extra-index-url https://pypi.fury.io/UmFoYTJTNACus1W8zjP8/360core/ image-match + +for DOWNLOADED_FILE in image-match-${MAJOR}.${MINOR}.*.gz; do + if [[ -e ${DOWNLOADED_FILE} ]]; then + DOWNLOADED_FILE=${DOWNLOADED_FILE/image-match-${MAJOR}.${MINOR}./} + DOWNLOADED_FILE=${DOWNLOADED_FILE/.tar.gz/} + PATCH=$((DOWNLOADED_FILE+1)) + fi + break +done + +echo "__version__ = '$MAJOR.$MINOR.$PATCH'" > build.info +PYTHONPATH=. python setup.py sdist +PACKAGENAME=dist/image-match-${MAJOR}.${MINOR}.${PATCH}.tar.gz + +exec 3>&1 +STATUS=$(curl -w "%{http_code}" -o /dev/null -s -F package=@${PACKAGENAME} https://UmFoYTJTNACus1W8zjP8@push.fury.io/360core/) + +if [ ${STATUS} != 200 ] +then + echo curl response status: ${STATUS} + exit ${STATUS} +fi diff --git a/image_match/__init__.py b/image_match/__init__.py index fc788f9..f11f592 100644 --- a/image_match/__init__.py +++ b/image_match/__init__.py @@ -1,2 +1 @@ __author__ = 'ryan' -__version__ = '1.1.2' diff --git a/image_match/elasticsearch_driver.py b/image_match/elasticsearch_driver.py index 714222e..45f3876 100644 --- a/image_match/elasticsearch_driver.py +++ b/image_match/elasticsearch_driver.py @@ -10,14 +10,13 @@ class SignatureES(SignatureDatabaseBase): """ - def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100, + def __init__(self, es, index='images', timeout='10s', size=100, *args, **kwargs): """Extra setup for Elasticsearch Args: es (elasticsearch): an instance of the elasticsearch python driver index (Optional[string]): a name for the Elasticsearch index (default 'images') - doc_type (Optional[string]): a name for the document time (default 'image') timeout (Optional[int]): how long to wait on an Elasticsearch query, in seconds (default 10) size (Optional[int]): maximum number of Elasticsearch results (default 100) *args (Optional): Variable length argument list to pass to base constructor @@ -40,7 +39,6 @@ def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100 """ self.es = es self.index = index - self.doc_type = doc_type self.timeout = timeout self.size = size diff --git a/setup.py b/setup.py index 29c97db..4c15bd4 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ """ -image_match is a simple package for finding approximate image matches from a +image-match is a simple package for finding approximate image matches from a corpus. It is similar, for instance, to pHash , but includes a database backend that easily scales to billions of images and supports sustained high rates of image insertion: up to 10,000 images/s on our @@ -56,9 +56,9 @@ def find_version(*file_paths): setup( - name='image_match', - version=find_version('image_match', '__init__.py'), - description='image_match is a simple package for finding approximate '\ + name='image-match', + version=find_version('', 'build.info'), + description='image-match is a simple package for finding approximate '\ 'image matches from a corpus.', long_description=__doc__, url='https://github.com/ascribe/image-match/', diff --git a/tests/test_elasticsearch_driver.py b/tests/test_elasticsearch_driver.py index 2a6daaa..2472703 100644 --- a/tests/test_elasticsearch_driver.py +++ b/tests/test_elasticsearch_driver.py @@ -17,10 +17,8 @@ urlretrieve(test_img_url2, 'test2.jpg') INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12]) -DOC_TYPE = 'image' MAPPINGS = { "mappings": { - DOC_TYPE: { "dynamic": True, "properties": { "metadata": { @@ -31,7 +29,6 @@ } } } - } } } @@ -74,7 +71,8 @@ def es(): @pytest.fixture def ses(es, index_name): - return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE) + return SignatureES(es=es, index=index_name) + def test_elasticsearch_running(es): i = 0 diff --git a/tests/test_elasticsearch_driver_metadata_as_nested.py b/tests/test_elasticsearch_driver_metadata_as_nested.py index 3c37c7c..4103222 100644 --- a/tests/test_elasticsearch_driver_metadata_as_nested.py +++ b/tests/test_elasticsearch_driver_metadata_as_nested.py @@ -17,10 +17,8 @@ urlretrieve(test_img_url2, 'test2.jpg') INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12]) -DOC_TYPE = 'image' MAPPINGS = { "mappings": { - DOC_TYPE: { "dynamic": True, "properties": { "metadata": { @@ -32,7 +30,6 @@ } } } - } } } @@ -75,7 +72,7 @@ def es(): @pytest.fixture def ses(es, index_name): - return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE) + return SignatureES(es=es, index=index_name) def test_elasticsearch_running(es): i = 0 @@ -134,4 +131,4 @@ def _nested_filter(tenant_id, project_id): } } } - } \ No newline at end of file + } From c1eadaa2d5e3fc66b918c032ca3bf1b3153392eb Mon Sep 17 00:00:00 2001 From: Mathieu Robinson Date: Mon, 14 Dec 2020 15:52:10 -0400 Subject: [PATCH 4/5] This line fails in pytest pytest is checking the value of temp. numpy does not like that. refcheck=False will prevent numpy from complaining about it. We are not doing anything in these 3 lines that would break. --- image_match/signature_database_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_match/signature_database_base.py b/image_match/signature_database_base.py index 42eadd0..63e5aba 100644 --- a/image_match/signature_database_base.py +++ b/image_match/signature_database_base.py @@ -404,7 +404,7 @@ def get_words(array, k, N): words[i] = array[pos:pos+k] else: temp = array[pos:].copy() - temp.resize(k) + temp.resize(k, refcheck=False) words[i] = temp return words From 60359af91e07435738230bfb3f569597cebe73b4 Mon Sep 17 00:00:00 2001 From: Mathieu Robinson Date: Mon, 14 Dec 2020 15:53:47 -0400 Subject: [PATCH 5/5] Bump version --- build.info | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.info b/build.info index afced14..3f39079 100644 --- a/build.info +++ b/build.info @@ -1 +1 @@ -__version__ = '2.0.0' +__version__ = '2.0.1'