diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..1422544 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,54 @@ +version: 2 +executorType: docker +jobs: + test: + docker: + - image: python:3.6.4-stretch + - image: elasticsearch:7.1.0 + environment: + - cluster.name: elasticsearch + - xpack.security.enabled: false + - transport.host: localhost + - network.host: 127.0.0.1 + - http.port: 9200 + - discovery.type: single-node + working_directory: ~/code + steps: + - checkout + - run: + name: Install Dependencies + command: pip install -e .[test] + - run: + name: Test + command: pytest + deploy: + docker: + - image: docker:18.06.1-git + working_directory: ~/code + steps: + - checkout + - setup_remote_docker: + docker_layer_caching: true + - run: + name: Install cli tools + command: | + apk --update add py-pip curl + pip install --upgrade pip + - run: + name: Build and push pip package + command: | + source circle/build_pip_package.sh +workflows: + version: 2 + test_deploy: + jobs: + - test: + context: 360platform + - deploy: + context: 360platform + filters: + branches: + only: + - master + requires: + - test diff --git a/.gitignore b/.gitignore index c3506e2..0b6a143 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ __pycache__/ # Distribution / packaging .Python env/ +.idea +.venv build/ develop-eggs/ dist/ @@ -22,6 +24,7 @@ var/ *.egg-info/ .installed.cfg *.egg +*.tar.gz # PyInstaller # Usually these files are written by a python script from a template @@ -57,4 +60,4 @@ docs/_build/ target/ image_match/web/static/tmp/ -*.jpg \ No newline at end of file +*.jpg diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ac36a5b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include build.info \ No newline at end of file diff --git a/build.info b/build.info new file mode 100644 index 0000000..3f39079 --- /dev/null +++ b/build.info @@ -0,0 +1 @@ +__version__ = '2.0.1' diff --git a/circle/build_pip_package.sh b/circle/build_pip_package.sh new file mode 100644 index 0000000..be235de --- /dev/null +++ b/circle/build_pip_package.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -e + +MAJOR=2 +MINOR=0 +PATCH=0 + +rm -f image-match-*.tar.gz +pip download --no-deps --extra-index-url https://pypi.fury.io/UmFoYTJTNACus1W8zjP8/360core/ image-match + +for DOWNLOADED_FILE in image-match-${MAJOR}.${MINOR}.*.gz; do + if [[ -e ${DOWNLOADED_FILE} ]]; then + DOWNLOADED_FILE=${DOWNLOADED_FILE/image-match-${MAJOR}.${MINOR}./} + DOWNLOADED_FILE=${DOWNLOADED_FILE/.tar.gz/} + PATCH=$((DOWNLOADED_FILE+1)) + fi + break +done + +echo "__version__ = '$MAJOR.$MINOR.$PATCH'" > build.info +PYTHONPATH=. python setup.py sdist +PACKAGENAME=dist/image-match-${MAJOR}.${MINOR}.${PATCH}.tar.gz + +exec 3>&1 +STATUS=$(curl -w "%{http_code}" -o /dev/null -s -F package=@${PACKAGENAME} https://UmFoYTJTNACus1W8zjP8@push.fury.io/360core/) + +if [ ${STATUS} != 200 ] +then + echo curl response status: ${STATUS} + exit ${STATUS} +fi diff --git a/image_match/__init__.py b/image_match/__init__.py index fc788f9..f11f592 100644 --- a/image_match/__init__.py +++ b/image_match/__init__.py @@ -1,2 +1 @@ __author__ = 'ryan' -__version__ = '1.1.2' diff --git a/image_match/elasticsearch_driver.py b/image_match/elasticsearch_driver.py index 1f105cc..45f3876 100644 --- a/image_match/elasticsearch_driver.py +++ b/image_match/elasticsearch_driver.py @@ -10,14 +10,13 @@ class SignatureES(SignatureDatabaseBase): """ - def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100, + def __init__(self, es, index='images', timeout='10s', size=100, *args, **kwargs): """Extra setup for Elasticsearch Args: es (elasticsearch): an instance of the elasticsearch python driver index (Optional[string]): a name for the Elasticsearch index (default 'images') - doc_type (Optional[string]): a name for the document time (default 'image') timeout (Optional[int]): how long to wait on an Elasticsearch query, in seconds (default 10) size (Optional[int]): maximum number of Elasticsearch results (default 100) *args (Optional): Variable length argument list to pass to base constructor @@ -40,7 +39,6 @@ def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100 """ self.es = es self.index = index - self.doc_type = doc_type self.timeout = timeout self.size = size @@ -64,11 +62,12 @@ def search_single_record(self, rec, pre_filter=None): if pre_filter is not None: body['query']['bool']['filter'] = pre_filter - res = self.es.search(index=self.index, - doc_type=self.doc_type, - body=body, - size=self.size, - timeout=self.timeout)['hits']['hits'] + res = self.es.search( + index=self.index, + body=body, + size=self.size, + timeout=self.timeout + )['hits']['hits'] sigs = np.array([x['_source']['signature'] for x in res]) @@ -91,7 +90,9 @@ def search_single_record(self, rec, pre_filter=None): def insert_single_record(self, rec, refresh_after=False): rec['timestamp'] = datetime.now() - self.es.index(index=self.index, doc_type=self.doc_type, body=rec, refresh=refresh_after) + self.es.index( + index=self.index, body=rec, refresh=refresh_after + ) def delete_duplicates(self, path): """Delete all but one entries in elasticsearch whose `path` value is equivalent to that of path. @@ -108,4 +109,4 @@ def delete_duplicates(self, path): if item['_source']['path'] == path] if len(matching_paths) > 0: for id_tag in matching_paths[1:]: - self.es.delete(index=self.index, doc_type=self.doc_type, id=id_tag) + self.es.delete(index=self.index, id=id_tag) diff --git a/image_match/goldberg.py b/image_match/goldberg.py index da93ff8..e053b1c 100644 --- a/image_match/goldberg.py +++ b/image_match/goldberg.py @@ -236,14 +236,14 @@ def preprocess_image(image_or_path, bytestream=False, handle_mpo=False): return rgb2gray(np.asarray(img, dtype=np.uint8)) elif type(image_or_path) in string_types or \ type(image_or_path) is text_type: - return imread(image_or_path, as_grey=True) + return imread(image_or_path, as_gray=True) elif type(image_or_path) is bytes: try: img = Image.open(image_or_path) arr = np.array(img.convert('RGB')) except IOError: # try again due to PIL weirdness - return imread(image_or_path, as_grey=True) + return imread(image_or_path, as_gray=True) if handle_mpo: # take the first images from the MPO if arr.shape == (2,) and isinstance(arr[1].tolist(), MpoImageFile): diff --git a/image_match/signature_database_base.py b/image_match/signature_database_base.py index 42eadd0..63e5aba 100644 --- a/image_match/signature_database_base.py +++ b/image_match/signature_database_base.py @@ -404,7 +404,7 @@ def get_words(array, k, N): words[i] = array[pos:pos+k] else: temp = array[pos:].copy() - temp.resize(k) + temp.resize(k, refcheck=False) words[i] = temp return words diff --git a/setup.py b/setup.py index e05c1f1..4c15bd4 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ """ -image_match is a simple package for finding approximate image matches from a +image-match is a simple package for finding approximate image matches from a corpus. It is similar, for instance, to pHash , but includes a database backend that easily scales to billions of images and supports sustained high rates of image insertion: up to 10,000 images/s on our @@ -56,9 +56,9 @@ def find_version(*file_paths): setup( - name='image_match', - version=find_version('image_match', '__init__.py'), - description='image_match is a simple package for finding approximate '\ + name='image-match', + version=find_version('', 'build.info'), + description='image-match is a simple package for finding approximate '\ 'image matches from a corpus.', long_description=__doc__, url='https://github.com/ascribe/image-match/', @@ -89,7 +89,7 @@ def find_version(*file_paths): ], install_requires=[ 'scikit-image>=0.14', - 'elasticsearch>=5.0.0,<6.0.0', + 'elasticsearch>=7.0.0,<8.0.0', 'six>=1.11.0', ], tests_require=tests_require, diff --git a/tests/test_elasticsearch_driver.py b/tests/test_elasticsearch_driver.py index 2a6daaa..2472703 100644 --- a/tests/test_elasticsearch_driver.py +++ b/tests/test_elasticsearch_driver.py @@ -17,10 +17,8 @@ urlretrieve(test_img_url2, 'test2.jpg') INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12]) -DOC_TYPE = 'image' MAPPINGS = { "mappings": { - DOC_TYPE: { "dynamic": True, "properties": { "metadata": { @@ -31,7 +29,6 @@ } } } - } } } @@ -74,7 +71,8 @@ def es(): @pytest.fixture def ses(es, index_name): - return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE) + return SignatureES(es=es, index=index_name) + def test_elasticsearch_running(es): i = 0 diff --git a/tests/test_elasticsearch_driver_metadata_as_nested.py b/tests/test_elasticsearch_driver_metadata_as_nested.py index 3c37c7c..4103222 100644 --- a/tests/test_elasticsearch_driver_metadata_as_nested.py +++ b/tests/test_elasticsearch_driver_metadata_as_nested.py @@ -17,10 +17,8 @@ urlretrieve(test_img_url2, 'test2.jpg') INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12]) -DOC_TYPE = 'image' MAPPINGS = { "mappings": { - DOC_TYPE: { "dynamic": True, "properties": { "metadata": { @@ -32,7 +30,6 @@ } } } - } } } @@ -75,7 +72,7 @@ def es(): @pytest.fixture def ses(es, index_name): - return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE) + return SignatureES(es=es, index=index_name) def test_elasticsearch_running(es): i = 0 @@ -134,4 +131,4 @@ def _nested_filter(tenant_id, project_id): } } } - } \ No newline at end of file + } diff --git a/tests/test_goldberg.py b/tests/test_goldberg.py index 83857dc..7ba52d2 100644 --- a/tests/test_goldberg.py +++ b/tests/test_goldberg.py @@ -74,4 +74,4 @@ def test_difference(): sig1 = gis.generate_signature('test.jpg') sig2 = gis.generate_signature(test_diff_img_url) dist = gis.normalized_distance(sig1, sig2) - assert dist == 0.42263283502672722 + assert dist == 0.424549547059671