Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# CircleCI pipeline: run the test suite against a single-node Elasticsearch,
# then (on master only) build and publish the pip package.
version: 2
executorType: docker
jobs:
  test:
    docker:
      # Primary container: the steps below run here.
      - image: python:3.6.4-stretch
      # Secondary container: Elasticsearch started alongside the primary one.
      - image: elasticsearch:7.1.0
        environment:
          - cluster.name: elasticsearch
          - xpack.security.enabled: false
          - transport.host: localhost
          - network.host: 127.0.0.1
          - http.port: 9200
          - discovery.type: single-node
    working_directory: ~/code
    steps:
      - checkout
      - run:
          name: Install Dependencies
          command: pip install -e .[test]
      - run:
          name: Test
          command: pytest
  deploy:
    docker:
      - image: docker:18.06.1-git
    working_directory: ~/code
    steps:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
      - run:
          name: Install cli tools
          command: |
            apk --update add py-pip curl
            pip install --upgrade pip
      - run:
          name: Build and push pip package
          # Sourced (not executed), so the script's `set -e` and `exit`
          # apply to this step's shell.
          command: |
            source circle/build_pip_package.sh
workflows:
  version: 2
  test_deploy:
    jobs:
      - test:
          context: 360platform
      # Deploy only from master, and only after the test job has passed.
      - deploy:
          context: 360platform
          filters:
            branches:
              only:
                - master
          requires:
            - test
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ __pycache__/
# Distribution / packaging
.Python
env/
.idea
.venv
build/
develop-eggs/
dist/
Expand All @@ -22,6 +24,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
*.tar.gz

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down Expand Up @@ -57,4 +60,4 @@ docs/_build/
target/
image_match/web/static/tmp/

*.jpg
*.jpg
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include build.info
1 change: 1 addition & 0 deletions build.info
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '2.0.1'
32 changes: 32 additions & 0 deletions circle/build_pip_package.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
#
# Build the image-match source distribution with an auto-incremented patch
# version and upload it to the private Gemfury index.
#
# Steps:
#   1. Download the currently published image-match package.
#   2. If it belongs to the MAJOR.MINOR series, use its patch number + 1;
#      otherwise start the series at patch 0.
#   3. Write the resulting version to build.info and build the sdist.
#   4. Upload the sdist and fail unless the server answers HTTP 200.

set -e

MAJOR=2
MINOR=0
PATCH=0

# NOTE(review): this access token is committed in plain text. It is kept as a
# fallback so existing pipelines keep working, but it should be rotated and
# supplied through the FURY_TOKEN environment variable instead.
FURY_TOKEN=${FURY_TOKEN:-UmFoYTJTNACus1W8zjP8}

# Remove stale downloads so the glob below only sees the fresh one.
rm -f image-match-*.tar.gz
pip download --no-deps --extra-index-url "https://pypi.fury.io/${FURY_TOKEN}/360core/" image-match

# Only the first glob match is inspected (note the unconditional `break`).
# When nothing matches, the loop runs once with the literal pattern, the -e
# test fails, and PATCH stays at 0.
for DOWNLOADED_FILE in image-match-${MAJOR}.${MINOR}.*.gz; do
    if [[ -e "${DOWNLOADED_FILE}" ]]; then
        # Strip "image-match-MAJOR.MINOR." and ".tar.gz" to leave the patch number.
        LAST_PATCH=${DOWNLOADED_FILE#image-match-${MAJOR}.${MINOR}.}
        LAST_PATCH=${LAST_PATCH%.tar.gz}
        PATCH=$((LAST_PATCH + 1))
    fi
    break
done

echo "__version__ = '$MAJOR.$MINOR.$PATCH'" > build.info
PYTHONPATH=. python setup.py sdist
PACKAGENAME=dist/image-match-${MAJOR}.${MINOR}.${PATCH}.tar.gz

# -w prints only the HTTP status code; the response body is discarded.
STATUS=$(curl -w "%{http_code}" -o /dev/null -s -F "package=@${PACKAGENAME}" "https://${FURY_TOKEN}@push.fury.io/360core/")

# Quote STATUS so an empty value is a failed comparison rather than a test
# syntax error that would fall through as success.
if [ "${STATUS}" != 200 ]; then
    echo "curl response status: ${STATUS}" >&2
    # Exit statuses are 8-bit, so an HTTP code cannot be passed through
    # reliably (a multiple of 256 would read as success). Use a plain failure.
    exit 1
fi
1 change: 0 additions & 1 deletion image_match/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
__author__ = 'ryan'
__version__ = '1.1.2'
21 changes: 11 additions & 10 deletions image_match/elasticsearch_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ class SignatureES(SignatureDatabaseBase):

"""

def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100,
def __init__(self, es, index='images', timeout='10s', size=100,
*args, **kwargs):
"""Extra setup for Elasticsearch

Args:
es (elasticsearch): an instance of the elasticsearch python driver
index (Optional[string]): a name for the Elasticsearch index (default 'images')
doc_type (Optional[string]): a name for the document time (default 'image')
timeout (Optional[int]): how long to wait on an Elasticsearch query, in seconds (default 10)
size (Optional[int]): maximum number of Elasticsearch results (default 100)
*args (Optional): Variable length argument list to pass to base constructor
Expand All @@ -40,7 +39,6 @@ def __init__(self, es, index='images', doc_type='image', timeout='10s', size=100
"""
self.es = es
self.index = index
self.doc_type = doc_type
self.timeout = timeout
self.size = size

Expand All @@ -64,11 +62,12 @@ def search_single_record(self, rec, pre_filter=None):
if pre_filter is not None:
body['query']['bool']['filter'] = pre_filter

res = self.es.search(index=self.index,
doc_type=self.doc_type,
body=body,
size=self.size,
timeout=self.timeout)['hits']['hits']
res = self.es.search(
index=self.index,
body=body,
size=self.size,
timeout=self.timeout
)['hits']['hits']

sigs = np.array([x['_source']['signature'] for x in res])

Expand All @@ -91,7 +90,9 @@ def search_single_record(self, rec, pre_filter=None):

def insert_single_record(self, rec, refresh_after=False):
rec['timestamp'] = datetime.now()
self.es.index(index=self.index, doc_type=self.doc_type, body=rec, refresh=refresh_after)
self.es.index(
index=self.index, body=rec, refresh=refresh_after
)

def delete_duplicates(self, path):
"""Delete all but one entries in elasticsearch whose `path` value is equivalent to that of path.
Expand All @@ -108,4 +109,4 @@ def delete_duplicates(self, path):
if item['_source']['path'] == path]
if len(matching_paths) > 0:
for id_tag in matching_paths[1:]:
self.es.delete(index=self.index, doc_type=self.doc_type, id=id_tag)
self.es.delete(index=self.index, id=id_tag)
4 changes: 2 additions & 2 deletions image_match/goldberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,14 @@ def preprocess_image(image_or_path, bytestream=False, handle_mpo=False):
return rgb2gray(np.asarray(img, dtype=np.uint8))
elif type(image_or_path) in string_types or \
type(image_or_path) is text_type:
return imread(image_or_path, as_grey=True)
return imread(image_or_path, as_gray=True)
elif type(image_or_path) is bytes:
try:
img = Image.open(image_or_path)
arr = np.array(img.convert('RGB'))
except IOError:
# try again due to PIL weirdness
return imread(image_or_path, as_grey=True)
return imread(image_or_path, as_gray=True)
if handle_mpo:
# take the first images from the MPO
if arr.shape == (2,) and isinstance(arr[1].tolist(), MpoImageFile):
Expand Down
2 changes: 1 addition & 1 deletion image_match/signature_database_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def get_words(array, k, N):
words[i] = array[pos:pos+k]
else:
temp = array[pos:].copy()
temp.resize(k)
temp.resize(k, refcheck=False)
words[i] = temp

return words
Expand Down
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
image_match is a simple package for finding approximate image matches from a
image-match is a simple package for finding approximate image matches from a
corpus. It is similar, for instance, to pHash <http://www.phash.org/>, but
includes a database backend that easily scales to billions of images and
supports sustained high rates of image insertion: up to 10,000 images/s on our
Expand Down Expand Up @@ -56,9 +56,9 @@ def find_version(*file_paths):


setup(
name='image_match',
version=find_version('image_match', '__init__.py'),
description='image_match is a simple package for finding approximate '\
name='image-match',
version=find_version('', 'build.info'),
description='image-match is a simple package for finding approximate '\
'image matches from a corpus.',
long_description=__doc__,
url='https://github.com/ascribe/image-match/',
Expand Down Expand Up @@ -89,7 +89,7 @@ def find_version(*file_paths):
],
install_requires=[
'scikit-image>=0.14',
'elasticsearch>=5.0.0,<6.0.0',
'elasticsearch>=7.0.0,<8.0.0',
'six>=1.11.0',
],
tests_require=tests_require,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_elasticsearch_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
urlretrieve(test_img_url2, 'test2.jpg')

INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12])
DOC_TYPE = 'image'
MAPPINGS = {
"mappings": {
DOC_TYPE: {
"dynamic": True,
"properties": {
"metadata": {
Expand All @@ -31,7 +29,6 @@
}
}
}
}
}
}

Expand Down Expand Up @@ -74,7 +71,8 @@ def es():

@pytest.fixture
def ses(es, index_name):
return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE)
return SignatureES(es=es, index=index_name)


def test_elasticsearch_running(es):
i = 0
Expand Down
7 changes: 2 additions & 5 deletions tests/test_elasticsearch_driver_metadata_as_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
urlretrieve(test_img_url2, 'test2.jpg')

INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12])
DOC_TYPE = 'image'
MAPPINGS = {
"mappings": {
DOC_TYPE: {
"dynamic": True,
"properties": {
"metadata": {
Expand All @@ -32,7 +30,6 @@
}
}
}
}
}
}

Expand Down Expand Up @@ -75,7 +72,7 @@ def es():

@pytest.fixture
def ses(es, index_name):
return SignatureES(es=es, index=index_name, doc_type=DOC_TYPE)
return SignatureES(es=es, index=index_name)

def test_elasticsearch_running(es):
i = 0
Expand Down Expand Up @@ -134,4 +131,4 @@ def _nested_filter(tenant_id, project_id):
}
}
}
}
}
2 changes: 1 addition & 1 deletion tests/test_goldberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,4 @@ def test_difference():
sig1 = gis.generate_signature('test.jpg')
sig2 = gis.generate_signature(test_diff_img_url)
dist = gis.normalized_distance(sig1, sig2)
assert dist == 0.42263283502672722
assert dist == 0.424549547059671