Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,4 @@ The rake task ```store_annotations``` has more functions:
- these functions are idempotent: a clipping image is only fetched if it is not already present
(otherwise the task will output "No fetching [image]"),
and the clippings.csv is always regenerated from all the annotations, regardless of whether the annotations are old or new.
- note that the rake task needs to look at the annotations as rendered in the site, so you must run ```jekyll build```
before running the rake task. Otherwise you will see errors like ```error: pathspec 'annotations/heresies_01' did not match any file(s) known to git.```
If this happens, just run ```jekyll build``` and then run the rake task again. (This requirement will be removed in a future release.)

50 changes: 32 additions & 18 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
require 'fileutils'
require 'json'
require 'yaml'
require 'csv'
require 'slugify'
require 'sanitize'
require 'net/http'

require 'byebug'

require 'jekyll'

task :default => [:store_annotations]

task :store_annotations do
Expand Down Expand Up @@ -38,9 +41,14 @@ task :store_annotations do
puts "Updating #{manifest}"
update_manifest_copy(manifest)
end
end
# build jekyll site to get annotations for clippings
site = Jekyll::Site.new(Jekyll.configuration({
"source" => ".",
"destination" => "_site"})).process

make_clippings(manifest)

manifests.each do | manifest |
make_clippings(manifest, site)
end
end

Expand Down Expand Up @@ -94,21 +102,27 @@ def update_manifest_copy(manifest)
File.open("iiif/" + manifest + "/manifest.json", 'w+') { |f| f.write("---\nlayout: null\n---\n"+JSON.pretty_generate(manifest_json)) }
end

def make_clippings(manifest)

manifest_json = JSON.parse(File.read("iiif/" + manifest + "/manifest.json").gsub(/\A---(.|\n)*?---/, "").to_s)
def make_clippings(manifest, site)

manifest_file = File.read("iiif/" + manifest + "/manifest.json").gsub(/\A---(.|\n)*?---/, "").to_s
manifest_json = JSON.parse(manifest_file)

# select canvases with annotations from manifest
canvasesWithAnnos = manifest_json['sequences'][0]['canvases']
.select { |canvas| canvas['otherContent'] }
.select { |canvas| canvas['otherContent'][0]['@type'] == 'sc:AnnotationList' }

clippings = []

canvasesWithAnnos.each do |canvas|
canvasID = canvas['@id']
listpath = canvas['otherContent'][0]['@id'].gsub('{{ site.url }}{{ site.baseurl }}/', '')
list_json = JSON.parse(File.read('_site/' + listpath).to_s) #TODO remove dependence on generated _site

puts listpath
list_file = File.read('_site/'+listpath).to_s

# parse list file as JSON
list_json = JSON.parse(list_file)

list_json['resources'].each do |resource|
canvasOn = resource['on'][0]['full']
next 'WTF canvas ID doesn\'t match' unless canvasID == canvasOn
Expand All @@ -120,42 +134,42 @@ def make_clippings(manifest)
# build label and csv from specified data elements
labelElements = []
csvElements = {id: resource['@id'], item: manifest, canvas: canvasID}

canvasNum = canvasID.gsub(/.+\$([0-9]+)\/canvas.*/, '\1')
labelElements << canvasNum
csvElements[:canvasNum] = canvasNum

tagElements = []
tags.each do |tag|
labelElements << tag['chars']
tagElements << tag['chars']
end
csvElements[:tags] = tagElements.join('|')

textElements = []
texts.each do |text|
# strip html markup
longfilename = Sanitize.clean(text['chars']).strip
filename = longfilename.length > 180 ? longfilename[0..179] : longfilename
filename = longfilename.length > 100 ? longfilename[0..99] : longfilename # Edited to shorten further for working in deep paths
labelElements << filename
textElements << Sanitize.clean(text['chars']).strip
end
csvElements[:texts] = textElements.join('|')

labelElements << xywh
csvElements[:xywh] = xywh

# label ends up like 1-photo-woman-with-film-camera-1235-134-1126-637
label = labelElements.join(' ').slugify

imageRoot = canvas['images'][0]['resource']['service']['@id']
clippingURL = imageRoot + '/' + xywh + '/full/0/default.jpg'
csvElements[:clippingURL] = clippingURL

clippingsPath = 'clippings/' + manifest + '/' + canvasNum
clippingImage = clippingsPath + '/' + label + '.jpg'
csvElements[:clippingImage] = clippingImage

FileUtils.mkdir_p clippingsPath
# fetch clipping image, if not already fetched
if File.exist?(clippingImage)
Expand Down