Skip to content

Commit

Permalink
fixtures: fix loans and items
Browse files Browse the repository at this point in the history
* Corrects small/big loans and items files. There were too few items
  for the small files.
* Improves error handling and logging for JSON reference resolvers.
* Uses more reliable loading of json files for records creation.
* Improves functions parameters documentation.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Nov 15, 2019
1 parent 7ab6e9b commit f9a2c1f
Show file tree
Hide file tree
Showing 32 changed files with 15,505 additions and 34,423 deletions.
16,112 changes: 4,708 additions & 11,404 deletions data/holdings_big.json

Large diffs are not rendered by default.

2,642 changes: 1,111 additions & 1,531 deletions data/holdings_small.json

Large diffs are not rendered by default.

26,206 changes: 7,442 additions & 18,764 deletions data/items_big.json

Large diffs are not rendered by default.

4,276 changes: 1,760 additions & 2,516 deletions data/items_small.json

Large diffs are not rendered by default.

87 changes: 56 additions & 31 deletions rero_ils/modules/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import sys
from collections import OrderedDict
from glob import glob
from json import JSONDecodeError, JSONDecoder, loads
from json import loads

import click
import jsonref
Expand All @@ -54,6 +54,7 @@
from .items.cli import create_items, reindex_items
from .loans.cli import create_loans
from .patrons.cli import import_users
from .utils import read_json_record
from ..modules.providers import append_fixtures_new_identifiers

_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)
Expand Down Expand Up @@ -200,48 +201,26 @@ def init(force):
bar.label = name


def read_json_record(json_file, buf_size=1024, decoder=JSONDecoder()):
"""Read lasy json records from file."""
buffer = json_file.read(5)
# we have to delete the first [ for an list of records
if buffer.startswith('['):
buffer = buffer[-1:].lstrip()
while True:
block = json_file.read(buf_size)
if not block:
break
buffer += block
pos = 0
while True:
try:
buffer = buffer.lstrip()
obj, pos = decoder.raw_decode(buffer)
except JSONDecodeError as err:
break
else:
yield obj
buffer = buffer[pos:]
if buffer.startswith(','):
buffer = buffer[1:]


@click.command('create')
@click.option('-a', '--append', 'append', is_flag=True, default=False)
@click.option('-r', '--reindex', 'reindex', is_flag=True, default=False)
@click.option('-c', '--dbcommit', 'dbcommit', is_flag=True, default=True)
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=True)
@click.option('-s', '--schema', 'schema', default=None)
@click.option('-p', '--pid_type', 'pid_type', default=None)
@click.option('-l', '--lazy', 'lazy', is_flag=True, default=False)
@click.argument('infile', type=click.File('r'), default=sys.stdin)
@with_appcontext
def create(infile, pid_type, schema, verbose, dbcommit, reindex, append):
def create(infile, append, reindex, dbcommit, verbose, schema, pid_type, lazy):
"""Load REROILS record.
infile: Json file
append: appends pids to database
reindex: reindex record by record
dbcommit: commit record to database
pid_type: record type
schema: record schema
lazy: lazy reads file
"""
click.secho(
'Loading {pid_type} records from {file_name}.'.format(
Expand All @@ -257,7 +236,13 @@ def create(infile, pid_type, schema, verbose, dbcommit, reindex, append):
count = 0
error_records = []
pids = []
for record in read_json_record(infile):
if lazy:
# try to lazy read json file (slower, better memory management)
records = read_json_record(infile)
else:
# load everything in memory (faster, bad memory management)
records = json.load(infile)
for record in records:
count += 1
if schema:
record['$schema'] = schema
Expand All @@ -282,7 +267,7 @@ def create(infile, pid_type, schema, verbose, dbcommit, reindex, append):
count=count,
pid_type=pid_type,
pid=record.get('pid', '???'),
err=err.args[0]
err=err
),
err=True,
fg='red'
Expand All @@ -300,14 +285,54 @@ def create(infile, pid_type, schema, verbose, dbcommit, reindex, append):
if append:
identifier = record_class.provider.identifier
try:
append_fixtures_new_identifiers(identifier, sorted(pids), pid_type)
append_fixtures_new_identifiers(
identifier,
sorted(pids, key=lambda x: int(x)),
pid_type
)
except Exception as err:
pass
click.secho(
"ERROR append fixtures new identifiers: {err}".format(
err=err
),
fg='red'
)


fixtures.add_command(create)


@click.command('count')
@click.option('-l', '--lazy', 'lazy', is_flag=True, default=False)
@click.argument('infile', type=click.File('r'), default=sys.stdin)
def count(infile, lazy):
"""Count records in file.
:param infile: Json file
:param lazy: lazy reads file
:return: count of records
"""
click.secho(
'Count records from {file_name}.'.format(
file_name=infile.name
),
fg='green'
)
if lazy:
# try to lazy read json file (slower, better memory management)
records = read_json_record(infile)
else:
# load everything in memory (faster, bad memory management)
records = json.load(infile)
count = 0
for record in records:
count += 1
click.echo('Count: {count}'.format(count=count))


fixtures.add_command(count)


@utils.command('check_license')
@click.argument('configfile', type=click.File('r'), default=sys.stdin)
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
Expand Down
4 changes: 2 additions & 2 deletions rero_ils/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ def reasons_not_to_delete(self):
def dumps(self, **kwargs):
"""Return pure Python dictionary with record metadata."""
dump = super(Document, self).dumps(**kwargs)
provision_activities = dump.get('provisionActivity')
provision_activities = dump.get('provisionActivity', [])
for provision_activity in provision_activities:
provision_activity["_text"] = \
publication_statement_text(provision_activity)
series = dump.get('series')
series = dump.get('series', [])
for series_element in series:
series_element["_text"] = series_format_text(series_element)
return dump
15 changes: 3 additions & 12 deletions rero_ils/modules/documents/jsonresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,11 @@


import jsonresolver
from flask import current_app
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

from ..jsonresolver import resolve_json_refs


@jsonresolver.route('/api/documents/<pid>', host='ils.rero.ch')
def document_resolver(pid):
"""Document resolver."""
persistent_id = PersistentIdentifier.get('doc', pid)
if persistent_id.status == PIDStatus.REGISTERED:
return dict(pid=persistent_id.pid_value)
current_app.logger.error(
'Doc resolver error: /api/documents/{pid} {persistent_id}'.format(
pid=pid,
persistent_id=persistent_id
)
)
raise Exception('unable to resolve')
return resolve_json_refs('doc', pid)
8 changes: 4 additions & 4 deletions rero_ils/modules/documents/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def enrich_document_data(sender, json=None, record=None, index=None,
**dummy_kwargs):
"""Signal sent before a record is indexed.
:params json: The dumped record dictionary which can be modified.
:params record: The record being indexed.
:params index: The index in which the record will be indexed.
:params doc_type: The doc_type for the record.
:param json: The dumped record dictionary which can be modified.
:param record: The record being indexed.
:param index: The index in which the record will be indexed.
:param doc_type: The doc_type for the record.
"""
# TODO: this multiplies the indexing time by 5, try another way!
document_index_name = DocumentsSearch.Meta.index
Expand Down
8 changes: 4 additions & 4 deletions rero_ils/modules/fees/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def enrich_fee_data(sender, json=None, record=None, index=None,
**dummy_kwargs):
"""Signal sent before a record is indexed.
:params json: The dumped record dictionary which can be modified.
:params record: The record being indexed.
:params index: The index in which the record will be indexed.
:params doc_type: The doc_type for the record.
:param json: The dumped record dictionary which can be modified.
:param record: The record being indexed.
:param index: The index in which the record will be indexed.
:param doc_type: The doc_type for the record.
"""
fee_index_name = FeesSearch.Meta.index
if index.startswith(fee_index_name):
Expand Down
15 changes: 3 additions & 12 deletions rero_ils/modules/holdings/jsonresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,11 @@
"""Holding resolver."""

import jsonresolver
from flask import current_app
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

from ..jsonresolver import resolve_json_refs


@jsonresolver.route('/api/holdings/<pid>', host='ils.rero.ch')
def holding_resolver(pid):
"""Resolver for holding record."""
persistent_id = PersistentIdentifier.get('hold', pid)
if persistent_id.status == PIDStatus.REGISTERED:
return dict(pid=persistent_id.pid_value)
current_app.logger.error(
'Doc resolver error: /api/holdings/{pid} {persistent_id}'.format(
pid=pid,
persistent_id=persistent_id
)
)
raise Exception('unable to resolve')
return resolve_json_refs('hold', pid)
8 changes: 4 additions & 4 deletions rero_ils/modules/holdings/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def enrich_holding_data(sender, json=None, record=None, index=None,
**dummy_kwargs):
"""Signal sent before a record is indexed.
:params json: The dumped record dictionary which can be modified.
:params record: The record being indexed.
:params index: The index in which the record will be indexed.
:params doc_type: The doc_type for the record.
:param json: The dumped record dictionary which can be modified.
:param record: The record being indexed.
:param index: The index in which the record will be indexed.
:param doc_type: The doc_type for the record.
"""
holding_index_name = HoldingsSearch.Meta.index
if index.startswith(holding_index_name):
Expand Down
8 changes: 3 additions & 5 deletions rero_ils/modules/item_types/jsonresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@


import jsonresolver
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

from ..jsonresolver import resolve_json_refs


@jsonresolver.route('/api/item_types/<pid>', host='ils.rero.ch')
def item_type_resolver(pid):
"""Item type resolver."""
persistent_id = PersistentIdentifier.get('itty', pid)
if persistent_id.status == PIDStatus.REGISTERED:
return dict(pid=persistent_id.pid_value)
raise Exception('unable to resolve')
return resolve_json_refs('itty', pid)
39 changes: 14 additions & 25 deletions rero_ils/modules/items/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,17 @@ def reindex_items():


@click.command('create_items')
@click.option(
'-c', '--count', 'count',
type=click.INT, default=-1, help='default=for all records'
)
@click.option(
'-i', '--itemscount', 'itemscount',
type=click.INT, default=1, help='default=1'
)
@click.option(
'-m', '--missing', 'missing', type=click.INT, default=5, help='default=5'
)
@click.option('-c', '--count', 'count',
type=click.INT, default=-1, help='default=for all records')
@click.option('-i', '--itemscount', 'itemscount',
type=click.INT, default=1, help='default=1')
@click.option('-m', '--missing', 'missing',
type=click.INT, default=5, help='default=5')
# @click.argument('output', type=click.File('w'))
@click.option(
'-t',
'--items_f',
'items_f',
help='Items output file.')
@click.option(
'-h',
'--holdings_f',
'holdings_f',
help='Holdings output file.')
@click.option('-t', '--items_f', 'items_f', help='Items output file.')
@click.option('-h', '--holdings_f', 'holdings_f', help='Holdings output file.')
@with_appcontext
def create_items(
count, itemscount, missing, items_f, holdings_f):
def create_items(count, itemscount, missing, items_f, holdings_f):
"""Create circulation items."""
def generate(count, itemscount, missing):

Expand All @@ -101,8 +87,10 @@ def generate(count, itemscount, missing):
count = len(documents_pids)

click.secho(
'Starting generating {0} items, random {1} ...'.format(
count, itemscount),
'Starting generating {count} items, random {itemsc} ...'.format(
count=count,
itemsc=itemscount
),
fg='green',
)

Expand Down Expand Up @@ -165,6 +153,7 @@ def generate(count, itemscount, missing):
)
item_pid += 1
yield item, new_holding

items = []
holdings = []
with open(holdings_f, 'w', encoding='utf-8') as holdings_file:
Expand Down
8 changes: 3 additions & 5 deletions rero_ils/modules/items/jsonresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@


import jsonresolver
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

from ..jsonresolver import resolve_json_refs


@jsonresolver.route('/api/items/<pid>', host='ils.rero.ch')
def item_resolver(pid):
"""Item resolver."""
persistent_id = PersistentIdentifier.get('item', pid)
if persistent_id.status == PIDStatus.REGISTERED:
return dict(pid=persistent_id.pid_value)
raise Exception('unable to resolve')
return resolve_json_refs('item', pid)
8 changes: 4 additions & 4 deletions rero_ils/modules/items/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def enrich_item_data(sender, json=None, record=None, index=None,
**dummy_kwargs):
"""Signal sent before a record is indexed.
:params json: The dumped record dictionary which can be modified.
:params record: The record being indexed.
:params index: The index in which the record will be indexed.
:params doc_type: The doc_type for the record.
:param json: The dumped record dictionary which can be modified.
:param record: The record being indexed.
:param index: The index in which the record will be indexed.
:param doc_type: The doc_type for the record.
"""
item_index_name = ItemsSearch.Meta.index
if index.startswith(item_index_name):
Expand Down
Loading

0 comments on commit f9a2c1f

Please sign in to comment.