Skip to content

Commit

Permalink
handle cases with several authority types to fix #276
Browse files Browse the repository at this point in the history
  • Loading branch information
SvenLieber committed Aug 1, 2024
1 parent 83c17d6 commit 58bf8ae
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
2 changes: 1 addition & 1 deletion data-sources/kbr/authority-marc-to-csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def main():

# Parse record based on its type (person or organization)
authorityID = utils.getElementValue(elem.find('./marc:controlfield[@tag="001"]', ALL_NS))
authorityType = utils.getElementValue(elem.xpath('./marc:datafield[@tag="075"]/marc:subfield[@code="a"]', namespaces=ALL_NS))
authorityType = utils.getUniqueValue(utils.getElementValue(elem.xpath('./marc:datafield[@tag="075"]/marc:subfield[@code="a"]', namespaces=ALL_NS)))

if authorityType == 'p':
# TYPE PERSON
Expand Down
33 changes: 31 additions & 2 deletions data-sources/kbr/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime
import xml.etree.ElementTree as ET
#import xml.etree.ElementTree as ET
import lxml.etree as ET
import unicodedata as ud
import enchant
import re
Expand All @@ -13,7 +14,7 @@ def getListOfIdentifiers(authorityID, rawString, identifierName, stats):
>>> getListOfIdentifiers('1', '0000000000000001', 'ISNI', {})
['0000000000000001']
>>> getListOfIdentifiers('1', '0000000000000001;0000 0000 0000 0002', 'ISNI', {})
['0000000000000001','0000000000000002']
['0000000000000001', '0000000000000002']
>>> getListOfIdentifiers('1', '1234', 'VIAF', {})
['1234']
"""
Expand Down Expand Up @@ -850,6 +851,34 @@ def getNormalizedISBN13(inputISBN):
else:
return ''

# -----------------------------------------------------------------------------
def getUniqueValue(value, sep=';'):
    """Collapse a separator-delimited string to its sorted, unique values.

    Added for records that carry the same value several times, see
    https://github.com/kbrbe/beltrans-data-integration/issues/276

    Args:
        value: The raw string, possibly containing several values joined
            by `sep`. `None` is treated like an empty string.
        sep: The separator between values, ';' by default.

    Returns:
        The distinct values, stripped of surrounding whitespace, sorted
        and joined by `sep`; an empty string if there are no values.

    It returns p if only p values were given
    >>> getUniqueValue('p')
    'p'
    >>> getUniqueValue('p;p')
    'p'

    It returns an empty string if an empty string or None was given
    >>> getUniqueValue('')
    ''
    >>> getUniqueValue(None)
    ''

    It returns a sep-separated string with the unique values
    >>> getUniqueValue('p;c;p;p')
    'c;p'
    >>> getUniqueValue('a|b|a', sep='|')
    'a|b'

    Blank segments and padding do not count as values of their own
    >>> getUniqueValue('p;')
    'p'
    >>> getUniqueValue('p; p')
    'p'
    >>> getUniqueValue(';')
    ''
    """

    # None and '' both mean "no value given"
    if not value:
        return ''

    # A set removes the duplicates; stripping first makes 'p' and ' p' equal
    uniqueValues = {part.strip() for part in value.split(sep)}

    # A trailing or doubled separator yields an empty part, which would
    # otherwise show up as a leading separator in the result (e.g. ';p')
    uniqueValues.discard('')

    # Sorting keeps the output deterministic regardless of set ordering
    return sep.join(sorted(uniqueValues))

# -----------------------------------------------------------------------------
if __name__ == "__main__":
import doctest
Expand Down

0 comments on commit 58bf8ae

Please sign in to comment.