sfeed

RSS and Atom parser
git clone git://git.codemadness.org/sfeed
Log | Files | Refs | README | LICENSE

commit aeb1398411ce245fa7982365640f7852d63b3d52
parent dbb7f7b66d2d10a4bf14a404b66fa20cbf8a02ca
Author: Hiltjo Posthuma <[email protected]>
Date:   Sat,  4 Feb 2023 12:34:56 +0100

README: describe how to add new parsed tags and fields to sfeed.c

Diffstat:
M README | 90
1 file changed, 90 insertions(+), 0 deletions(-)

diff --git a/README b/README @@ -1070,6 +1070,96 @@ file: - - - sfeed.c: adding new XML tags or sfeed(5) fields to the parser ------------------------------------------------------------- sfeed.c contains definitions to parse XML tags and map them to sfeed(5) TSV fields. Parsed RSS and Atom tag names are first stored as a TagId, which is a number. This TagId is then mapped to the output field index. * Add a new TagId enum for the tag. * (optional) Add a new FeedField* enum for the new output field or you can map it to an existing field. * Add the new XML tag name to the array variable of parsed RSS or Atom tags: rsstags[] or atomtags[]. These must be defined in alphabetical order, because a binary search is used which uses the strcasecmp() function. * Add the parsed TagId to the output field in the array variable fieldmap[]. When another tag is also mapped to the same output field then the tag with the highest TagId number value overrides the mapped field: the order is from least important to most important. * If this defined tag is just using the inner data of the XML tag, then this definition is enough. If it, for example, has to parse a certain attribute, you have to add a check for the TagId to the xmlattr() callback function. * (optional) Print the new field in the printfields() function. 
Below is a patch example to add the MRSS "media:content" field as a new field: diff --git a/sfeed.c b/sfeed.c --- a/sfeed.c +++ b/sfeed.c @@ -50,7 +50,7 @@ enum TagId { RSSTagGuidPermalinkTrue, /* must be defined after GUID, because it can be a link (isPermaLink) */ RSSTagLink, - RSSTagEnclosure, + RSSTagMediaContent, RSSTagEnclosure, RSSTagAuthor, RSSTagDccreator, RSSTagCategory, /* Atom */ @@ -81,7 +81,7 @@ typedef struct field { enum { FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent, FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory, - FeedFieldLast + FeedFieldMediaContent, FeedFieldLast }; typedef struct feedcontext { @@ -137,6 +137,7 @@ static const FeedTag rsstags[] = { { STRP("enclosure"), RSSTagEnclosure }, { STRP("guid"), RSSTagGuid }, { STRP("link"), RSSTagLink }, + { STRP("media:content"), RSSTagMediaContent }, { STRP("media:description"), RSSTagMediaDescription }, { STRP("pubdate"), RSSTagPubdate }, { STRP("title"), RSSTagTitle } @@ -180,6 +181,7 @@ static const int fieldmap[TagLast] = { [RSSTagGuidPermalinkFalse] = FeedFieldId, [RSSTagGuidPermalinkTrue] = FeedFieldId, /* special-case: both a link and an id */ [RSSTagLink] = FeedFieldLink, + [RSSTagMediaContent] = FeedFieldMediaContent, [RSSTagEnclosure] = FeedFieldEnclosure, [RSSTagAuthor] = FeedFieldAuthor, [RSSTagDccreator] = FeedFieldAuthor, @@ -677,6 +679,8 @@ printfields(void) string_print_uri(&ctx.fields[FeedFieldEnclosure].str); putchar(FieldSeparator); string_print_trimmed_multi(&ctx.fields[FeedFieldCategory].str); + putchar(FieldSeparator); + string_print_trimmed(&ctx.fields[FeedFieldMediaContent].str); putchar('\n'); if (ferror(stdout)) /* check for errors but do not flush */ @@ -718,7 +722,7 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, } if (ctx.feedtype == FeedTypeRSS) { - if (ctx.tag.id == RSSTagEnclosure && + if ((ctx.tag.id == RSSTagEnclosure || ctx.tag.id == RSSTagMediaContent) && isattr(n, nl, STRP("url"))) { string_append(&tmpstr, v, vl); } 
else if (ctx.tag.id == RSSTagGuid && - - - Running custom commands inside the sfeed_curses program -------------------------------------------------------