Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions librepo/handle.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ lr_handle_free(LrHandle *handle)
lr_lrmirrorlist_free(handle->metalink_mirrors);
lr_lrmirrorlist_free(handle->mirrors);
lr_metalink_free(handle->metalink);
lr_handle_free_list(&handle->metalink_exclude_domain);
lr_handle_free_list(&handle->metalink_exclude_location);
lr_handle_free_list(&handle->yumdlist);
lr_urlvars_free(handle->yumslist);
lr_handle_free_list(&handle->yumblist);
Expand Down Expand Up @@ -536,6 +538,8 @@ lr_handle_setopt(LrHandle *handle,
case LRO_URLS:
case LRO_YUMDLIST:
case LRO_YUMBLIST:
case LRO_METALINK_EXCLUDE_DOMAIN:
case LRO_METALINK_EXCLUDE_LOCATION:
{
int size = 0;
char **list = va_arg(arg, char **);
Expand All @@ -548,6 +552,10 @@ lr_handle_setopt(LrHandle *handle,
handle_list = &handle->yumdlist;
} else if (option == LRO_YUMBLIST) {
handle_list = &handle->yumblist;
} else if (option == LRO_METALINK_EXCLUDE_DOMAIN) {
handle_list = &handle->metalink_exclude_domain;
} else if (option == LRO_METALINK_EXCLUDE_LOCATION) {
handle_list = &handle->metalink_exclude_location;
}

lr_handle_free_list(handle_list);
Expand Down Expand Up @@ -1212,6 +1220,7 @@ lr_handle_prepare_metalink(LrHandle *handle, gchar *localpath, GError **err)

LrMetalink *ml = lr_metalink_init();
gboolean ret = lr_metalink_parse_file(ml,
handle,
fd,
metalink_file,
lr_xml_parser_warning_logger,
Expand Down
6 changes: 6 additions & 0 deletions librepo/handle.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,12 @@ typedef enum {
LRO_PASSWORD, /*!< (char *)
Password for HTTP authentication */

LRO_METALINK_EXCLUDE_DOMAIN, /*!< (char ** NULL-terminated)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This extends the API. You need to increase the minor version of librepo in VERSION.cmake.

List of domains to exclude from metalink */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, please add to the documentation the version since which this option is available.


LRO_METALINK_EXCLUDE_LOCATION, /*!< (char ** NULL-terminated)
List of locations to exclude from metalink */

LRO_SENTINEL, /*!< Sentinel */

} LrHandleOption; /*!< Handle config options */
Expand Down
6 changes: 6 additions & 0 deletions librepo/handle_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ struct _LrHandle {
LrMetalink *metalink; /*!<
Parsed metalink for repomd.xml */

char **metalink_exclude_domain; /*!<
List of domains to exclude from metalink */

char **metalink_exclude_location; /*!<
List of locations to exclude from metalink */

LrInternalMirrorlist *mirrors; /*!<
Mirrors from metalink or mirrorlist */

Expand Down
105 changes: 104 additions & 1 deletion librepo/metalink.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "util.h"
#include "metalink.h"
#include "xmlparser_internal.h"
#include "handle_internal.h"

/** TODO:
* - (?) Use GStringChunk
Expand Down Expand Up @@ -311,8 +312,44 @@ lr_metalink_start_handler(void *pdata, const xmlChar *xmlElement, const xmlChar
assert(!pd->metalinkurl);
assert(!pd->metalinkhash);

pd->skip_url = 0;

const char *val;
assert(!pd->metalinkurl);

if ((val = lr_find_attr("location", attr))) {
if (pd->handle && pd->handle->metalink_exclude_location) {
for (int i = 0; pd->handle->metalink_exclude_location[i]; i++) {
const char *pattern = pd->handle->metalink_exclude_location[i];
GError *regex_err = NULL;
GRegex *regex = g_regex_new(pattern,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you compile and validate the expression here, deep in the XML parser? The best place is in lr_handle_setopt().

G_REGEX_OPTIMIZE,
0,
&regex_err);
if (regex) {
gboolean matches = g_regex_match(regex, val, 0, NULL);
g_regex_unref(regex);
if (matches) {
pd->skip_url = 1;
return;
}
} else {
// Invalid regex, treat as literal string
g_warning("%s: Invalid regex for metalink location exclusion \"%s\": %s",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please spell "regex" in full as "regular expression". This is not Perl.

__func__, pattern,
regex_err ? regex_err->message : "unknown error");
if (regex_err)
g_error_free(regex_err);

if (g_strcmp0(val, pattern) == 0) {
pd->skip_url = 1;
return;
}
}
}
}
}

LrMetalinkUrl *url = lr_new_metalinkurl(pd->metalink);
if ((val = lr_find_attr("protocol", attr)))
url->protocol = g_strdup(val);
Expand Down Expand Up @@ -447,11 +484,75 @@ lr_metalink_end_handler(void *pdata, G_GNUC_UNUSED const xmlChar *element)
break;

case STATE_URL:
if (pd->skip_url) {
pd->skip_url = 0;
break;
}

assert(pd->metalink);
assert(pd->metalinkurl);
assert(!pd->metalinkhash);

pd->metalinkurl->url = g_strdup(pd->content);
// Check domain filtering before setting URL
if (pd->handle && pd->handle->metalink_exclude_domain && pd->content) {
GError *uri_err = NULL;
GUri *uri = g_uri_parse(pd->content, G_URI_FLAGS_NONE, &uri_err);
if (uri) {
const char *host = g_uri_get_host(uri);
gboolean excluded = FALSE;
if (host) {
for (int i = 0; pd->handle->metalink_exclude_domain[i]; i++) {
const char *pattern = pd->handle->metalink_exclude_domain[i];
GError *regex_err = NULL;
GRegex *regex = g_regex_new(pattern,
G_REGEX_OPTIMIZE,
0,
&regex_err);
if (regex) {
gboolean matches = g_regex_match(regex, host, 0, NULL);
g_regex_unref(regex);
if (matches) {
excluded = TRUE;
break;
}
} else {
// Invalid regex, treat as literal string
g_warning("%s: Invalid regex for metalink domain exclusion \"%s\": %s",
__func__, pattern,
regex_err ? regex_err->message : "unknown error");
if (regex_err)
g_error_free(regex_err);

if (g_strcmp0(host, pattern) == 0) {
excluded = TRUE;
break;
}
}
}
}
g_uri_unref(uri);

if (excluded) {
// remove last url
GSList *last = g_slist_last(pd->metalink->urls);
lr_free_metalinkurl(last->data);
pd->metalink->urls = g_slist_delete_link(pd->metalink->urls, last);
pd->metalinkurl = NULL;
}
} else {
g_debug("%s: Unable to parse URL \"%s\" for domain exclusion: %s",
__func__, pd->content,
uri_err ? uri_err->message : "unknown error");
if (uri_err)
g_error_free(uri_err);
}
}

// If URL was excluded above, metalinkurl will be NULL
if (pd->metalinkurl) {
pd->metalinkurl->url = g_strdup(pd->content);
}

pd->metalinkurl = NULL;
break;

Expand All @@ -464,6 +565,7 @@ lr_metalink_end_handler(void *pdata, G_GNUC_UNUSED const xmlChar *element)

gboolean
lr_metalink_parse_file(LrMetalink *metalink,
LrHandle *handle,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lr_metalink_parse_file() is a public function. This breaks API and ABI. I think we need to find a different way of passing the exclusion options. If there is no context or object to store the options in, I recommend adding a new function and making the old one a simple wrapper around it.

int fd,
const char *filename,
LrXmlParserWarningCb warningcb,
Expand Down Expand Up @@ -491,6 +593,7 @@ lr_metalink_parse_file(LrMetalink *metalink,
pd->parser = &parser;
pd->state = STATE_START;
pd->metalink = metalink;
pd->handle = handle;
pd->filename = (char *) filename;
pd->ignore = 1;
pd->found = 0;
Expand Down
4 changes: 4 additions & 0 deletions librepo/metalink.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <glib.h>
#include <librepo/xmlparser.h>
#include <librepo/handle.h>

G_BEGIN_DECLS

Expand Down Expand Up @@ -71,6 +72,8 @@ lr_metalink_init(void);

/** Parse metalink file.
* @param metalink Metalink object.
* @param handle LrHandle or NULL. If LrHandle is provided, it's used
* for filtering metalink data (e.g. by location or domain).
* @param fd File descriptor.
* @param filename File to look for in metalink file.
* @param warningcb ::LrXmlParserWarningCb function or NULL
Expand All @@ -80,6 +83,7 @@ lr_metalink_init(void);
*/
gboolean
lr_metalink_parse_file(LrMetalink *metalink,
LrHandle *handle,
int fd,
const char *filename,
LrXmlParserWarningCb warningcb,
Expand Down
22 changes: 22 additions & 0 deletions librepo/python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,20 @@

*String or None*. Set password for HTTP authentication.

.. data:: LRO_METALINK_EXCLUDE_DOMAIN

*List of strings*. List of regex patterns to exclude domains from metalink.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it a list? If it is a regular expression, then a string is enough.
What kind of regular expression is it? That needs to be documented.

Each pattern is matched against the domain name in metalink URLs. If the pattern
is not a valid regex, it will be treated as a literal string match.
Example: ``["^mirror\\.example\\.com$"]``

.. data:: LRO_METALINK_EXCLUDE_LOCATION

*List of strings*. List of regex patterns to exclude locations from metalink.
Each pattern is matched against the location attribute in metalink URLs. If the
pattern is not a valid regex, it will be treated as a literal string match.
Example: ``["^US$", "^(CA|MX)$"]``

.. data:: LRO_PROXY

*String or None*. Set proxy server address. Port could be
Expand Down Expand Up @@ -1280,6 +1294,14 @@ class Handle(_librepo.Handle):

See: :data:`.LRO_PASSWORD`

.. attribute:: metalink_exclude_domain:

See: :data:`.LRO_METALINK_EXCLUDE_DOMAIN`

.. attribute:: metalink_exclude_location:

See: :data:`.LRO_METALINK_EXCLUDE_LOCATION`

.. attribute:: proxy:

See: :data:`.LRO_PROXY`
Expand Down
2 changes: 2 additions & 0 deletions librepo/python/handle-py.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,8 @@ py_setopt(_HandleObject *self, PyObject *args)
case LRO_URLS:
case LRO_YUMDLIST:
case LRO_YUMBLIST:
case LRO_METALINK_EXCLUDE_DOMAIN:
case LRO_METALINK_EXCLUDE_LOCATION:
case LRO_HTTPHEADER:
{
Py_ssize_t len = 0;
Expand Down
2 changes: 2 additions & 0 deletions librepo/python/librepomodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ PyInit__librepo(void)
PYMODULE_ADDINTCONSTANT(LRO_USERPWD);
PYMODULE_ADDINTCONSTANT(LRO_USERNAME);
PYMODULE_ADDINTCONSTANT(LRO_PASSWORD);
PYMODULE_ADDINTCONSTANT(LRO_METALINK_EXCLUDE_DOMAIN);
PYMODULE_ADDINTCONSTANT(LRO_METALINK_EXCLUDE_LOCATION);
PYMODULE_ADDINTCONSTANT(LRO_PROXY);
PYMODULE_ADDINTCONSTANT(LRO_PROXYPORT);
PYMODULE_ADDINTCONSTANT(LRO_PROXYTYPE);
Expand Down
5 changes: 5 additions & 0 deletions librepo/xmlparser_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

G_BEGIN_DECLS

struct _LrHandle;

/** \defgroup xmlparser_internal Common stuff for XML parsers in Librepo (datatypes, etc.)
* \addtogroup xmlparser_internal
* @{
Expand Down Expand Up @@ -91,6 +93,8 @@ typedef struct {
ignore all subelements of the current file element */
int found; /*!<
wanted file was already parsed */
int skip_url; /*!<
skip currently parsed url element */

LrMetalink *metalink; /*!<
metalink object */
Expand All @@ -100,6 +104,7 @@ typedef struct {
Hash in progress or NULL */
LrMetalinkAlternate *metalinkalternate; /*!<
Alternate in progress or NULL */
struct _LrHandle *handle;

} LrParserData;

Expand Down
Loading