diff --git a/Tmain/list-map-rexprs.d/exit-expected.txt b/Tmain/list-map-rexprs.d/exit-expected.txt new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/Tmain/list-map-rexprs.d/exit-expected.txt @@ -0,0 +1 @@ +0 diff --git a/Tmain/list-map-rexprs.d/run.sh b/Tmain/list-map-rexprs.d/run.sh new file mode 100644 index 0000000000..6cb21fab9c --- /dev/null +++ b/Tmain/list-map-rexprs.d/run.sh @@ -0,0 +1,38 @@ +# Copyright: 2025 Masatake YAMATO +# License: GPL-2 + +CTAGS=$1 + +$CTAGS --quiet --options=NONE \ + --langdef=Something \ + --map-Something='%\%ESCAPING\%%' \ + --map-Something=+'%ICASE%i' \ + --map-Something=+'%TEMP%' \ + --map-Something=-'%TEMP%' \ + --map-Something=+'%TEMPi%i' \ + --map-Something=-'%TEMPi%i' \ + --list-map-rexprs=Something + +$CTAGS --quiet --options=NONE \ + --langdef=Something \ + --map-Something='%aExpr%' \ + --map-Something=+'%\%ESCAPING\%%' \ + --map-Something=+'%ICASE%i' \ + --map-Something=+'%TEMP%' \ + --map-Something=-'%TEMP%' \ + --map-Something=+'%TEMPi%i' \ + --map-Something=-'%TEMPi%i' \ + --list-maps=Something + +$CTAGS --quiet --options=NONE \ + --langdef=Something \ + --kinddef-Something=t,type,types \ + --fields=+'{language}' \ + --regex-Something='/^([a-z]+)[ \t]+tdef;$/\1/t/' \ + \ + --map-Something='%something/.*\.c%' \ + --map-Something=+'%something/.*\.cpp%i' \ + --map-Something=+'%something/.*\.h%{icase}' \ + \ + -x --_xformat='%10N %{language}' \ + -R something diff --git a/Tmain/list-map-rexprs.d/something/input.CPP b/Tmain/list-map-rexprs.d/something/input.CPP new file mode 100644 index 0000000000..4919cb067c --- /dev/null +++ b/Tmain/list-map-rexprs.d/something/input.CPP @@ -0,0 +1 @@ +float tdef; diff --git a/Tmain/list-map-rexprs.d/something/input.c b/Tmain/list-map-rexprs.d/something/input.c new file mode 100644 index 0000000000..4702779302 --- /dev/null +++ b/Tmain/list-map-rexprs.d/something/input.c @@ -0,0 +1 @@ +int tdef; diff --git a/Tmain/list-map-rexprs.d/something/input.h 
b/Tmain/list-map-rexprs.d/something/input.h new file mode 100644 index 0000000000..32fbb67523 --- /dev/null +++ b/Tmain/list-map-rexprs.d/something/input.h @@ -0,0 +1 @@ +double tdef; diff --git a/Tmain/list-map-rexprs.d/stderr-expected.txt b/Tmain/list-map-rexprs.d/stderr-expected.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Tmain/list-map-rexprs.d/stdout-expected.txt b/Tmain/list-map-rexprs.d/stdout-expected.txt new file mode 100644 index 0000000000..fb12feab9b --- /dev/null +++ b/Tmain/list-map-rexprs.d/stdout-expected.txt @@ -0,0 +1,7 @@ +#EXPRESSION CASE +%ESCAPING% sensitive +ICASE insensitive +Something %aExpr% %\%ESCAPING\%% %ICASE%i + int Something + float Something + double Something diff --git a/Tmain/map-rexpr.d/macros.d/macros.vim b/Tmain/map-rexpr.d/macros.d/macros.vim new file mode 100644 index 0000000000..a43156e7a9 --- /dev/null +++ b/Tmain/map-rexpr.d/macros.d/macros.vim @@ -0,0 +1 @@ +%vimfiles_root %{_datadir}/vim/vimfiles diff --git a/Tmain/map-rexpr.d/run.sh b/Tmain/map-rexpr.d/run.sh new file mode 100644 index 0000000000..1645e0e866 --- /dev/null +++ b/Tmain/map-rexpr.d/run.sh @@ -0,0 +1,17 @@ +# Copyright: 2025 Masatake YAMATO +# License: GPL-2 + +CTAGS=$1 + +. 
../utils.sh + +echo "# < macros.d/macros.vm" +${CTAGS} --quiet --options=NONE --print-language macros.d/macros.vim +${CTAGS} --quiet --options=NONE -o - macros.d/macros.vim + +echo "# cd macros.d; < macros.vim" +( + cd macros.d; + ${CTAGS} --quiet --options=NONE --print-language macros.vim; + ${CTAGS} --quiet --options=NONE -o - macros.vim +) diff --git a/Tmain/map-rexpr.d/stderr-expected.txt b/Tmain/map-rexpr.d/stderr-expected.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Tmain/map-rexpr.d/stdout-expected.txt b/Tmain/map-rexpr.d/stdout-expected.txt new file mode 100644 index 0000000000..793ecb4241 --- /dev/null +++ b/Tmain/map-rexpr.d/stdout-expected.txt @@ -0,0 +1,5 @@ +# < macros.d/macros.vm +macros.d/macros.vim: RpmMacros +vimfiles_root macros.d/macros.vim /^%vimfiles_root %{_datadir}\/vim\/vimfiles$/;" m +# cd macros.d; < macros.vim +macros.vim: Vim diff --git a/Tmain/versioning.d/stdout-expected.txt b/Tmain/versioning.d/stdout-expected.txt index 629304f0a2..2fe67679b6 100644 --- a/Tmain/versioning.d/stdout-expected.txt +++ b/Tmain/versioning.d/stdout-expected.txt @@ -16,6 +16,10 @@ About TEST language enabled: yes version: 10.9 +Mappings/rexprs +------------------------------------------------------- + + Mappings/patterns ------------------------------------------------------- MYTEST diff --git a/docs/man/ctags-optlib.7.rst b/docs/man/ctags-optlib.7.rst index ab29253b1e..a90debd17f 100644 --- a/docs/man/ctags-optlib.7.rst +++ b/docs/man/ctags-optlib.7.rst @@ -31,7 +31,7 @@ readers should read :ref:`ctags(1) ` of Universal Ctags first. Following options are for defining (or customizing) a parser: * ``--langdef=`` -* ``--map-=[+|-]|`` +* ``--map-=[+|-]||`` * ``--kinddef-=,,`` * ``--regex-=////[]`` * ``--mline-regex-=////{mgroup=}[]`` @@ -103,7 +103,7 @@ Overview for defining a parser 3. Give a file pattern or file extension for activating the parser - Use ``--map-=[+|-]|``. + Use ``--map-=[+|-]||``. 4. 
Define kinds diff --git a/docs/man/ctags.1.rst b/docs/man/ctags.1.rst index e7a9566162..453590e673 100644 --- a/docs/man/ctags.1.rst +++ b/docs/man/ctags.1.rst @@ -499,26 +499,71 @@ Language Selection and Mapping Options Exuberant Ctags. See :ref:`ctags-incompatibilities(7) ` for the background of this incompatible change. -``--map-=[+|-]|`` + Unlike ``--map-`` option, you cannot specify relative-path regular + expressions to ``--langmap`` option. + +``--map-=[+|-]||`` This option provides the way to control mapping(s) of file names to languages in a more fine-grained way than ``--langmap`` option. In ctags, more than one language can map to a - file name ** or file ** (*N:1 map*). Alternatively, - ``--langmap`` option handle only *1:1 map*, only one language - mapping to one file name ** or file **. A typical N:1 - map is seen in C++ and ObjectiveC language; both languages have - a map to ``.h`` as a file extension. - - A file extension is specified by preceding the extension with a period (e.g. ``.c``). - A file name pattern is specified by enclosing the pattern in parentheses (e.g. - ``([Mm]akefile)``). A prefixed plus ('``+``') sign is for adding, and + relative-path regular expression (**), file name **, or + file ** (*N:1 map*). Alternatively, ``--langmap`` + option handles only *1:1 map*, only one language mapping to one + file name ** or file **. A typical N:1 map is + seen in C++ and ObjectiveC languages; both languages have a map to + ``.h`` as a file extension. + + A file extension is specified by preceding the extension with a period + (e.g. ``.c``). A file name pattern is specified by enclosing the pattern in + parentheses (e.g. ``([Mm]akefile)``). A relative-path regular expression is + specified by enclosing the expression in percent signs '``%``' + (e.g. ``%include/.*\.h%``). To include a literal percent sign + inside the regular expression, escape it as ``\%``. + + A prefixed plus ('``+``') sign is for adding, and minus ('``-``') is for removing. 
No prefix means replacing the map of **. - Unlike ``--langmap``, ** (or **) is not a list. - ``--map-`` takes one extension (or pattern). However, - the option can be specified with different arguments multiple times - in a command line. + Unlike ``--langmap``, ``--map-`` does not take a list; ``--map-`` + takes one extension, one pattern, or one regular expression. However, the + option can be specified with different arguments multiple times in a command + line. + + For file extensions and file name patterns, the match is performed + with a base file name, a file without any directory components. + For relative-path regular expressions, the match is performed with + a relative-path incorporating the directory components. A + relative-path is relative to the directory where ctags is launched. + + Assume your shell is in ``/project/x`` directory and you have the following + source tree under the directory. + + .. code-block:: + + src + └── lib + ├── data.c + └── logic.c + + If you run ctags with ``ctags -R src``, + the match is performed with ``src/lib/data.c`` and ``src/lib/logic.c``. If you + give ``--map-YourParser='%src/lib/.*\.c%'``, ctags + chooses ``YourParser`` parser for processing ``data.c`` and ``logic.c`` in the + tree. + + If your shell is in ``/project/x/src`` and you run + ``ctags -R lib``, ctags may not choose + ``YourParser`` because the match is performed with ``lib/data.c`` and + ``lib/logic.c``. + + A relative-path regular expression can take a flag controlling its testing. + The flag comes after the last percent sign. Currently only one flag is available: + + ``{icase}`` (one-letter form '``i``') + The regular expression is to be applied in a case-insensitive + manner. (e.g. ``%include/.*\.h%i`` or ``%include/.*\.h%{icase}``) + + The relative-path regular expression is available since version 6.3.0. .. _option_tags_file_contents: @@ -1243,14 +1288,24 @@ Listing Options languages, and then exits. 
``all`` is used as default value if the option argument is omitted. -``--list-maps[=(|all)]`` - Lists file name patterns and the file extensions which associate a file +``--list-map-rexprs[=(|all)]`` + Lists the relative-path regular expressions which associate a file name with a language for either the specified ** or ``all`` languages, and then exits. ``all`` is used as default value if the option argument is omitted. - To list the file extensions or file name patterns individually, use - ``--list-map-extensions`` or ``--list-map-patterns`` option. + (since version 6.3.0) + +``--list-maps[=(|all)]`` + Lists the file name patterns, the file extensions, and the relative-path + regular expressions which associate a file name with a language for either + the specified ** or ``all`` languages, and then exits. + ``all`` is used as default value if the option argument is omitted. + + To list the file extensions, file name patterns, or relative-path regular + expressions individually, use ``--list-map-extensions``, + ``--list-map-patterns``, or ``--list-map-rexprs`` option. + See the ``--langmap`` option, and "`Determining file language`_", above. This option does not work with ``--machinable`` nor @@ -1507,10 +1562,13 @@ are mapped to C++, C and ObjectiveC. These mappings can cause issues. ctags tries to select the proper parser for the source file by applying heuristics to its content, however it is not perfect. In case of issues one can use ``--language-force=``, -``--langmap=[,[...]]``, or the ``--map-=[+|-]|`` +``--langmap=[,[...]]``, or the ``--map-=[+|-]||`` options. (Some of the heuristics are applied whether ``--guess-language-eagerly`` is given or not.) +The order of testing is relative-path regular expressions (specified with +``--map-=``), file name patterns, then file extensions. + .. TODO: all heuristics??? To be confirmed. 
Heuristically guessing diff --git a/docs/news/HEAD.rst b/docs/news/HEAD.rst index cd24dc93ed..70d8f35549 100644 --- a/docs/news/HEAD.rst +++ b/docs/news/HEAD.rst @@ -18,6 +18,15 @@ New column, VER in the output of ``--list-{kinds-full,roles,fields,extras,pseudo Indicates the versions of ctags output (or the parser) introducing the item in the list. +Extend ``--map-=`` option to choose a parser using regular expressions + + ``--map-=[+]%REXPR%`` maps relative-paths + that match the regular expression to the given language; ``--langmap`` does not accept regular expressions. + + The new ``--list-map-rexprs`` lists all regular-expression-based mappings. + + ``--list-maps`` has also been extended to include regular-expression-based mappings. + Incompatible changes --------------------------------------------------------------------- diff --git a/extra-cmds/utiltest.c b/extra-cmds/utiltest.c index df35a7b2cc..d0e0b521cf 100644 --- a/extra-cmds/utiltest.c +++ b/extra-cmds/utiltest.c @@ -278,27 +278,27 @@ static void test_routines_strrstr(void) TEST_CHECK(strcmp(strrstr("abcdcdb", "cd"), "cdb") == 0); } -static void test_routines_baseFilenameSansExtensionNew(void) +static void test_routines_filenameSansExtensionNew(void) { char *bs; - TEST_CHECK ((bs = baseFilenameSansExtensionNew ("a.in", ".in")) + TEST_CHECK ((bs = filenameSansExtensionNew ("a.in", ".in")) && strcmp(bs, "a") == 0); if (bs) eFree (bs); - TEST_CHECK ((bs = baseFilenameSansExtensionNew ("x/b.in", ".in")) - && strcmp(bs, "b") == 0); + TEST_CHECK ((bs = filenameSansExtensionNew ("x/b.in", ".in")) + && strcmp(bs, "x/b") == 0); if (bs) eFree (bs); - TEST_CHECK ((bs = baseFilenameSansExtensionNew ("c.in.in", ".in.in")) + TEST_CHECK ((bs = filenameSansExtensionNew ("c.in.in", ".in.in")) && strcmp(bs, "c") == 0); if (bs) eFree (bs); - TEST_CHECK ((bs = baseFilenameSansExtensionNew ("/y/d.in.in", ".in.in")) - && strcmp(bs, "d") == 0); + TEST_CHECK ((bs = filenameSansExtensionNew ("/y/d.in.in", ".in.in")) + && 
strcmp(bs, "/y/d") == 0); if (bs) eFree (bs); } @@ -377,7 +377,7 @@ TEST_LIST = { { "intern", test_intern }, { "numarray", test_numarray }, { "routines/strrstr", test_routines_strrstr }, - { "routines/baseFilenameSansExtensionNew", test_routines_baseFilenameSansExtensionNew }, + { "routines/filenameSansExtensionNew", test_routines_filenameSansExtensionNew }, { "vstring/ncats", test_vstring_ncats }, { "vstring/truncate_leading", test_vstring_truncate_leading }, { "vstring/EqC", test_vstring_eqc }, diff --git a/main/options.c b/main/options.c index 469da5efc1..b0731c023d 100644 --- a/main/options.c +++ b/main/options.c @@ -39,6 +39,7 @@ #include "interactive_p.h" #include "writer_p.h" #include "trace.h" +#include "flags_p.h" #ifdef HAVE_JANSSON #include @@ -59,6 +60,8 @@ /* The following separators are permitted for list options. */ #define EXTENSION_SEPARATOR '.' +#define REXPR_START '%' +#define REXPR_STOP '%' #define PATTERN_START '(' #define PATTERN_STOP ')' #define IGNORE_SEPARATORS ", \t\n" @@ -303,10 +306,10 @@ static optionDescription LongOptionDescription [] = { {1,0," --langmap=[,[...]]"}, {1,0," Override default mapping of language to input file extension."}, {1,0," e.g. 
--langmap=c:.c.x,java:+.j,make:([Mm]akefile).mak"}, - {1,0," --map-=[+|-]|"}, + {1,0," --map-=[+|-]||"}, {1,0," Set, add(+) or remove(-) the map for ."}, - {1,0," Unlike --langmap, this doesn't take a list; only one file name "}, - {1,0," or one file can be specified at once."}, + {1,0," Unlike --langmap, this doesn't take a list; only one file name ,"}, + {1,0," one file name , or one file can be specified at once."}, {1,0," Unlike --langmap the change with this option affects mapping of only."}, {1,0,""}, {1,0,"Tags File Contents Options"}, @@ -436,6 +439,8 @@ static optionDescription LongOptionDescription [] = { {1,0," Output list of language extensions in mapping."}, {1,0," --list-map-patterns[=(|all)]"}, {1,0," Output list of language patterns in mapping."}, + {1,0," --list-map-rexprs[=(|all)]"}, + {1,0," Output list of language regular expressions in mapping."}, {1,0," --list-maps[=(|all)]"}, {1,0," Output list of language mappings (both extensions and patterns)."}, {1,0," --list-mline-regex-flags"}, @@ -1757,7 +1762,7 @@ static char* skipPastMap (char* p) static char* extractMapFromParameter (const langType language, char* parameter, char** tail, - bool* pattern_p, + langmapType *mapType, char* (* skip) (char *)) { char* p = NULL; @@ -1767,7 +1772,7 @@ static char* extractMapFromParameter (const langType language, if (first == EXTENSION_SEPARATOR) /* extension map */ { - *pattern_p = false; + *mapType = LMAP_EXTENSION; ++parameter; p = (* skip) (parameter); @@ -1777,19 +1782,18 @@ static char* extractMapFromParameter (const langType language, *tail = parameter + strlen (parameter); return result; } - else - { - tmp = *p; - *p = '\0'; - result = eStrdup (parameter); - *p = tmp; - *tail = p; - return result; - } + + tmp = *p; + *p = '\0'; + result = eStrdup (parameter); + *p = tmp; + *tail = p; + return result; } - else if (first == PATTERN_START) /* pattern map */ + + if (first == PATTERN_START) /* pattern map */ { - *pattern_p = true; + *mapType = 
LMAP_PATTERN; ++parameter; for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p) @@ -1800,32 +1804,73 @@ static char* extractMapFromParameter (const langType language, if (*p == '\0') error (FATAL, "Unterminated file name pattern for %s language", getLanguageName (language)); - else + + tmp = *p; + *p = '\0'; + result = eStrdup (parameter); + *p = tmp; + *tail = p + 1; + return result; + } + + if (first == REXPR_START) + { + *mapType = LMAP_REXPR; + + ++parameter; + vString *rexpr = vStringNew (); + for (p = parameter ; *p != REXPR_STOP && *p != '\0' ; ++p) { - tmp = *p; - *p = '\0'; - result = eStrdup (parameter); - *p = tmp; - *tail = p + 1; - return result; + if (*p == '\\' && *(p + 1) == REXPR_STOP) + p++; + vStringPut (rexpr, *p); } + if (*p == '\0') + error (FATAL, "Unterminated file name regular expression for %s language: %s", + getLanguageName (language), parameter); + + *tail = p + 1; + return vStringDeleteUnwrap (rexpr); } return NULL; } +static void langmap_rexpr_icase_short (char c CTAGS_ATTR_UNUSED, void* data) +{ + bool *icase = data; + *icase = true; +} + +static void langmap_rexpr_icase_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data) +{ + langmap_rexpr_icase_short ('i', data); +} + +static flagDefinition langmapRexprFlagDef[] = { + { 'i', "icase", langmap_rexpr_icase_short, langmap_rexpr_icase_long, + NULL, "applied in a case-insensitive manner"}, +}; + static char* addLanguageMap (const langType language, char* map_parameter, - bool exclusiveInAllLanguages) + bool exclusiveInAllLanguages, bool handleRexpr) { char* p = NULL; - bool pattern_p; + langmapType map_type; char* map; - map = extractMapFromParameter (language, map_parameter, &p, &pattern_p, skipPastMap); - if (map && pattern_p == false) + map = extractMapFromParameter (language, map_parameter, &p, &map_type, skipPastMap); + if (map && map_type == LMAP_EXTENSION) addLanguageExtensionMap (language, map, exclusiveInAllLanguages); - else if 
(map && pattern_p == true) + else if (map && map_type == LMAP_PATTERN) addLanguagePatternMap (language, map, exclusiveInAllLanguages); + else if (handleRexpr && map && map_type == LMAP_REXPR) + { + bool icase = false; + + flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase); + addLanguageRexprMap (language, map, icase, exclusiveInAllLanguages); + } else error (FATAL, "Badly formed language map for %s language", getLanguageName (language)); @@ -1838,14 +1883,21 @@ static char* addLanguageMap (const langType language, char* map_parameter, static char* removeLanguageMap (const langType language, char* map_parameter) { char* p = NULL; - bool pattern_p; + langmapType map_type; char* map; - map = extractMapFromParameter (language, map_parameter, &p, &pattern_p, skipPastMap); - if (map && pattern_p == false) + map = extractMapFromParameter (language, map_parameter, &p, &map_type, skipPastMap); + if (map && map_type == LMAP_EXTENSION) removeLanguageExtensionMap (language, map); - else if (map && pattern_p == true) + else if (map && map_type == LMAP_PATTERN) removeLanguagePatternMap (language, map); + else if (map && map_type == LMAP_REXPR) + { + bool icase = false; + + flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase); + removeLanguageRexprMap (language, map, icase); + } else error (FATAL, "Badly formed language map for %s language", getLanguageName (language)); @@ -1892,7 +1944,7 @@ static char* processLanguageMap (char* map) else verbose (" Adding to %s language map:", getLanguageName (language)); while (list != NULL && *list != '\0' && *list != ',') - list = addLanguageMap (language, list, true); + list = addLanguageMap (language, list, true, false); verbose ("\n"); } if (list != NULL && *list == ',') @@ -2019,7 +2071,7 @@ extern bool processMapOption ( map_parameter = eStrdup (spec); if (op == '+') - addLanguageMap (language, map_parameter, false); + addLanguageMap (language, map_parameter, false, true); else if (op == 
'-') removeLanguageMap (language, map_parameter); else @@ -2167,6 +2219,13 @@ static void processListMapPatternsOption (const char *const option, processListMapsOptionForType (option, parameter, LMAP_PATTERN|LMAP_TABLE_OUTPUT); } +static void processListMapRexprsOption (const char *const option, + const char *const parameter) +{ + processListMapsOptionForType (option, parameter, LMAP_REXPR|LMAP_TABLE_OUTPUT); +} + + static void processListMapsOption ( const char *const option CTAGS_ATTR_UNUSED, const char *const parameter CTAGS_ATTR_UNUSED) @@ -2330,6 +2389,13 @@ static void processDescribeLanguage(const char *const option, getLanguageVersionCurrent (language), getLanguageVersionAge (language)); + puts(""); + puts("Mappings/rexprs"); + puts("-------------------------------------------------------"); + printLanguageMaps (language, LMAP_REXPR|LMAP_NO_LANG_PREFIX, + localOption.withListHeader, localOption.machinable, + stdout); + puts(""); puts("Mappings/patterns"); puts("-------------------------------------------------------"); @@ -3002,6 +3068,7 @@ static parametricOption ParametricOptions [] = { { "list-maps", processListMapsOption, true, STAGE_ANY }, { "list-map-extensions", processListMapExtensionsOption, true, STAGE_ANY }, { "list-map-patterns", processListMapPatternsOption, true, STAGE_ANY }, + { "list-map-rexprs", processListMapRexprsOption, true, STAGE_ANY }, { "list-mline-regex-flags", processListMultilineRegexFlagsOption, true, STAGE_ANY }, { "list-output-formats", processListOutputFormatsOption, true, STAGE_ANY }, { "list-params", processListParametersOption, true, STAGE_ANY }, diff --git a/main/parse.c b/main/parse.c index b80cd63f5c..0e7f0e8fe4 100644 --- a/main/parse.c +++ b/main/parse.c @@ -38,6 +38,7 @@ #include "ptrarray.h" #include "read.h" #include "read_p.h" +#include "rexprcode_p.h" #include "routines.h" #include "routines_p.h" #include "stats_p.h" @@ -62,9 +63,10 @@ enum specType { SPEC_ALIAS = SPEC_NAME, SPEC_EXTENSION, SPEC_PATTERN, + 
SPEC_REXPR, }; const char *specTypeName [] = { - "none", "name", "extension", "pattern" + "none", "name", "extension", "pattern", "rexpr" }; typedef struct { @@ -81,6 +83,7 @@ typedef struct sParserObject { stringList* currentPatterns; /* current list of file name patterns */ stringList* currentExtensions; /* current list of extensions */ stringList* currentAliases; /* current list of aliases */ + ptrArray* currentRexprs; /* current list of regular expressions (rexprs). */ unsigned int initialized:1; /* initialize() is called or not */ unsigned int dontEmit:1; /* run but don't emit tags. @@ -148,13 +151,13 @@ static parserDefinitionFunc* BuiltInParsers[] = { #ifdef HAVE_LIBYAML , #endif - PEG_PARSER_LIST + PEG_PARSER_LIST #ifdef HAVE_PACKCC - , + , #endif OPTLIB2C_PCRE2_PARSER_LIST #ifdef HAVE_PCRE2 - , + , #endif #endif /* EXTERNAL_PARSER_LIST */ }; @@ -198,7 +201,7 @@ extern int makeSimpleTag ( } extern int makeSimpleRefTag (const vString* const name, const int kindIndex, - int roleIndex) + int roleIndex) { int r = CORK_NIL; @@ -207,10 +210,10 @@ extern int makeSimpleRefTag (const vString* const name, const int kindIndex, /* do not check for kind being disabled - that happens later in makeTagEntry() */ if (name != NULL && vStringLength (name) > 0) { - tagEntryInfo e; - initRefTagEntry (&e, vStringValue (name), kindIndex, roleIndex); + tagEntryInfo e; + initRefTagEntry (&e, vStringValue (name), kindIndex, roleIndex); - r = makeTagEntry (&e); + r = makeTagEntry (&e); } return r; } @@ -431,7 +434,7 @@ static langType getNameOrAliasesLanguageAndSpec (const char *const key, langType if (start_index == LANG_AUTO) - start_index = 0; + start_index = 0; else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount) return result; @@ -471,18 +474,74 @@ extern langType getLanguageForCommand (const char *const command, langType start &tmp_specType); } -static langType getPatternLanguageAndSpec (const char *const baseName, langType start_index, +static ptrArray* 
rexprsNew (void) +{ + return ptrArrayNew ((ptrArrayDeleteFunc)rExprCodeDelete); +} + +static void rexprsDelete (ptrArray* rexprs) +{ + ptrArrayDelete (rexprs); +} + +static void rExpressionsAddFromArray (ptrArray* rexprs, const struct rExprSrc *const array) +{ + for (unsigned int i = 0; array[i].expr; i++) + { + struct rExprCode *rxcode = rExprCodeNew (array[i].expr, array[i].iCase); + if (rxcode) + ptrArrayAdd (rexprs, rxcode); + } +} + +static struct rExprCode *rExpressionsFinds(ptrArray *rexprs, const char *fullName) +{ + for (unsigned int i = 0; i < ptrArrayCount (rexprs); i++) + { + struct rExprCode *rxcode = ptrArrayItem (rexprs, i); + if (rExprCodeMatch (rxcode, fullName)) + return rxcode; + } + return NULL; +} + +static langType getPatternLanguageAndSpec (const char *const baseName, + const char *const fullName, + langType start_index, const char **const spec, enum specType *specType) { langType result = LANG_IGNORE; unsigned int i; if (start_index == LANG_AUTO) - start_index = 0; + start_index = 0; else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount) return result; *spec = NULL; + + if (fullName == NULL) + goto classical_methods; + + for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + if (! isLanguageEnabled (i)) + continue; + + parserObject *parser = LanguageTable + i; + ptrArray* const rexprs = parser->currentRexprs; + struct rExprCode *rxcode; + + if (rexprs != NULL && (rxcode = rExpressionsFinds (rexprs, fullName))) + { + result = i; + *spec = rExprCodeGetSource (rxcode); + *specType = SPEC_REXPR; + goto found; + } + } + + classical_methods: for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i) { if (! 
isLanguageEnabled (i)) @@ -511,7 +570,7 @@ static langType getPatternLanguageAndSpec (const char *const baseName, langType vString* tmp; if (exts != NULL && (tmp = stringListExtensionFinds (exts, - fileExtension (baseName)))) + fileExtension (baseName)))) { result = i; *spec = vStringValue(tmp); @@ -529,7 +588,7 @@ extern langType getLanguageForFilename (const char *const filename, langType sta char *tmp_spec; enum specType tmp_specType; - return getPatternLanguageAndSpec (tmp_filename, startFrom, + return getPatternLanguageAndSpec (tmp_filename, filename, startFrom, (const char **const)&tmp_spec, &tmp_specType); } @@ -695,7 +754,8 @@ static parserCandidate* parserCandidateNew(unsigned int count CTAGS_ATTR_UNUSED) } /* If multiple parsers are found, return LANG_AUTO */ -static unsigned int nominateLanguageCandidates (const char *const key, parserCandidate** candidates) +static unsigned int nominateLanguageCandidates (const char *const key, const char *const fullKey CTAGS_ATTR_UNUSED, + parserCandidate** candidates) { unsigned int count; langType i; @@ -719,7 +779,8 @@ static unsigned int nominateLanguageCandidates (const char *const key, parserCan } static unsigned int -nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate** candidates) +nominateLanguageCandidatesForPattern(const char *const baseName, const char *const fullName, + parserCandidate** candidates) { unsigned int count; langType i; @@ -730,7 +791,7 @@ nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; ) { - i = getPatternLanguageAndSpec (baseName, i, &spec, &specType); + i = getPatternLanguageAndSpec (baseName, fullName, i, &spec, &specType); if (i != LANG_IGNORE) { (*candidates)[count].lang = i++; @@ -976,8 +1037,8 @@ static vString* extractVimFileTypeCommon(MIO* input, bool eof) 'modelines' 'mls' number (default 5) global {not in Vi} - If 'modeline' is on 'modelines' gives the number of lines that 
is - checked for set commands. */ + If 'modeline' is on 'modelines' gives the number of lines that is + checked for set commands. */ vString* filetype = NULL; #define RING_SIZE 5 @@ -1072,10 +1133,10 @@ static vString* determineZshAutoloadTag (const char *const modeline, #autoload [ OPTIONS ] */ if (((strncmp (modeline, "#compdef", 8) == 0) - && isspace ((unsigned char) *(modeline + 8))) - || ((strncmp (modeline, "#autoload", 9) == 0) - && (isspace ((unsigned char) *(modeline + 9)) - || *(modeline + 9) == '\0'))) + && isspace ((unsigned char) *(modeline + 8))) + || ((strncmp (modeline, "#autoload", 9) == 0) + && (isspace ((unsigned char) *(modeline + 9)) + || *(modeline + 9) == '\0'))) return vStringNewInit ("zsh"); else return NULL; @@ -1102,19 +1163,19 @@ static vString* extractPHPMark(MIO* input) struct getLangCtx { - const char *fileName; - MIO *input; - bool err; + const char *fileName; + MIO *input; + bool err; }; #define GLC_FOPEN_IF_NECESSARY0(_glc_, _label_) do { \ - if (!(_glc_)->input) { \ - (_glc_)->input = getMio((_glc_)->fileName, "rb", false); \ - if (!(_glc_)->input) { \ - (_glc_)->err = true; \ - goto _label_; \ - } \ - } \ + if (!(_glc_)->input) { \ + (_glc_)->input = getMio((_glc_)->fileName, "rb", false); \ + if (!(_glc_)->input) { \ + (_glc_)->err = true; \ + goto _label_; \ + } \ + } \ } while (0) \ #define GLC_FOPEN_IF_NECESSARY(_glc_, _label_, _doesParserRequireMemoryStream_) \ @@ -1122,7 +1183,7 @@ struct getLangCtx { if (!(_glc_)->input) \ GLC_FOPEN_IF_NECESSARY0 (_glc_, _label_); \ if ((_doesParserRequireMemoryStream_) && \ - (mio_memory_get_data((_glc_)->input, NULL) == NULL)) \ + (mio_memory_get_data((_glc_)->input, NULL) == NULL)) \ { \ MIO *tmp_ = (_glc_)->input; \ (_glc_)->input = mio_new_mio (tmp_, 0, -1); \ @@ -1135,10 +1196,10 @@ struct getLangCtx { } while (0) #define GLC_FCLOSE(_glc_) do { \ - if ((_glc_)->input) { \ - mio_unref((_glc_)->input); \ - (_glc_)->input = NULL; \ - } \ + if ((_glc_)->input) { \ + 
mio_unref((_glc_)->input); \ + (_glc_)->input = NULL; \ + } \ } while (0) static const struct taster { @@ -1175,7 +1236,7 @@ static const struct taster { } }; static langType tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters, - langType *fallback); + langType *fallback); /* If all the candidates have the same specialized language selector, return * it. Otherwise, return NULL. @@ -1201,24 +1262,24 @@ hasTheSameSelector (langType lang, selectLanguage candidate_selector) static selectLanguage commonSelector (const parserCandidate *candidates, int n_candidates) { - Assert (n_candidates > 1); - selectLanguage *selector; - int i; + Assert (n_candidates > 1); + selectLanguage *selector; + int i; - selector = LanguageTable[ candidates[0].lang ].def->selectLanguage; - if (selector == NULL) - return NULL; + selector = LanguageTable[ candidates[0].lang ].def->selectLanguage; + if (selector == NULL) + return NULL; - while (*selector) - { - for (i = 1; i < n_candidates; ++i) - if (! hasTheSameSelector (candidates[i].lang, *selector)) - break; - if (i == n_candidates) - return *selector; - selector++; - } - return NULL; + while (*selector) + { + for (i = 1; i < n_candidates; ++i) + if (! 
hasTheSameSelector (candidates[i].lang, *selector)) + break; + if (i == n_candidates) + return *selector; + selector++; + } + return NULL; } @@ -1236,19 +1297,19 @@ pickLanguageBySelection (selectLanguage selector, MIO *input, for (i = 0; i < nCandidates; i++) cs[i] = candidates[i].lang; - lang = selector(input, cs, nCandidates); + lang = selector(input, cs, nCandidates); eFree (cs); - if (lang) - { - verbose (" selection: %s\n", lang); - return getNamedLanguage(lang, 0); - } - else - { + if (lang) + { + verbose (" selection: %s\n", lang); + return getNamedLanguage(lang, 0); + } + else + { verbose (" no selection\n"); - return LANG_IGNORE; - } + return LANG_IGNORE; + } } static int compareParsersByName (const void *a, const void* b) @@ -1287,7 +1348,7 @@ static unsigned int sortAndFilterParserCandidates (parserCandidate *candidates, return n_candidates; qsort (candidates, n_candidates, sizeof(*candidates), - sortParserCandidatesBySpecType); + sortParserCandidatesBySpecType); highestSpecType = candidates [0].specType; r = 1; @@ -1300,8 +1361,8 @@ static unsigned int sortAndFilterParserCandidates (parserCandidate *candidates, } static void verboseReportCandidate (const char *header, - parserCandidate *candidates, - unsigned int n_candidates) + parserCandidate *candidates, + unsigned int n_candidates) { unsigned int i; verbose (" #%s: %u\n", header, n_candidates); @@ -1325,9 +1386,9 @@ static bool doesCandidatesRequireMemoryStream(const parserCandidate *candidates, return false; } -static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx *glc, - unsigned int nominate (const char *const, parserCandidate**), - langType *fallback) +static langType getSpecLanguageCommon (const char *const spec, const char *const fullSpec, struct getLangCtx *glc, + unsigned int nominate (const char *const, const char *const, parserCandidate**), + langType *fallback) { langType language; parserCandidate *candidates; @@ -1336,7 +1397,7 @@ static langType 
getSpecLanguageCommon (const char *const spec, struct getLangCtx if (fallback) *fallback = LANG_IGNORE; - n_candidates = (*nominate)(spec, &candidates); + n_candidates = (*nominate)(spec, fullSpec, &candidates); verboseReportCandidate ("candidates", candidates, n_candidates); @@ -1352,7 +1413,7 @@ static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx { selectLanguage selector = commonSelector(candidates, n_candidates); bool memStreamRequired = doesCandidatesRequireMemoryStream (candidates, - n_candidates); + n_candidates); GLC_FOPEN_IF_NECESSARY(glc, fopen_error, memStreamRequired); if (selector) { @@ -1381,20 +1442,21 @@ static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx } static langType getSpecLanguage (const char *const spec, - struct getLangCtx *glc, + struct getLangCtx *glc, langType *fallback) { - return getSpecLanguageCommon(spec, glc, nominateLanguageCandidates, - fallback); + return getSpecLanguageCommon(spec, NULL, glc, nominateLanguageCandidates, + fallback); } static langType getPatternLanguage (const char *const baseName, - struct getLangCtx *glc, - langType *fallback) + const char *const fullName, + struct getLangCtx *glc, + langType *fallback) { - return getSpecLanguageCommon(baseName, glc, - nominateLanguageCandidatesForPattern, - fallback); + return getSpecLanguageCommon(baseName, fullName, glc, + nominateLanguageCandidatesForPattern, + fallback); } /* This function tries to figure out language contained in a file by @@ -1402,30 +1464,30 @@ static langType getPatternLanguage (const char *const baseName, */ static langType tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters, - langType *fallback) + langType *fallback) { - int i; + int i; - if (fallback) - *fallback = LANG_IGNORE; - for (i = 0; i < n_tasters; ++i) { - langType language; - vString* spec; + if (fallback) + *fallback = LANG_IGNORE; + for (i = 0; i < n_tasters; ++i) { + langType language; + 
vString* spec; - mio_rewind(glc->input); + mio_rewind(glc->input); spec = tasters[i].taste(glc->input); - if (NULL != spec) { - verbose (" %s: %s\n", tasters[i].msg, vStringValue (spec)); - language = getSpecLanguage (vStringValue (spec), glc, + if (NULL != spec) { + verbose (" %s: %s\n", tasters[i].msg, vStringValue (spec)); + language = getSpecLanguage (vStringValue (spec), glc, (fallback && (*fallback == LANG_IGNORE))? fallback: NULL); - vStringDelete (spec); - if (language != LANG_IGNORE) - return language; - } - } + vStringDelete (spec); + if (language != LANG_IGNORE) + return language; + } + } - return LANG_IGNORE; + return LANG_IGNORE; } @@ -1440,93 +1502,94 @@ static langType getFileLanguageForRequestInternal (struct GetLanguageRequest *req) { const char *const fileName = req->fileName; - langType language; - - /* ctags tries variety ways(HINTS) to choose a proper language - for given fileName. If multiple candidates are chosen in one of - the hint, a SELECTOR common between the candidate languages - is called. - - "selection failure" means a selector common between the - candidates doesn't exist or the common selector returns NULL. - - "hint failure" means the hint finds no candidate or - "selection failure" occurs though the hint finds multiple - candidates. - - If a hint chooses multiple candidates, and selection failure is - occurred, the hint records one of the candidates as FALLBACK for - the hint. (The candidates are stored in an array. The first - element of the array is recorded. However, there is no - specification about the order of elements in the array.) - - If all hints are failed, FALLBACKs of the hints are examined. - Which fallbacks should be chosen? `enum hint' defines the order. */ - enum hint { - HINT_INTERP, - HINT_OTHER, - HINT_FILENAME, - HINT_TEMPLATE, - N_HINTS, - }; - langType fallback[N_HINTS]; - int i; - struct getLangCtx glc = { - .fileName = fileName, - .input = (req->type == GLR_REUSE)? 
mio_ref (req->mio): NULL, - .err = false, - }; - const char* const baseName = baseFilename (fileName); - char *templateBaseName = NULL; - fileStatus *fstatus = NULL; - - for (i = 0; i < N_HINTS; i++) + langType language; + + /* ctags tries variety ways(HINTS) to choose a proper language + for given fileName. If multiple candidates are chosen in one of + the hint, a SELECTOR common between the candidate languages + is called. + + "selection failure" means a selector common between the + candidates doesn't exist or the common selector returns NULL. + + "hint failure" means the hint finds no candidate or + "selection failure" occurs though the hint finds multiple + candidates. + + If a hint chooses multiple candidates, and selection failure is + occurred, the hint records one of the candidates as FALLBACK for + the hint. (The candidates are stored in an array. The first + element of the array is recorded. However, there is no + specification about the order of elements in the array.) + + If all hints are failed, FALLBACKs of the hints are examined. + Which fallbacks should be chosen? `enum hint' defines the order. */ + enum hint { + HINT_INTERP, + HINT_OTHER, + HINT_FILENAME, + HINT_TEMPLATE, + N_HINTS, + }; + langType fallback[N_HINTS]; + int i; + struct getLangCtx glc = { + .fileName = fileName, + .input = (req->type == GLR_REUSE)? 
mio_ref (req->mio): NULL, + .err = false, + }; + const char* const baseName = baseFilename (fileName); + char *templateFileNameSansExt = NULL; + fileStatus *fstatus = NULL; + + for (i = 0; i < N_HINTS; i++) fallback [i] = LANG_IGNORE; - verbose ("Get file language for %s\n", fileName); + verbose ("Get file language for %s\n", fileName); - verbose (" pattern: %s\n", baseName); - language = getPatternLanguage (baseName, &glc, + verbose (" pattern: %s\n", baseName); + language = getPatternLanguage (baseName, fileName, &glc, fallback + HINT_FILENAME); - if (language != LANG_IGNORE || glc.err) - goto cleanup; - - { - const char* const tExt = ".in"; - templateBaseName = baseFilenameSansExtensionNew (fileName, tExt); - if (templateBaseName) - { - verbose (" pattern + template(%s): %s\n", tExt, templateBaseName); - GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false); - mio_rewind(glc.input); - language = getPatternLanguage(templateBaseName, &glc, - fallback + HINT_TEMPLATE); - if (language != LANG_IGNORE) - goto cleanup; - } - } + if (language != LANG_IGNORE || glc.err) + goto cleanup; + + { + const char* const tExt = ".in"; + templateFileNameSansExt = filenameSansExtensionNew (fileName, tExt); + if (templateFileNameSansExt) + { + verbose (" pattern + template(%s): %s\n", tExt, templateFileNameSansExt); + GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false); + mio_rewind(glc.input); + const char *const templateBaseFileNameSansExt = baseFilename (templateFileNameSansExt); + language = getPatternLanguage (templateBaseFileNameSansExt, templateFileNameSansExt, &glc, + fallback + HINT_TEMPLATE); + if (language != LANG_IGNORE) + goto cleanup; + } + } /* If the input is already opened, we don't have to verify the existence. 
*/ - if (glc.input || ((fstatus = eStat (fileName)) && fstatus->exists)) - { - if ((fstatus && fstatus->isExecutable) || Option.guessLanguageEagerly) - { - GLC_FOPEN_IF_NECESSARY (&glc, cleanup, false); - language = tasteLanguage(&glc, eager_tasters, 1, - fallback + HINT_INTERP); - } - if (language != LANG_IGNORE) - goto cleanup; - - if (Option.guessLanguageEagerly) - { - GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false); - language = tasteLanguage(&glc, - eager_tasters + 1, - ARRAY_SIZE(eager_tasters) - 1, - fallback + HINT_OTHER); - } - } + if (glc.input || ((fstatus = eStat (fileName)) && fstatus->exists)) + { + if ((fstatus && fstatus->isExecutable) || Option.guessLanguageEagerly) + { + GLC_FOPEN_IF_NECESSARY (&glc, cleanup, false); + language = tasteLanguage(&glc, eager_tasters, 1, + fallback + HINT_INTERP); + } + if (language != LANG_IGNORE) + goto cleanup; + + if (Option.guessLanguageEagerly) + { + GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false); + language = tasteLanguage(&glc, + eager_tasters + 1, + ARRAY_SIZE(eager_tasters) - 1, + fallback + HINT_OTHER); + } + } cleanup: @@ -1538,20 +1601,20 @@ getFileLanguageForRequestInternal (struct GetLanguageRequest *req) if (fstatus) req->mtime = fstatus->mtime; } - GLC_FCLOSE(&glc); - if (fstatus) - eStatFree (fstatus); - if (templateBaseName) - eFree (templateBaseName); + GLC_FCLOSE(&glc); + if (fstatus) + eStatFree (fstatus); + if (templateFileNameSansExt) + eFree (templateFileNameSansExt); - for (i = 0; + for (i = 0; language == LANG_IGNORE && i < N_HINTS; i++) - { - language = fallback [i]; + { + language = fallback [i]; if (language != LANG_IGNORE) - verbose (" fallback[hint = %d]: %s\n", i, getLanguageName (language)); - } + verbose (" fallback[hint = %d]: %s\n", i, getLanguageName (language)); + } if (language == LANG_IGNORE && isLanguageEnabled (LANG_FALLBACK)) @@ -1560,7 +1623,7 @@ getFileLanguageForRequestInternal (struct GetLanguageRequest *req) verbose (" last resort: using \"%s\" parser\n", getLanguageName 
(LANG_FALLBACK)); } - return language; + return language; } static langType getFileLanguageForRequest (struct GetLanguageRequest *req) @@ -1572,8 +1635,8 @@ static langType getFileLanguageForRequest (struct GetLanguageRequest *req) else if (! isLanguageEnabled (l)) { error (FATAL, - "%s parser specified with --language-force is disabled", - getLanguageName (l)); + "%s parser specified with --language-force is disabled", + getLanguageName (l)); /* For suppressing warnings. */ return LANG_AUTO; } @@ -1611,14 +1674,31 @@ static void printLanguageMap (const langType language, FILE *fp) bool first = true; unsigned int i; parserObject *parser = LanguageTable + language; - stringList* map = parser->currentPatterns; + ptrArray* rexprs; + stringList* map; + Assert (0 <= language && language < (int) LanguageCount); + + rexprs = parser->currentRexprs; + for (i = 0 ; i < ptrArrayCount (rexprs) ; ++i) + { + struct rExprCode *rxcode = ptrArrayItem (rexprs, i); + const char *rxsrc = rExprCodeGetSource (rxcode); + bool iCase = rExprCodeGetICase (rxcode); + + fprintf (fp, "%s%%%s%%%s", (first ? "": " "), + rxsrc, iCase? "i": ""); + first = false; + } + + map = parser->currentPatterns; for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) { fprintf (fp, "%s(%s)", (first ? 
"" : " "), vStringValue (stringListItem (map, i))); first = false; } + map = parser->currentExtensions; for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) { @@ -1633,11 +1713,19 @@ extern void installLanguageMapDefault (const langType language) parserObject* parser; Assert (0 <= language && language < (int) LanguageCount); parser = LanguageTable + language; + + if (parser->currentRexprs != NULL) + rexprsDelete (parser->currentRexprs); if (parser->currentPatterns != NULL) stringListDelete (parser->currentPatterns); if (parser->currentExtensions != NULL) stringListDelete (parser->currentExtensions); + parser->currentRexprs = rexprsNew (); + if (parser->def->rexprs) + rExpressionsAddFromArray (parser->currentRexprs, + parser->def->rexprs); + if (parser->def->patterns == NULL) parser->currentPatterns = stringListNew (); else @@ -1707,6 +1795,7 @@ extern void installLanguageAliasesDefaults (void) extern void clearLanguageMap (const langType language) { Assert (0 <= language && language < (int) LanguageCount); + ptrArrayClear ((LanguageTable + language)->currentRexprs); stringListClear ((LanguageTable + language)->currentPatterns); stringListClear ((LanguageTable + language)->currentExtensions); } @@ -1799,6 +1888,59 @@ extern void addLanguageExtensionMap ( stringListAdd ((LanguageTable + language)->currentExtensions, str); } +static bool removeLanguageRexprMap1(const langType language, const char *const rexpr, bool iCase) +{ + bool result = false; + ptrArray* const rexprs = (LanguageTable + language)->currentRexprs; + + for (unsigned int i = 0; i < ptrArrayCount (rexprs); i++) + { + struct rExprCode *rxcode = ptrArrayItem (rexprs, i); + if (strcmp (rExprCodeGetSource (rxcode), rexpr) == 0 && + rExprCodeGetICase (rxcode) == iCase) + { + ptrArrayDeleteItem (rexprs, i); + verbose (" (removed from %s)", getLanguageName (language)); + result = true; + break; + } + } + return result; +} + +extern bool removeLanguageRexprMap (const langType language, const char 
*const rexpr, bool iCase) +{ + bool result = false; + + /* LANG_AUTO: remove from every language (used by addLanguageRexprMap). */ + if (language != LANG_AUTO) + return removeLanguageRexprMap1 (language, rexpr, iCase); + for (unsigned int i = 0; i < LanguageCount; i++) + if (LanguageTable [i].currentRexprs != NULL) + result = removeLanguageRexprMap1 ((langType) i, rexpr, iCase) || result; + return result; +} + +extern void addLanguageRexprMap (const langType language, const char* rexpr, bool iCase, + bool exclusiveInAllLanguages) +{ + Assert (0 <= language && language < (int) LanguageCount); + + struct rExprCode *rxcode = rExprCodeNew (rexpr, iCase); + if (rxcode) + { + if (exclusiveInAllLanguages) + removeLanguageRexprMap (LANG_AUTO, rexpr, iCase); + + parserObject* parser = LanguageTable + language; + if (!parser->currentRexprs) + parser->currentRexprs = rexprsNew (); + ptrArray* const rexprs = parser->currentRexprs; + + ptrArrayAdd (rexprs, rxcode); + } +} + extern void addLanguageAlias (const langType language, const char* alias) { vString* const str = vStringNewInit (alias); @@ -2129,6 +2271,11 @@ extern void freeParserResources (void) freeParamControlBlock (parser->paramControlBlock); + if (parser->currentRexprs != NULL) + { + rexprsDelete (parser->currentRexprs); + parser->currentRexprs = NULL; + } freeList (&parser->currentPatterns); freeList (&parser->currentExtensions); freeList (&parser->currentAliases); @@ -2475,6 +2622,7 @@ extern void processLanguageDefineOption ( initializeParsingCommon (def, false, NULL); linkDependenciesAtInitializeParsing (def); + LanguageTable [def->id].currentRexprs = rexprsNew (); LanguageTable [def->id].currentPatterns = stringListNew (); LanguageTable [def->id].currentExtensions = stringListNew (); LanguageTable [def->id].pretendingAsLanguage = LANG_IGNORE; @@ -2618,20 +2766,20 @@ static void processLangKindDefinition ( case '{': if (inLongName) error(FATAL, - "unexpected character in kind specification: \'%c\'", - c); + "unexpected character in kind specification: \'%c\'", + c); inLongName = true; break; case '}': if (!inLongName) error(FATAL, - "unexpected character in
kind specification: \'%c\'", - c); + "unexpected character in kind specification: \'%c\'", + c); k = vStringValue (longName); r = enableLanguageKindForName (language, k, mode); if (! r) error (WARNING, "Unsupported kind: '%s' for --%s option", - k, option); + k, option); inLongName = false; vStringClear (longName); @@ -2644,7 +2792,7 @@ static void processLangKindDefinition ( r = enableLanguageKindForLetter (language, c, mode); if (! r) error (WARNING, "Unsupported kind: '%c' for --%s option", - c, option); + c, option); } break; } @@ -3669,7 +3817,7 @@ extern void printLanguageParams (const langType language, } static void processLangAliasOption (const langType language, - const char *const parameter) + const char *const parameter) { const char* alias; const parserObject * parser; @@ -3764,6 +3912,18 @@ static void printMaps (const langType language, langmapType type) parser = LanguageTable + language; if (! (LMAP_NO_LANG_PREFIX & type)) printf ("%-8s", parser->def->name); + if (parser->currentRexprs != NULL && (type & LMAP_REXPR)) + { + for (i = 0 ; i < ptrArrayCount (parser->currentRexprs) ; ++i) + { + struct rExprCode *rxcode = ptrArrayItem (parser->currentRexprs, + i); + vString *encodedSource = rExprCodeNewEncodedSource (rxcode); + printf (" %s", vStringValue (encodedSource)); + vStringDelete (encodedSource); + } + } + if (parser->currentPatterns != NULL && (type & LMAP_PATTERN)) for (i = 0 ; i < stringListCount (parser->currentPatterns) ; ++i) printf (" %s", vStringValue ( @@ -3783,6 +3943,8 @@ static struct colprintTable *mapColprintTableNew (langmapType type) return colprintTableNew ("L:LANGUAGE", "L:PATTERN", NULL); else if (type & LMAP_EXTENSION) return colprintTableNew ("L:LANGUAGE", "L:EXTENSION", NULL); + else if (type & LMAP_REXPR) + return colprintTableNew ("L:LANGUAGE", "L:EXPRESSION", "L:CASE", NULL); else { AssertNotReached (); @@ -3798,6 +3960,35 @@ static void mapColprintAddLanguage (struct colprintTable * table, unsigned int count; unsigned 
int i; + if ((type & LMAP_REXPR) + && parser->currentRexprs + && (0 < (count = ptrArrayCount (parser->currentRexprs)))) + { + for (i = 0; i < count; i++) + { + line = colprintTableGetNewLine (table); + struct rExprCode *rxcode = ptrArrayItem (parser->currentRexprs, + i); + + colprintLineAppendColumnCString (line, parser->def->name); + if ((type & LMAP_ALL) != LMAP_REXPR) + { + colprintLineAppendColumnCString (line, "rexpr"); + vString *encodedSource = rExprCodeNewEncodedSource (rxcode); + colprintLineAppendColumnVString (line, encodedSource); + vStringDelete (encodedSource); + } + else + { + const char *rxsrc = rExprCodeGetSource (rxcode); + bool iCase = rExprCodeGetICase (rxcode); + + colprintLineAppendColumnCString (line, rxsrc); + colprintLineAppendColumnCString (line, iCase? "insensitive": "sensitive"); + } + } + } + if ((type & LMAP_PATTERN) && (0 < (count = stringListCount (parser->currentPatterns)))) { for (i = 0; i < count; i++) @@ -3806,7 +3997,7 @@ static void mapColprintAddLanguage (struct colprintTable * table, vString *pattern = stringListItem (parser->currentPatterns, i); colprintLineAppendColumnCString (line, parser->def->name); - if (type & LMAP_EXTENSION) + if ((type & LMAP_ALL) != LMAP_PATTERN) colprintLineAppendColumnCString (line, "pattern"); colprintLineAppendColumnVString (line, pattern); } @@ -3820,7 +4011,7 @@ static void mapColprintAddLanguage (struct colprintTable * table, vString *extension = stringListItem (parser->currentExtensions, i); colprintLineAppendColumnCString (line, parser->def->name); - if (type & LMAP_PATTERN) + if ((type & LMAP_ALL) != LMAP_EXTENSION) colprintLineAppendColumnCString (line, "extension"); colprintLineAppendColumnVString (line, extension); } @@ -4187,7 +4378,7 @@ extern bool processFielddefOption (const char *const option, const char *const p */ static rescanReason createTagsForFile (const langType language, - const unsigned int passCount) + const unsigned int passCount) { parserDefinition *const lang = 
LanguageTable [language].def; rescanReason rescan = RESCAN_NONE; @@ -4230,7 +4421,7 @@ static unsigned int parserCorkFlags (parserDefinition *parser) r |= parser->useCork; if (doesLanguageExpectCorkInRegex (parser->id) - || parser->requestAutomaticFQTag) + || parser->requestAutomaticFQTag) r |= CORK_QUEUE; pushLanguage (parser->id); @@ -4842,7 +5033,7 @@ static void installTagRegexTable (const langType language) */ initRegexOptscript (); - for (i = 0; i < lang->tagRegexCount; ++i) + for (i = 0; i < lang->tagRegexCount; ++i) { if (lang->tagRegexTable [i].mline) addTagMultiLineRegex (parser->lregexControlBlock, @@ -4874,8 +5065,8 @@ static void installKeywordTable (const langType language) { for (i = 0; i < lang->keywordCount; ++i) addKeyword (lang->keywordTable [i].name, - language, - lang->keywordTable [i].id); + language, + lang->keywordTable [i].id); } } @@ -4945,7 +5136,7 @@ extern xpathFileSpec* getXpathFileSpec (const langType language, unsigned int nt } extern bool makeKindSeparatorsPseudoTags (const langType language, - const ptagDesc *pdesc) + const ptagDesc *pdesc) { parserObject* parser; parserDefinition* lang; @@ -4989,7 +5180,7 @@ extern bool makeKindSeparatorsPseudoTags (const langType language, else { upperKind = getLanguageKind (language, - sep->parentKindIndex); + sep->parentKindIndex); if (!upperKind) continue; @@ -4999,7 +5190,7 @@ extern bool makeKindSeparatorsPseudoTags (const langType language, r = writePseudoTag (pdesc, sep->separator? 
sep->separator: "", - name, lang->name) || r; + name, lang->name) || r; } } @@ -5013,7 +5204,7 @@ struct makeKindDescriptionPseudoTagData { }; static bool makeKindDescriptionPseudoTag (kindDefinition *kind, - void *user_data) + void *user_data) { struct makeKindDescriptionPseudoTagData *data = user_data; vString *letter_and_name; @@ -5053,7 +5244,7 @@ static bool makeRoleDescriptionPseudoTag (kindDefinition *kind, } extern bool makeKindDescriptionsPseudoTags (const langType language, - const ptagDesc *pdesc) + const ptagDesc *pdesc) { parserObject *parser; struct kindControlBlock *kcb; @@ -5452,7 +5643,7 @@ extern void printLanguageSubparsers (const langType language, #define defineSimplePrintFLagsFunction(target, flagDef) \ extern void print##target##Flags (bool withListHeader, bool machinable, FILE *fp) \ { \ - struct colprintTable * table = flagsColprintTableNew(); \ + struct colprintTable * table = flagsColprintTableNew(); \ flagsColprintAddDefinitions (table, flagDef, ARRAY_SIZE (flagDef)); \ flagsColprintTablePrint (table, withListHeader, machinable, fp); \ colprintTableDelete(table); \ @@ -5840,15 +6031,15 @@ static void createCTSTTags (void) makePromise (SELF_TEST_PARSER, lb + 1, 0, le, 0, lb + 1); break; #if defined(DEBUG) && defined(HAVE_SECCOMP) - case K_CALL_GETPPID: + case K_CALL_GETPPID: getppid(); break; #endif case K_QUIT: quit = true; break; - case K_DISABLED: - case K_ENABLED: + case K_DISABLED: + case K_ENABLED: { int role; char *name; diff --git a/main/parse.h b/main/parse.h index 8758b09069..21f8e8bd27 100644 --- a/main/parse.h +++ b/main/parse.h @@ -77,6 +77,13 @@ enum scriptHook { SCRIPT_HOOK_MAX, }; +/* --map-=[+|-|]%regular-expression%[i] */ +struct rExprSrc { + const char *expr; /* The last element must be NULL. 
*/ + bool iCase; +}; +#define REXPR_LAST_ENTRY { .expr = NULL, } + struct sParserDefinition { /* defined by parser */ char* name; /* name of language */ @@ -104,6 +111,9 @@ struct sParserDefinition { const char *const *extensions; /* list of default extensions */ const char *const *patterns; /* list of default file name patterns */ const char *const *aliases; /* list of default aliases (alternative names) */ + const struct rExprSrc * rexprs; /* list of default file name regex patterns. + * Put REXPR_LAST_ENTRY as the last element. */ + parserInitialize initialize; /* initialization routine, if needed */ parserFinalize finalize; /* finalize routine, if needed */ simpleParser parser; /* simple parser (common case) */ diff --git a/main/parse_p.h b/main/parse_p.h index f26cd6a8af..0e52e687fa 100644 --- a/main/parse_p.h +++ b/main/parse_p.h @@ -31,11 +31,12 @@ * DATA DECLARATIONS */ typedef enum { - LMAP_PATTERN = 1 << 0, - LMAP_EXTENSION = 1 << 1, - LMAP_ALL = LMAP_PATTERN | LMAP_EXTENSION, - LMAP_TABLE_OUTPUT = 1 << 2, - LMAP_NO_LANG_PREFIX = 1 << 3, + LMAP_REXPR = 1 << 0, + LMAP_PATTERN = 1 << 1, + LMAP_EXTENSION = 1 << 2, + LMAP_ALL = LMAP_PATTERN | LMAP_EXTENSION | LMAP_REXPR, + LMAP_TABLE_OUTPUT = 1 << 3, + LMAP_NO_LANG_PREFIX = 1 << 4, } langmapType; enum parserCategory @@ -103,6 +104,9 @@ extern void addLanguageExtensionMap (const langType language, const char* extens extern bool removeLanguagePatternMap (const langType language, const char *const pattern); extern void addLanguagePatternMap (const langType language, const char* ptrn, bool exclusiveInAllLanguages); +extern bool removeLanguageRexprMap (const langType language, const char *const rexpr, bool iCase); +extern void addLanguageRexprMap (const langType language, const char* rexpr, bool iCase, + bool exclusiveInAllLanguages); extern void installLanguageAliasesDefault (const langType language); extern void installLanguageAliasesDefaults (void); diff --git a/main/rexprcode.c b/main/rexprcode.c new file mode 
100644 index 0000000000..e8fe079aa5 --- /dev/null +++ b/main/rexprcode.c @@ -0,0 +1,98 @@ +/* +* Copyright (c) 2025, Red Hat, Inc. +* Copyright (c) 2025, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. + +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" + +#include "routines.h" +#include "rexprcode_p.h" + +#include +#include + +/* +* DATA DECLARATIONS +*/ +struct rExprCode { + regex_t *code; + char *src; + bool iCase; +}; + +/* +* FUNCTION DECLARATIONS +*/ +extern const char *rExprCodeGetSource (const struct rExprCode *rxcode) +{ + return rxcode->src; +} + +extern bool rExprCodeGetICase (const struct rExprCode *rxcode) +{ + return rxcode->iCase; +} + +extern vString *rExprCodeNewEncodedSource (const struct rExprCode *rxcode) +{ + vString *encoded_src = vStringNew(); + + vStringPut (encoded_src, '%'); + + for (const char *c = rExprCodeGetSource (rxcode); *c != '\0'; c++) + { + if (*c == '%') + vStringPut (encoded_src, '\\'); + vStringPut (encoded_src, *c); + } + + vStringPut (encoded_src, '%'); + if (rExprCodeGetICase (rxcode)) + vStringPut (encoded_src, 'i'); + + return encoded_src; +} + +extern struct rExprCode *rExprCodeNew(const char *rxsrc, bool iCase) +{ + regex_t *regex_code = xMalloc (1, regex_t); + int errcode = regcomp (regex_code, rxsrc, + REG_EXTENDED|REG_NOSUB|(iCase? 
REG_ICASE: 0)); + if (errcode != 0) + { + char errmsg[256]; + regerror (errcode, regex_code, errmsg, sizeof(errmsg)); + error (WARNING, "regcomp: %s", errmsg); + /* No regfree(): POSIX leaves *regex_code undefined after a failed regcomp(). */ + eFree (regex_code); + return NULL; + } + + struct rExprCode *rxcode = xMalloc (1, struct rExprCode); + + rxcode->code = regex_code; + rxcode->src = eStrdup (rxsrc); + rxcode->iCase = iCase; + + return rxcode; +} + +extern void rExprCodeDelete (struct rExprCode *rxcode) +{ + regfree (rxcode->code); + eFree (rxcode->code); + eFree (rxcode->src); + eFree (rxcode); +} + +extern bool rExprCodeMatch (struct rExprCode *rxcode, const char *fname) +{ + return (regexec (rxcode->code, fname, 0, 0, 0) == 0); +} diff --git a/main/rexprcode_p.h b/main/rexprcode_p.h new file mode 100644 index 0000000000..4eb421c911 --- /dev/null +++ b/main/rexprcode_p.h @@ -0,0 +1,36 @@ +/* +* Copyright (c) 2025, Red Hat, Inc. +* Copyright (c) 2025, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version.
+ +*/ + +#ifndef CTAGS_MAIN_REXPRCODE_H +#define CTAGS_MAIN_REXPRCODE_H + +/* +* INCLUDE FILES +*/ +#include "general.h" + +#include "vstring.h" + +/* +* DATA DECLARATIONS +*/ + +struct rExprCode; + +/* +* FUNCTION DECLARATIONS +*/ +extern const char *rExprCodeGetSource (const struct rExprCode *rxcode); +extern vString *rExprCodeNewEncodedSource (const struct rExprCode *rxcode); +extern bool rExprCodeGetICase (const struct rExprCode *rxcode); +extern struct rExprCode *rExprCodeNew(const char *rxsrc, bool iCase); +extern void rExprCodeDelete (struct rExprCode *rxcode); +extern bool rExprCodeMatch (struct rExprCode *rxcode, const char *fname); + +#endif /* CTAGS_MAIN_REXPRCODE_H */ diff --git a/main/routines.c b/main/routines.c index 6edffba7d3..a7e752e859 100644 --- a/main/routines.c +++ b/main/routines.c @@ -657,22 +657,17 @@ extern const char *fileExtension (const char *const fileName) return extension; } -extern char* baseFilenameSansExtensionNew (const char *const fileName, - const char *const templateExt) +extern char* filenameSansExtensionNew (const char *const fileName, + const char *const templateExt) { - const char *pDelimiter; - const char *const base = baseFilename (fileName); - char* shorten_base; + Assert (templateExt); + Assert (fileName); - pDelimiter = strrstr (base, templateExt); + const char *pDelimiter = strrstr (fileName, templateExt); if (pDelimiter && (strcmp (pDelimiter, templateExt) == 0)) - { - shorten_base = eStrndup (base, pDelimiter - base); - return shorten_base; - } - else - return NULL; + return eStrndup (fileName, pDelimiter - fileName); + return NULL; } extern bool isAbsolutePath (const char *const path) diff --git a/main/routines_p.h b/main/routines_p.h index fb72aa2456..5209d07d33 100644 --- a/main/routines_p.h +++ b/main/routines_p.h @@ -97,6 +97,6 @@ extern char* absoluteDirname (char *file); extern char* relativeFilename (const char *file, const char *dir); extern MIO *tempFile (const char *const mode, char **const pName); 
-extern char* baseFilenameSansExtensionNew (const char *const fileName, const char *const templateExt); +extern char* filenameSansExtensionNew (const char *const fileName, const char *const templateExt); #endif /* CTAGS_MAIN_ROUTINES_PRIVATE_H */ diff --git a/man/ctags-optlib.7.rst.in b/man/ctags-optlib.7.rst.in index b32d0c7084..e68f93f7c0 100644 --- a/man/ctags-optlib.7.rst.in +++ b/man/ctags-optlib.7.rst.in @@ -31,7 +31,7 @@ readers should read ctags(1) of Universal Ctags first. Following options are for defining (or customizing) a parser: * ``--langdef=`` -* ``--map-=[+|-]|`` +* ``--map-=[+|-]||`` * ``--kinddef-=,,`` * ``--regex-=////[]`` * ``--mline-regex-=////{mgroup=}[]`` @@ -103,7 +103,7 @@ Overview for defining a parser 3. Give a file pattern or file extension for activating the parser - Use ``--map-=[+|-]|``. + Use ``--map-=[+|-]||``. 4. Define kinds diff --git a/man/ctags.1.rst.in b/man/ctags.1.rst.in index 2b6285ecdd..a7712de163 100644 --- a/man/ctags.1.rst.in +++ b/man/ctags.1.rst.in @@ -499,26 +499,71 @@ Language Selection and Mapping Options Exuberant Ctags. See ctags-incompatibilities(7) for the background of this incompatible change. -``--map-=[+|-]|`` + Unlike ``--map-`` option, you cannot specify relative-path regular + expressions to ``--langmap`` option. + +``--map-=[+|-]||`` This option provides the way to control mapping(s) of file names to languages in a more fine-grained way than ``--langmap`` option. In @CTAGS_NAME_EXECUTABLE@, more than one language can map to a - file name ** or file ** (*N:1 map*). Alternatively, - ``--langmap`` option handle only *1:1 map*, only one language - mapping to one file name ** or file **. A typical N:1 - map is seen in C++ and ObjectiveC language; both languages have - a map to ``.h`` as a file extension. - - A file extension is specified by preceding the extension with a period (e.g. ``.c``). - A file name pattern is specified by enclosing the pattern in parentheses (e.g. - ``([Mm]akefile)``). 
A prefixed plus ('``+``') sign is for adding, and + relative-path regular expression (**), file name **, or + file ** (*N:1 map*). Alternatively, ``--langmap`` + option handle only *1:1 map*, only one language mapping to one + file name ** or file **. A typical N:1 map is + seen in C++ and ObjectiveC language; both languages have a map to + ``.h`` as a file extension. + + A file extension is specified by preceding the extension with a period + (e.g. ``.c``). A file name pattern is specified by enclosing the pattern in + parentheses (e.g. ``([Mm]akefile)``). A relative-path regular expression is + specified by enclosing the expressions in percent signs '``%``' + (e.g. ``%include/.*\.h%``). To include a literal percent sign + inside the regular expression, escape it as ``\%``. + + A prefixed plus ('``+``') sign is for adding, and minus ('``-``') is for removing. No prefix means replacing the map of **. - Unlike ``--langmap``, ** (or **) is not a list. - ``--map-`` takes one extension (or pattern). However, - the option can be specified with different arguments multiple times - in a command line. + Unlike ``--langmap``, ``--map-`` does not take a list; ``--map-`` + takes one extension, one pattern, or one regular expression. However, the + option can be specified with different arguments multiple times in a command + line. + + For file extensions and file name patterns, the match is performed + with a base file name, a file without any directory components. + For relative-path regular expressions, the match is performed with + a relative-path incorporating the directory components. A + relative-path is relative to the directory where ctags launches. + + Assume your shell is in ``/project/x`` directory and you have the following + source tree under the directory. + + .. 
code-block:: + + src + └── lib + ├── data.c + └── logic.c + + If you run @CTAGS_NAME_EXECUTABLE@ with ``@CTAGS_NAME_EXECUTABLE@ -R src``, + the match is performed with ``src/lib/data.c`` and ``src/lib/logic.c``. If you + give ``--map-YourParser='%src/lib/.*\.c%'``, @CTAGS_NAME_EXECUTABLE@ + chooses ``YourParser`` parser for processing ``data.c`` and ``logic.c`` in the + tree. + + If your shell is in ``/project/x/src`` and you run + ``@CTAGS_NAME_EXECUTABLE@ -R lib``, @CTAGS_NAME_EXECUTABLE@ may not choose + ``YourParser`` because the match is performed with ``lib/data.c`` and + ``lib/logic.c``. + + A relative-path regular expression can take a flag controlling its testing. + The flag comes after the last percent sign. Currently only one flag is available: + + ``{icase}`` (one-letter form '``i``') + The regular expression is to be applied in a case-insensitive + manner. (e.g. ``%include/.*\.h%i`` or ``%include/.*\.h%{icase}``) + + The relative-path regular expression is available since version 6.3.0. .. _option_tags_file_contents: @@ -1243,14 +1288,24 @@ Listing Options languages, and then exits. ``all`` is used as default value if the option argument is omitted. -``--list-maps[=(|all)]`` - Lists file name patterns and the file extensions which associate a file +``--list-map-rexprs[=(|all)]`` + Lists the relative-path regular expressions which associate a file name with a language for either the specified ** or ``all`` languages, and then exits. ``all`` is used as default value if the option argument is omitted. - To list the file extensions or file name patterns individually, use - ``--list-map-extensions`` or ``--list-map-patterns`` option. + (since version 6.3.0) + +``--list-maps[=(|all)]`` + Lists the file name patterns, the file extensions, and the relative-path + regular expressions which associate a file name with a language for either + the specified ** or ``all`` languages, and then exits. + ``all`` is used as default value if the option argument is omitted.
+ + To list the file extensions, file name patterns, or relative-path regular + expressions individually, use ``--list-map-extensions``, + ``--list-map-patterns``, or ``--list-map-rexprs`` option. + See the ``--langmap`` option, and "`Determining file language`_", above. This option does not work with ``--machinable`` nor @@ -1507,10 +1562,13 @@ are mapped to C++, C and ObjectiveC. These mappings can cause issues. @CTAGS_NAME_EXECUTABLE@ tries to select the proper parser for the source file by applying heuristics to its content, however it is not perfect. In case of issues one can use ``--language-force=``, -``--langmap=[,[...]]``, or the ``--map-=[+|-]|`` +``--langmap=[,[...]]``, or the ``--map-=[+|-]||`` options. (Some of the heuristics are applied whether ``--guess-language-eagerly`` is given or not.) +The order of testing is relative-path regular expressions (specified with +``--map-=``), file name patterns, then file extensions. + .. TODO: all heuristics??? To be confirmed. Heuristically guessing diff --git a/misc/optlib2c b/misc/optlib2c index aa1b3c3789..a966e143a9 100755 --- a/misc/optlib2c +++ b/misc/optlib2c @@ -293,7 +293,12 @@ my $options = die "Adding a map is allowed only to the language specified with --langdef: $1" unless ($_[0]->{'langdef'} eq $1); my $spec = $2; - if ($spec =~ /\((.*)\)/) { + if ($spec =~ /%(.+?)%(i|\{icase\})?/) { + # TODO: handle \% in the regular expression. + my $rexpr = { expr => $1, + iCase => (defined $2 && ($2 eq 'i' || $2 eq '{icase}'))? 1: 0 }; + push @{$_[0]->{'rexprs'}}, $rexpr; + } elsif ($spec =~ /\((.*)\)/) { push @{$_[0]->{'patterns'}}, $1; } elsif ($spec =~ /\.(.*)/) { push @{$_[0]->{'extensions'}}, $1; @@ -894,6 +899,31 @@ sub emit_patterns { emit_list $_[0], "patterns"; } +sub emit_rexprs { + my $opts = shift; + + return if (! @{$opts->{'rexprs'}}); + + printf <{'rexprs'}}) { + my $expr = escape_as_cstr ("$_->{'expr'}"); + my $iCase = $_->{'iCase'}? 
"true": "false"; + printf <enabled = ${enabled}; def->extensions = extensions; def->patterns = patterns; +EOF + if (@{$opts->{'rexprs'}}) { + print <rexprs = rexprs; +EOF + } + print <aliases = aliases; EOF if (defined $opts->{'selector'}) { @@ -1357,6 +1394,7 @@ EOF emit_extensions $opts; emit_aliases $opts; emit_patterns $opts; + emit_rexprs $opts; emit_roledefs $opts; emit_scopeseps $opts; emit_kinddefs $opts; @@ -1417,6 +1455,7 @@ sub main { disabled => 0, patterns => [], extensions => [], + rexprs => [], aliases => [], regexs => [# { regex => "", name => "", kind => "", flags => "", mline => 1|0, optscript => "" }, ], diff --git a/optlib/rpmMacros.c b/optlib/rpmMacros.c index 1d9c5a4c02..4f280a46a0 100644 --- a/optlib/rpmMacros.c +++ b/optlib/rpmMacros.c @@ -113,6 +113,14 @@ extern parserDefinition* RpmMacrosParser (void) NULL }; + static const struct rExprSrc rexprs [] = { + { + .expr = "(.*/)?macros\\.d/macros\\.([^/]+)$", + .iCase = false, + }, + REXPR_LAST_ENTRY + }; + static kindDefinition RpmMacrosKindTable [] = { { true, 'm', "macro", "macros", @@ -126,6 +134,7 @@ extern parserDefinition* RpmMacrosParser (void) def->enabled = true; def->extensions = extensions; def->patterns = patterns; + def->rexprs = rexprs; def->aliases = aliases; def->method = METHOD_NOT_CRAFTED|METHOD_REGEX; def->useCork = CORK_QUEUE; diff --git a/optlib/rpmMacros.ctags b/optlib/rpmMacros.ctags index 43a3c3f912..80dfe61d2e 100644 --- a/optlib/rpmMacros.ctags +++ b/optlib/rpmMacros.ctags @@ -17,12 +17,7 @@ # --langdef=RpmMacros -# This map is too generic. -# e.g. "macros.h" of C language input matches this pattern. -# --map-RpmMacros=+(macros.*) - -# This one is too general. 
-# --map-RpmMacros=+(macros) +--map-RpmMacros=+%(.*/)?macros\.d/macros\.([^/]+)$% --kinddef-RpmMacros=m,macro,macros diff --git a/source.mak b/source.mak index 9e7870639a..90c5119e36 100644 --- a/source.mak +++ b/source.mak @@ -126,6 +126,7 @@ LIB_PRIVATE_HEADS = \ main/promise_p.h \ main/ptag_p.h \ main/read_p.h \ + main/rexprcode_p.h \ main/script_p.h \ main/sort_p.h \ main/stats_p.h \ @@ -174,6 +175,7 @@ LIB_SRCS = \ main/ptag.c \ main/rbtree.c \ main/read.c \ + main/rexprcode.c \ main/script.c \ main/seccomp.c \ main/selectors.c \ diff --git a/win32/ctags_vs2013.vcxproj b/win32/ctags_vs2013.vcxproj index 8845a2f317..3dfe444edb 100644 --- a/win32/ctags_vs2013.vcxproj +++ b/win32/ctags_vs2013.vcxproj @@ -217,6 +217,7 @@ + @@ -449,6 +450,7 @@ + diff --git a/win32/ctags_vs2013.vcxproj.filters b/win32/ctags_vs2013.vcxproj.filters index 92884f5279..93b798992f 100644 --- a/win32/ctags_vs2013.vcxproj.filters +++ b/win32/ctags_vs2013.vcxproj.filters @@ -174,6 +174,9 @@ Source Files\main + + Source Files\main + Source Files\main @@ -866,6 +869,9 @@ Header Files + + Header Files + Header Files