FileNameMatcher.java

  1. /*
  2.  * Copyright (C) 2008, Florian Köberle <florianskarten@web.de> and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */

  10. package org.eclipse.jgit.fnmatch;

  11. import java.util.ArrayList;
  12. import java.util.Collections;
  13. import java.util.List;
  14. import java.util.ListIterator;
  15. import java.util.regex.Matcher;
  16. import java.util.regex.Pattern;

  17. import org.eclipse.jgit.errors.InvalidPatternException;
  18. import org.eclipse.jgit.errors.NoClosingBracketException;

  19. /**
  20.  * This class can be used to match filenames against fnmatch like patterns. It
  21.  * is not thread save.
  22.  * <p>
  23.  * Supported are the wildcard characters * and ? and groups with:
  24.  * <ul>
  25.  * <li>characters e.g. [abc]</li>
  26.  * <li>ranges e.g. [a-z]</li>
  27.  * <li>the following character classes
  28.  * <ul>
  29.  * <li>[:alnum:]</li>
  30.  * <li>[:alpha:]</li>
  31.  * <li>[:blank:]</li>
  32.  * <li>[:cntrl:]</li>
  33.  * <li>[:digit:]</li>
  34.  * <li>[:graph:]</li>
  35.  * <li>[:lower:]</li>
  36.  * <li>[:print:]</li>
  37.  * <li>[:punct:]</li>
  38.  * <li>[:space:]</li>
  39.  * <li>[:upper:]</li>
  40.  * <li>[:word:]</li>
  41.  * <li>[:xdigit:]</li>
  42.  * </ul>
  43.  * e. g. [[:xdigit:]]</li>
  44.  * </ul>
  45.  * Any character can be escaped by prepending it with a \
  46.  */
  47. public class FileNameMatcher {
  48.     static final List<Head> EMPTY_HEAD_LIST = Collections.emptyList();

  49.     private static final Pattern characterClassStartPattern = Pattern
  50.             .compile("\\[[.:=]"); //$NON-NLS-1$

  51.     private List<Head> headsStartValue;

  52.     private List<Head> heads;

  53.     /**
  54.      * {{@link #extendStringToMatchByOneCharacter(char)} needs a list for the
  55.      * new heads, allocating a new array would be bad for the performance, as
  56.      * the method gets called very often.
  57.      *
  58.      */
  59.     private List<Head> listForLocalUseage;

  60.     /**
  61.      *
  62.      * @param headsStartValue
  63.      *            must be a list which will never be modified.
  64.      */
  65.     private FileNameMatcher(List<Head> headsStartValue) {
  66.         this(headsStartValue, headsStartValue);
  67.     }

  68.     /**
  69.      *
  70.      * @param headsStartValue
  71.      *            must be a list which will never be modified.
  72.      * @param heads
  73.      *            a list which will be cloned and then used as current head
  74.      *            list.
  75.      */
  76.     private FileNameMatcher(final List<Head> headsStartValue,
  77.             final List<Head> heads) {
  78.         this.headsStartValue = headsStartValue;
  79.         this.heads = new ArrayList<>(heads.size());
  80.         this.heads.addAll(heads);
  81.         this.listForLocalUseage = new ArrayList<>(heads.size());
  82.     }

  83.     /**
  84.      * Constructor for FileNameMatcher
  85.      *
  86.      * @param patternString
  87.      *            must contain a pattern which fnmatch would accept.
  88.      * @param invalidWildgetCharacter
  89.      *            if this parameter isn't null then this character will not
  90.      *            match at wildcards(* and ? are wildcards).
  91.      * @throws org.eclipse.jgit.errors.InvalidPatternException
  92.      *             if the patternString contains a invalid fnmatch pattern.
  93.      */
  94.     public FileNameMatcher(final String patternString,
  95.             final Character invalidWildgetCharacter)
  96.             throws InvalidPatternException {
  97.         this(createHeadsStartValues(patternString, invalidWildgetCharacter));
  98.     }

  99.     /**
  100.      * A Copy Constructor which creates a new
  101.      * {@link org.eclipse.jgit.fnmatch.FileNameMatcher} with the same state and
  102.      * reset point like <code>other</code>.
  103.      *
  104.      * @param other
  105.      *            another {@link org.eclipse.jgit.fnmatch.FileNameMatcher}
  106.      *            instance.
  107.      */
  108.     public FileNameMatcher(FileNameMatcher other) {
  109.         this(other.headsStartValue, other.heads);
  110.     }

  111.     private static List<Head> createHeadsStartValues(
  112.             final String patternString, final Character invalidWildgetCharacter)
  113.             throws InvalidPatternException {

  114.         final List<AbstractHead> allHeads = parseHeads(patternString,
  115.                 invalidWildgetCharacter);

  116.         List<Head> nextHeadsSuggestion = new ArrayList<>(2);
  117.         nextHeadsSuggestion.add(LastHead.INSTANCE);
  118.         for (int i = allHeads.size() - 1; i >= 0; i--) {
  119.             final AbstractHead head = allHeads.get(i);

  120.             // explanation:
  121.             // a and * of the pattern "a*b"
  122.             // need *b as newHeads
  123.             // that's why * extends the list for it self and it's left neighbor.
  124.             if (head.isStar()) {
  125.                 nextHeadsSuggestion.add(head);
  126.                 head.setNewHeads(nextHeadsSuggestion);
  127.             } else {
  128.                 head.setNewHeads(nextHeadsSuggestion);
  129.                 nextHeadsSuggestion = new ArrayList<>(2);
  130.                 nextHeadsSuggestion.add(head);
  131.             }
  132.         }
  133.         return nextHeadsSuggestion;
  134.     }

  135.     private static int findGroupEnd(final int indexOfStartBracket,
  136.             final String pattern) throws InvalidPatternException {
  137.         int firstValidCharClassIndex = indexOfStartBracket + 1;
  138.         int firstValidEndBracketIndex = indexOfStartBracket + 2;

  139.         if (indexOfStartBracket + 1 >= pattern.length())
  140.             throw new NoClosingBracketException(indexOfStartBracket, "[", "]", //$NON-NLS-1$ //$NON-NLS-2$
  141.                     pattern);

  142.         if (pattern.charAt(firstValidCharClassIndex) == '!') {
  143.             firstValidCharClassIndex++;
  144.             firstValidEndBracketIndex++;
  145.         }

  146.         final Matcher charClassStartMatcher = characterClassStartPattern
  147.                 .matcher(pattern);

  148.         int groupEnd = -1;
  149.         while (groupEnd == -1) {

  150.             final int possibleGroupEnd = indexOfUnescaped(pattern, ']',
  151.                     firstValidEndBracketIndex);
  152.             if (possibleGroupEnd == -1)
  153.                 throw new NoClosingBracketException(indexOfStartBracket, "[", //$NON-NLS-1$
  154.                         "]", pattern); //$NON-NLS-1$

  155.             final boolean foundCharClass = charClassStartMatcher
  156.                     .find(firstValidCharClassIndex);

  157.             if (foundCharClass
  158.                     && charClassStartMatcher.start() < possibleGroupEnd) {

  159.                 final String classStart = charClassStartMatcher.group(0);
  160.                 final String classEnd = classStart.charAt(1) + "]"; //$NON-NLS-1$

  161.                 final int classStartIndex = charClassStartMatcher.start();
  162.                 final int classEndIndex = pattern.indexOf(classEnd,
  163.                         classStartIndex + 2);

  164.                 if (classEndIndex == -1)
  165.                     throw new NoClosingBracketException(classStartIndex,
  166.                             classStart, classEnd, pattern);

  167.                 firstValidCharClassIndex = classEndIndex + 2;
  168.                 firstValidEndBracketIndex = firstValidCharClassIndex;
  169.             } else {
  170.                 groupEnd = possibleGroupEnd;
  171.             }
  172.         }
  173.         return groupEnd;
  174.     }

  175.     private static List<AbstractHead> parseHeads(final String pattern,
  176.             final Character invalidWildgetCharacter)
  177.             throws InvalidPatternException {

  178.         int currentIndex = 0;
  179.         List<AbstractHead> heads = new ArrayList<>();
  180.         while (currentIndex < pattern.length()) {
  181.             final int groupStart = indexOfUnescaped(pattern, '[', currentIndex);
  182.             if (groupStart == -1) {
  183.                 final String patternPart = pattern.substring(currentIndex);
  184.                 heads.addAll(createSimpleHeads(patternPart,
  185.                         invalidWildgetCharacter));
  186.                 currentIndex = pattern.length();
  187.             } else {
  188.                 final String patternPart = pattern.substring(currentIndex,
  189.                         groupStart);
  190.                 heads.addAll(createSimpleHeads(patternPart,
  191.                         invalidWildgetCharacter));

  192.                 final int groupEnd = findGroupEnd(groupStart, pattern);
  193.                 final String groupPart = pattern.substring(groupStart + 1,
  194.                         groupEnd);
  195.                 heads.add(new GroupHead(groupPart, pattern));
  196.                 currentIndex = groupEnd + 1;
  197.             }
  198.         }
  199.         return heads;
  200.     }

  201.     private static List<AbstractHead> createSimpleHeads(
  202.             final String patternPart, final Character invalidWildgetCharacter) {
  203.         final List<AbstractHead> heads = new ArrayList<>(
  204.                 patternPart.length());

  205.         boolean escaped = false;
  206.         for (int i = 0; i < patternPart.length(); i++) {
  207.             final char c = patternPart.charAt(i);
  208.             if (escaped) {
  209.                 final CharacterHead head = new CharacterHead(c);
  210.                 heads.add(head);
  211.                 escaped = false;
  212.             } else {
  213.                 switch (c) {
  214.                 case '*': {
  215.                     final AbstractHead head = createWildCardHead(
  216.                             invalidWildgetCharacter, true);
  217.                     heads.add(head);
  218.                     break;
  219.                 }
  220.                 case '?': {
  221.                     final AbstractHead head = createWildCardHead(
  222.                             invalidWildgetCharacter, false);
  223.                     heads.add(head);
  224.                     break;
  225.                 }
  226.                 case '\\':
  227.                     escaped = true;
  228.                     break;
  229.                 default:
  230.                     final CharacterHead head = new CharacterHead(c);
  231.                     heads.add(head);
  232.                 }
  233.             }
  234.         }
  235.         return heads;
  236.     }

  237.     private static AbstractHead createWildCardHead(
  238.             final Character invalidWildgetCharacter, final boolean star) {
  239.         if (invalidWildgetCharacter != null) {
  240.             return new RestrictedWildCardHead(invalidWildgetCharacter
  241.                     .charValue(), star);
  242.         }
  243.         return new WildCardHead(star);
  244.     }

  245.     /**
  246.      * @param c new character to append
  247.      * @return true to continue, false if the matcher can stop appending
  248.      */
  249.     private boolean extendStringToMatchByOneCharacter(char c) {
  250.         final List<Head> newHeads = listForLocalUseage;
  251.         newHeads.clear();
  252.         List<Head> lastAddedHeads = null;
  253.         for (int i = 0; i < heads.size(); i++) {
  254.             final Head head = heads.get(i);
  255.             final List<Head> headsToAdd = head.getNextHeads(c);
  256.             // Why the next performance optimization isn't wrong:
  257.             // Some times two heads return the very same list.
  258.             // We save future effort if we don't add these heads again.
  259.             // This is the case with the heads "a" and "*" of "a*b" which
  260.             // both can return the list ["*","b"]
  261.             if (headsToAdd != lastAddedHeads) {
  262.                 if (!headsToAdd.isEmpty())
  263.                     newHeads.addAll(headsToAdd);
  264.                 lastAddedHeads = headsToAdd;
  265.             }
  266.         }
  267.         listForLocalUseage = heads;
  268.         heads = newHeads;
  269.         return !newHeads.isEmpty();
  270.     }

  271.     private static int indexOfUnescaped(final String searchString,
  272.             final char ch, final int fromIndex) {
  273.         for (int i = fromIndex; i < searchString.length(); i++) {
  274.             char current = searchString.charAt(i);
  275.             if (current == ch)
  276.                 return i;
  277.             if (current == '\\')
  278.                 i++; // Skip the next char as it is escaped }
  279.         }
  280.         return -1;
  281.     }

  282.     /**
  283.      * Append to the string which is matched against the patterns of this class
  284.      *
  285.      * @param stringToMatch
  286.      *            extends the string which is matched against the patterns of
  287.      *            this class.
  288.      */
  289.     public void append(String stringToMatch) {
  290.         for (int i = 0; i < stringToMatch.length(); i++) {
  291.             final char c = stringToMatch.charAt(i);
  292.             if (!extendStringToMatchByOneCharacter(c))
  293.                 break;
  294.         }
  295.     }

  296.     /**
  297.      * Resets this matcher to it's state right after construction.
  298.      */
  299.     public void reset() {
  300.         heads.clear();
  301.         heads.addAll(headsStartValue);
  302.     }

  303.     /**
  304.      * Create a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
  305.      * uses the same pattern like this matcher, but has the current state of
  306.      * this matcher as reset and start point
  307.      *
  308.      * @return a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
  309.      *         uses the same pattern like this matcher, but has the current
  310.      *         state of this matcher as reset and start point.
  311.      */
  312.     public FileNameMatcher createMatcherForSuffix() {
  313.         final List<Head> copyOfHeads = new ArrayList<>(heads.size());
  314.         copyOfHeads.addAll(heads);
  315.         return new FileNameMatcher(copyOfHeads);
  316.     }

  317.     /**
  318.      * Whether the matcher matches
  319.      *
  320.      * @return whether the matcher matches
  321.      */
  322.     public boolean isMatch() {
  323.         if (heads.isEmpty())
  324.             return false;

  325.         final ListIterator<Head> headIterator = heads
  326.                 .listIterator(heads.size());
  327.         while (headIterator.hasPrevious()) {
  328.             final Head head = headIterator.previous();
  329.             if (head == LastHead.INSTANCE) {
  330.                 return true;
  331.             }
  332.         }
  333.         return false;
  334.     }

  335.     /**
  336.      * Whether a match can be appended
  337.      *
  338.      * @return a boolean.
  339.      */
  340.     public boolean canAppendMatch() {
  341.         for (Head head : heads) {
  342.             if (head != LastHead.INSTANCE) {
  343.                 return true;
  344.             }
  345.         }
  346.         return false;
  347.     }
  348. }