001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.objecttools;
018
019import static org.apache.juneau.commons.utils.StringUtils.*;
020
021import java.util.*;
022import java.util.regex.*;
023
024import org.apache.juneau.*;
025import org.apache.juneau.commons.lang.*;
026import org.apache.juneau.commons.utils.*;
027
028/**
029 * String matcher factory for the {@link ObjectSearcher} class.
030 *
031 * <p>
032 *    The class provides searching based on the following patterns:
033 * </p>
034 * <ul>
035 *    <li><js>"property=foo"</js> - Simple full word match
036 *    <li><js>"property=fo*"</js>, <js>"property=?ar"</js> - Meta-character matching
037 *    <li><js>"property=foo bar"</js>(implicit), <js>"property=^foo ^bar"</js>(explicit) - Multiple OR'ed patterns
038 *    <li><js>"property=+fo* +*ar"</js> - Multiple AND'ed patterns
039 *    <li><js>"property=fo* -bar"</js> - Negative patterns
040 *    <li><js>"property='foo bar'"</js> - Patterns with whitespace
041 *    <li><js>"property=foo\\'bar"</js> - Patterns with single-quotes
042 *    <li><js>"property=/foo\\s+bar"</js> - Regular expression match
043 * </ul>
044 *
045 * <h5 class='section'>See Also:</h5><ul>
046 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/ObjectTools">Object Tools</a>
047 * </ul>
048 */
049public class StringMatcherFactory extends MatcherFactory {
050
051   /**
052    * A construct representing a single search pattern.
053    */
054   private static class StringMatcher extends AbstractMatcher {
055      // @formatter:off
056      private static final AsciiSet
057         META_CHARS = AsciiSet.of("*?'\""),
058         SQ_CHAR = AsciiSet.of("'"),
059         DQ_CHAR = AsciiSet.of("\""),
060         REGEX_CHARS = AsciiSet.of("+\\[]{}()^$.");
061      // @formatter:on
062      private String pattern;
063
064      Pattern[] orPatterns, andPatterns, notPatterns;
065
066      public StringMatcher(String searchPattern) {
067
068         this.pattern = searchPattern.trim();
069         var ors = new LinkedList<Pattern>();
070         var ands = new LinkedList<Pattern>();
071         var nots = new LinkedList<Pattern>();
072
073         for (var s : StringUtils.splitQuoted(pattern, true)) {
074            char c0 = s.charAt(0), c9 = s.charAt(s.length() - 1);
075
076            if (c0 == '/' && c9 == '/' && s.length() > 1) {
077               ands.add(Pattern.compile(strip(s)));
078            } else {
079               char prefix = '^';
080               boolean ignoreCase = false;
081               if (s.length() > 1 && (c0 == '^' || c0 == '+' || c0 == '-')) {
082                  prefix = c0;
083                  s = s.substring(1);
084                  c0 = s.charAt(0);
085               }
086
087               if (c0 == '\'') {
088                  s = unescapeChars(strip(s), SQ_CHAR);
089                  ignoreCase = true;
090               } else if (c0 == '"') {
091                  s = unescapeChars(strip(s), DQ_CHAR);
092               }
093
094               if (REGEX_CHARS.contains(s) || META_CHARS.contains(s)) {
095                  var sb = new StringBuilder();
096                  boolean isInEscape = false;
097                  for (var i = 0; i < s.length(); i++) {
098                     var c = s.charAt(i);
099                     if (isInEscape) {
100                        if (c == '?' || c == '*' || c == '\\')
101                           sb.append('\\').append(c);
102                        else
103                           sb.append(c);
104                        isInEscape = false;
105                     } else {
106                        if (c == '\\')
107                           isInEscape = true;
108                        else if (c == '?')
109                           sb.append(".?");
110                        else if (c == '*')
111                           sb.append(".*");
112                        else if (REGEX_CHARS.contains(c))
113                           sb.append("\\").append(c);
114                        else
115                           sb.append(c);
116                     }
117                  }
118                  s = sb.toString();
119               }
120
121               int flags = Pattern.DOTALL;
122               if (ignoreCase)
123                  flags |= Pattern.CASE_INSENSITIVE;
124
125               var p = Pattern.compile(s, flags);
126
127               if (prefix == '-')
128                  nots.add(p);
129               else if (prefix == '+')
130                  ands.add(p);
131               else
132                  ors.add(p);
133            }
134         }
135         orPatterns = ors.toArray(new Pattern[ors.size()]);
136         andPatterns = ands.toArray(new Pattern[ands.size()]);
137         notPatterns = nots.toArray(new Pattern[nots.size()]);
138      }
139
140      @Override
141      public boolean matches(ClassMeta<?> cm, Object o) {
142         var s = (String)o;
143         for (var andPattern : andPatterns)
144            if (! andPattern.matcher(s).matches())
145               return false;
146         for (var notPattern : notPatterns)
147            if (notPattern.matcher(s).matches())
148               return false;
149         for (var orPattern : orPatterns)
150            if (orPattern.matcher(s).matches())
151               return true;
152         return orPatterns.length == 0;
153      }
154
155      @Override
156      public String toString() {
157         return pattern;
158      }
159   }
160
161   /**
162    * Default reusable matcher.
163    */
164   public static final StringMatcherFactory DEFAULT = new StringMatcherFactory();
165
166   @Override
167   public boolean canMatch(ClassMeta<?> cm) {
168      return true;
169   }
170
171   @Override
172   public AbstractMatcher create(String pattern) {
173      return new StringMatcher(pattern);
174   }
175}