001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.utils;
018
019import static org.apache.juneau.commons.lang.StateEnum.*;
020import static org.apache.juneau.commons.utils.CollectionUtils.*;
021import static org.apache.juneau.commons.utils.StringUtils.*;
022import static org.apache.juneau.commons.utils.Utils.*;
023
024import java.text.*;
025import java.util.*;
026import java.util.regex.*;
027
028import org.apache.juneau.commons.lang.*;
029
030/**
031 * Utility class for matching strings against string expressions.
032 *
033 * <p>
034 * Supports the following string constructs:
035 * <ul>
036 *    <li><js>"foo"</js> - Single arguments.
037 *    <li><js>"foo,bar,baz"</js> - Multiple OR'ed arguments.
038 *    <li><js>"foo | bar | bqz"</js> - Multiple OR'ed arguments, pipe syntax.
039 *    <li><js>"foo || bar || bqz"</js> - Multiple OR'ed arguments, Java-OR syntax.
040 *    <li><js>"fo*"</js> - Patterns including <js>'*'</js> and <js>'?'</js>.
041 *    <li><js>"fo* &amp; *oo"</js> - Multiple AND'ed arguments, ampersand syntax.
042 *    <li><js>"fo* &amp;&amp; *oo"</js> - Multiple AND'ed arguments, Java-AND syntax.
043 *    <li><js>"fo* || (*oo || bar)"</js> - Parenthesis.
044 * </ul>
045 *
046 * <h5 class='section'>Notes:</h5><ul>
047 *    <li class='note'>AND operations take precedence over OR operations (as expected).
048 *    <li class='note'>Whitespace is ignored.
049 *    <li class='note'><jk>null</jk> or empty expressions always match as <jk>false</jk>.
050 * </ul>
051 *
052 */
053public class StringExpressionMatcher {
054
055   static class And extends Exp {
056      Exp[] clauses;
057
058      And(List<Exp> clauses) {
059         this.clauses = clauses.toArray(new Exp[clauses.size()]);
060      }
061
062      @Override /* Overridden from Object */
063      public String toString() {
064         return "(& " + join(clauses, " ") + ')';
065      }
066
067      @Override /* Overridden from Exp */
068      void appendTokens(Set<String> set) {
069         for (var clause : clauses)
070            clause.appendTokens(set);
071      }
072
073      @Override /* Overridden from Exp */
074      boolean matches(String input) {
075         for (var e : clauses)
076            if (! e.matches(input))
077               return false;
078         return true;
079      }
080   }
081
082   static class Eq extends Exp {
083      final String operand;
084
085      Eq(String operand) {
086         this.operand = operand;
087      }
088
089      @Override /* Overridden from Object */
090      public String toString() {
091         return "[= " + operand + "]";
092      }
093
094      @Override /* Overridden from Exp */
095      void appendTokens(Set<String> set) {
096         set.add(operand);
097      }
098
099      @Override /* Overridden from Exp */
100      boolean matches(String input) {
101         return operand.equals(input);
102      }
103   }
104
105   abstract static class Exp {
106      void appendTokens(Set<String> set) {}
107
108      abstract boolean matches(String input);
109   }
110
111   static class Match extends Exp {
112      final Pattern p;
113      final String operand;
114
115      Match(String operand) {
116         this.operand = operand;
117         p = getMatchPattern(operand);
118      }
119
120      @Override /* Overridden from Object */
121      public String toString() {
122         return "[* " + p.pattern().replaceAll("\\\\[QE]", "") + "]";
123      }
124
125      @Override /* Overridden from Exp */
126      void appendTokens(Set<String> set) {
127         set.add(operand);
128      }
129
130      @Override /* Overridden from Exp */
131      boolean matches(String input) {
132         return p.matcher(input).matches();
133      }
134   }
135
136   static class Never extends Exp {
137      @Override /* Overridden from Object */
138      public String toString() {
139         return "(NEVER)";
140      }
141
142      @Override
143      boolean matches(String input) {
144         return false;
145      }
146   }
147
148   static class Or extends Exp {
149      Exp[] clauses;
150
151      Or(List<Exp> clauses) {
152         this.clauses = clauses.toArray(new Exp[clauses.size()]);
153      }
154
155      @Override /* Overridden from Object */
156      public String toString() {
157         return "(| " + join(clauses, " ") + ')';
158      }
159
160      @Override /* Overridden from Exp */
161      void appendTokens(Set<String> set) {
162         for (var clause : clauses)
163            clause.appendTokens(set);
164      }
165
166      @Override
167      boolean matches(String input) {
168         for (var e : clauses)
169            if (e.matches(input))
170               return true;
171         return false;
172      }
173   }
174
175   // @formatter:off
176   private static final AsciiSet
177      WS = AsciiSet.of(" \t"),
178      OP = AsciiSet.of(",|&"),
179      META = AsciiSet.of("*?");
180   // @formatter:on
181
182   private static Exp parseOperand(String operand) {
183      boolean hasMeta = false;
184      for (var i = 0; i < operand.length() && ! hasMeta; i++) {
185         var c = operand.charAt(i);
186         hasMeta |= META.contains(c);
187      }
188      return hasMeta ? new Match(operand) : new Eq(operand);
189   }
190
191   private final Exp exp;
192
193   /**
194    * Constructor.
195    *
196    * @param expression The string expression.
197    * @throws ParseException Malformed input encountered.
198    */
199   public StringExpressionMatcher(String expression) throws ParseException {
200      this.exp = parse(expression);
201   }
202
203   /**
204    * Returns all the tokens used in this expression.
205    *
206    * @return All the tokens used in this expression.
207    */
208   public Set<String> getTokens() {
209      var set = new TreeSet<String>();
210      exp.appendTokens(set);
211      return set;
212   }
213
214   /**
215    * Returns <jk>true</jk> if the specified string matches this expression.
216    *
217    * @param input The input string.
218    * @return
219    *    <jk>true</jk> if the specified string matches this expression.
220    *    <br>Always <jk>false</jk> if the string is <jk>null</jk>.
221    */
222   public boolean matches(String input) {
223      return nn(input) && exp.matches(input);
224   }
225
226   @Override /* Overridden from Object */
227   public String toString() {
228      return exp.toString();
229   }
230
231   private Exp parse(String expression) throws ParseException {
232      if (b(expression))
233         return new Never();
234
235      expression = expression.trim();
236
237      List<Exp> ors = list();
238      List<Exp> ands = list();
239
240      StateEnum state = S1;
241      int i = 0, mark = -1;
242      int pDepth = 0;
243      boolean error = false;
244
245      for (i = 0; i < expression.length(); i++) {
246         var c = expression.charAt(i);
247         if (state == S1) {
248            // S1 = Looking for start
249            if (! WS.contains(c)) {
250               if (c == '(') {
251                  state = S2;
252                  pDepth = 0;
253                  mark = i + 1;
254               } else if (OP.contains(c)) {
255                  error = true;
256                  break;
257               } else {
258                  state = S3;
259                  mark = i;
260               }
261            }
262         } else if (state == S2) {
263            // S2 = Found [(], looking for [)].
264            if (c == '(')
265               pDepth++;
266            if (c == ')') {
267               if (pDepth > 0)
268                  pDepth--;
269               else {
270                  ands.add(parse(expression.substring(mark, i)));
271                  mark = -1;
272                  state = S4;
273               }
274            }
275         } else if (state == S3) {
276            // S3 = Found [A], looking for end of A.
277            if (WS.contains(c) || OP.contains(c)) {
278               ands.add(parseOperand(expression.substring(mark, i)));
279               mark = -1;
280               if (WS.contains(c)) {
281                  state = S4;
282               } else {
283                  i--;
284                  state = S5;
285               }
286            }
287         } else if (state == S4) {
288            // S4 = Found [A ], looking for & or | or ,.
289            if (! WS.contains(c)) {
290               if (OP.contains(c)) {
291                  i--;
292                  state = S5;
293               } else {
294                  error = true;
295                  break;
296               }
297            }
298         } else if (state == S5) {
299            // S5 = Found & or | or ,.
300            if (c == '&') {
301               //ands.add(operand);
302               state = S6;
303            } else /* (c == '|' || c == ',') */ {
304               if (ands.size() == 1) {
305                  ors.add(ands.get(0));
306               } else {
307                  ors.add(new And(ands));
308               }
309               ands.clear();
310               if (c == '|') {
311                  state = S7;
312               } else {
313                  state = S1;
314               }
315            }
316         } else if (state == S6) {
317            // S6 = Found &, looking for & or other
318            if (! WS.contains(c)) {
319               if (c != '&')
320                  i--;
321               state = S1;
322            }
323         } else /* (state == S7) */ {
324            // S7 = Found |, looking for | or other
325            if (! WS.contains(c)) {
326               if (c != '|')
327                  i--;
328               state = S1;
329            }
330         }
331      }
332
333      if (error)
334         throw new ParseException("Invalid character in expression '" + expression + "' at position " + i + ". state=" + state, i);
335
336      if (state == S1)
337         throw new ParseException("Could not find beginning of clause in '" + expression + "'", i);
338      if (state == S2)
339         throw new ParseException("Could not find matching parenthesis in expression '" + expression + "'", i);
340      if (state == S5 || state == S6 || state == S7)
341         throw new ParseException("Dangling clause in expression '" + expression + "'", i);
342
343      if (mark != -1)
344         ands.add(parseOperand(expression.substring(mark, expression.length())));
345      if (ands.size() == 1)
346         ors.add(ands.get(0));
347      else
348         ors.add(new And(ands));
349
350      if (ors.size() == 1)
351         return ors.get(0);
352      return new Or(ors);
353   }
354}