1
using System;
2
using System.Collections.Generic;
3
using System.Linq;
4
using System.Text;
5

6
namespace TurnerSoftware.RobotsExclusionTools.Tokenization.TokenParsers
{
	/// <summary>
	/// Based on the rules defined by Google's documentation for Robots Meta Tag
	/// https://developers.google.com/search/reference/robots_meta_tag
	/// </summary>
	public class RobotsPageTokenParser : IRobotsPageTokenParser
	{
		/// <summary>
		/// The only field name that is treated as a rule carrying a value; every other field is treated as a user agent.
		/// </summary>
		private const string UnavailableAfterField = "unavailable_after";

		/// <summary>
		/// Shorthand value that is expanded into both "nofollow" and "noindex".
		/// </summary>
		private const string NoneValue = "none";

		/// <summary>
		/// The token types the parser stops on; the enumerator is advanced past everything else.
		/// </summary>
		private static readonly TokenType[] MoveTokens = { TokenType.Field, TokenType.Value, TokenType.NewLine };

		/// <summary>
		/// Accumulates what has been read for a single line of input.
		/// </summary>
		private class PageAccessParseState
		{
			/// <summary>The user agent named on the line, or <c>null</c> when none was given.</summary>
			public string UserAgent { get; set; }

			/// <summary>Set only when the line contains the "unavailable_after" field.</summary>
			public string Field { get; set; }

			/// <summary>The values read from the line, in the order they appeared.</summary>
			public List<string> Values { get; } = new List<string>();
		}

		/// <summary>
		/// Builds one <see cref="PageAccessEntry"/> per distinct user agent (compared case-insensitively)
		/// found in <paramref name="tokens"/>. Lines without a user agent are reported under "*" and are
		/// also applied to every named user agent, where a same-named agent-specific rule takes precedence.
		/// </summary>
		/// <param name="tokens">The tokens to parse.</param>
		/// <returns>The page access entries, one per user agent.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="tokens"/> is <c>null</c>.</exception>
		public IEnumerable<PageAccessEntry> GetPageAccessEntries(IEnumerable<Token> tokens)
		{
			if (tokens == null)
			{
				throw new ArgumentNullException(nameof(tokens));
			}

			var processedStates = ParseStates(tokens);

			//States without a user agent apply to every user agent
			var globalRules = processedStates.Where(s => s.UserAgent == null).ToArray();

			var result = processedStates
				//Merge variations of User Agent definitions (case insensitive)
				.GroupBy(s => s.UserAgent, StringComparer.OrdinalIgnoreCase)
				.Select(g =>
					new PageAccessEntry
					{
						UserAgent = g.Key ?? "*",
						//Global states go first so that agent-specific rules of the same name win
						Rules = ConvertToRules(g.Key == null ? g : globalRules.Concat(g))
					}
				).ToArray();

			return result;
		}

		/// <summary>
		/// Walks the tokens and produces one parse state per line. A new state starts after each
		/// new line token, and the state in progress when the tokens run out is always included.
		/// </summary>
		private static List<PageAccessParseState> ParseStates(IEnumerable<Token> tokens)
		{
			var processedStates = new List<PageAccessParseState>();
			var parseState = new PageAccessParseState();

			using (var enumerator = tokens.GetEnumerator())
			{
				while (enumerator.MoveTo(MoveTokens))
				{
					var current = enumerator.Current;

					if (current.TokenType == TokenType.NewLine)
					{
						processedStates.Add(parseState);
						parseState = new PageAccessParseState();
					}
					else if (current.TokenType == TokenType.Field)
					{
						if (current.Value.Equals(UnavailableAfterField, StringComparison.OrdinalIgnoreCase))
						{
							parseState.Field = current.Value;
						}
						else
						{
							//Any other field is taken to be the user agent the line applies to
							parseState.UserAgent = current.Value;
						}
					}
					else
					{
						if (current.Value.Equals(NoneValue, StringComparison.OrdinalIgnoreCase))
						{
							//"none" is stored as its two constituent rules
							parseState.Values.Add("nofollow");
							parseState.Values.Add("noindex");
						}
						else
						{
							parseState.Values.Add(current.Value);
						}
					}
				}

				//Capture the final line, which may not be terminated by a new line token
				processedStates.Add(parseState);
			}

			return processedStates;
		}

		/// <summary>
		/// Flattens the values of the given states into rules, keeping only the last rule for each
		/// rule name (compared case-insensitively).
		/// </summary>
		private static PageAccessRule[] ConvertToRules(IEnumerable<PageAccessParseState> userAgentStates)
		{
			return userAgentStates.SelectMany(s => s.Values.Select(v => new PageAccessRule
			{
				//Everything is a field (noindex, nofollow etc)
				RuleName = s.Field ?? v,
				//Only "unavailable_after" has a value
				RuleValue = s.Field == null ? null : v
			}))
			//Squish multiple of the same-name rules together
			.GroupBy(r => r.RuleName, StringComparer.OrdinalIgnoreCase)
			//Groups are never empty so the last rule always exists
			.Select(rg => rg.Last())
			.ToArray();
		}
	}
}

Read our documentation on viewing source code .

Loading