Bump Microsoft.NET.Test.Sdk from 16.8.0 to 16.8.3
1 |
using System; |
|
2 |
using System.Collections.Generic; |
|
3 |
using System.Linq; |
|
4 |
using System.Text; |
|
5 |
|
|
6 |
namespace TurnerSoftware.RobotsExclusionTools.Tokenization.TokenParsers |
|
7 |
{
|
|
8 |
/// <summary>
/// Turns a stream of robots page tokens into <see cref="PageAccessEntry"/> instances, one per user agent.
/// Based on the rules defined by Google's documentation for Robots Meta Tag
/// https://developers.google.com/search/reference/robots_meta_tag
/// </summary>
public class RobotsPageTokenParser : IRobotsPageTokenParser
{
    /// <summary>The only field name treated as a rule carrying a value; any other field token is stored as the user agent.</summary>
    private const string UnavailableAfterField = "unavailable_after";

    /// <summary>Shorthand value that is expanded into <see cref="NoFollowRule"/> and <see cref="NoIndexRule"/>.</summary>
    private const string NoneValue = "none";

    private const string NoFollowRule = "nofollow";
    private const string NoIndexRule = "noindex";

    /// <summary>
    /// Accumulates the field and value tokens seen between two new-line tokens.
    /// </summary>
    private class PageAccessParseState
    {
        /// <summary>Last field token that was not "unavailable_after"; null when no such token was seen.</summary>
        public string UserAgent { get; set; }

        /// <summary>Set only when an "unavailable_after" field token was seen; null otherwise.</summary>
        public string Field { get; set; }

        /// <summary>Value tokens in the order they were encountered ("none" is stored already expanded).</summary>
        public List<string> Values { get; } = new List<string>();
    }

    /// <summary>
    /// Builds one <see cref="PageAccessEntry"/> per distinct user agent (compared case-insensitively) found in <paramref name="tokens"/>.
    /// States without a user agent are reported under "*" and are also applied to every named user agent,
    /// with the agent's own rules overriding same-named global rules.
    /// </summary>
    /// <param name="tokens">The token stream to read; only field, value and new-line tokens are consumed.</param>
    /// <returns>The page access entries, in order of first appearance of each user agent.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
    public IEnumerable<PageAccessEntry> GetPageAccessEntries(IEnumerable<Token> tokens)
    {
        if (tokens == null)
        {
            throw new ArgumentNullException(nameof(tokens));
        }

        var processedStates = new List<PageAccessParseState>();
        var parseState = new PageAccessParseState();
        var moveTokens = new[] { TokenType.Field, TokenType.Value, TokenType.NewLine };

        using (var enumerator = tokens.GetEnumerator())
        {
            while (enumerator.MoveTo(moveTokens))
            {
                var current = enumerator.Current;

                if (current.TokenType == TokenType.NewLine)
                {
                    //A new line closes the current state and starts a fresh one
                    processedStates.Add(parseState);
                    parseState = new PageAccessParseState();
                }
                else if (current.TokenType == TokenType.Field)
                {
                    //Keywords are protocol identifiers, not linguistic text, so compare ordinally
                    if (current.Value.Equals(UnavailableAfterField, StringComparison.OrdinalIgnoreCase))
                    {
                        parseState.Field = current.Value;
                    }
                    else
                    {
                        parseState.UserAgent = current.Value;
                    }
                }
                else
                {
                    //Any remaining token type MoveTo stopped on (TokenType.Value, given moveTokens)
                    if (current.Value.Equals(NoneValue, StringComparison.OrdinalIgnoreCase))
                    {
                        //"none" is recorded as its two constituent rules
                        parseState.Values.Add(NoFollowRule);
                        parseState.Values.Add(NoIndexRule);
                    }
                    else
                    {
                        parseState.Values.Add(current.Value);
                    }
                }
            }

            //The final state is not terminated by a new line, so add it explicitly
            processedStates.Add(parseState);
        }

        PageAccessRule[] ConvertToRules(IEnumerable<PageAccessParseState> userAgentStates)
        {
            return userAgentStates.SelectMany(s => s.Values.Select(v => new PageAccessRule
                {
                    //Everything is a field (noindex, nofollow etc)
                    RuleName = s.Field ?? v,
                    //Only "unavailable_after" has a value
                    RuleValue = s.Field == null ? null : v
                }))
                //Squish multiple of the same-name rules together; the last occurrence wins
                .GroupBy(r => r.RuleName, StringComparer.OrdinalIgnoreCase)
                //Groups are never empty, so Last() is safe
                .Select(rg => rg.Last())
                .ToArray();
        }

        var globalRules = processedStates.Where(s => s.UserAgent == null).ToArray();

        var result = processedStates
            //Merge variations of User Agent definitions (case insensitive)
            .GroupBy(s => s.UserAgent, StringComparer.OrdinalIgnoreCase)
            .Select(g =>
                new PageAccessEntry
                {
                    UserAgent = g.Key ?? "*",
                    //Global states go first so that agent-specific rules of the same name override them
                    Rules = ConvertToRules(g.Key == null ? g : globalRules.Concat(g))
                }
            ).ToArray();

        return result;
    }
}
|
|
97 |
}
|
Read our documentation on viewing source code.