1
using TurnerSoftware.RobotsExclusionTools.Tokenization;
2
using TurnerSoftware.RobotsExclusionTools.Tokenization.Tokenizers;
3
using TurnerSoftware.RobotsExclusionTools.Tokenization.TokenParsers;
4
using TurnerSoftware.RobotsExclusionTools.Tokenization.Validators;
5
using System;
6
using System.IO;
7
using System.Threading.Tasks;
8
using System.Collections.Generic;
9
using System.Net.Http;
10
using System.Net;
11
using System.Threading;
12

13
namespace TurnerSoftware.RobotsExclusionTools
14
{
15
	/// <summary>
	/// Builds <see cref="RobotsFile"/> instances from robots.txt content supplied as a string,
	/// a stream, or fetched over HTTP from a URI.
	/// </summary>
	public class RobotsFileParser : IRobotsFileParser
	{
		private ITokenizer Tokenizer { get; }
		private ITokenPatternValidator PatternValidator { get; }
		private IRobotsFileTokenParser TokenParser { get; }

		private HttpClient HttpClient { get; }

		/// <summary>
		/// Creates a parser that uses a new <see cref="System.Net.Http.HttpClient"/> and the default
		/// tokenizer, pattern validator and token parser.
		/// </summary>
		public RobotsFileParser() : this(new HttpClient()) { }

		/// <summary>
		/// Creates a parser that uses the supplied <see cref="System.Net.Http.HttpClient"/> and the default
		/// tokenizer, pattern validator and token parser.
		/// </summary>
		/// <param name="client">The HTTP client used by <see cref="FromUriAsync"/>.</param>
		/// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
		public RobotsFileParser(HttpClient client) : this(client, new RobotsFileTokenizer(), new RobotsFileTokenPatternValidator(), new RobotsEntryTokenParser()) { }

		/// <summary>
		/// Creates a parser with every collaborator supplied by the caller.
		/// </summary>
		/// <param name="client">The HTTP client used by <see cref="FromUriAsync"/>.</param>
		/// <param name="tokenizer">Turns robots.txt text into tokens.</param>
		/// <param name="patternValidator">Validates the token sequence before it is parsed.</param>
		/// <param name="tokenParser">Extracts site access and sitemap entries from the tokens.</param>
		/// <exception cref="ArgumentNullException">Any argument is null.</exception>
		public RobotsFileParser(HttpClient client, ITokenizer tokenizer, ITokenPatternValidator patternValidator, IRobotsFileTokenParser tokenParser)
		{
			HttpClient = client ?? throw new ArgumentNullException(nameof(client));
			Tokenizer = tokenizer ?? throw new ArgumentNullException(nameof(tokenizer));
			PatternValidator = patternValidator ?? throw new ArgumentNullException(nameof(patternValidator));
			TokenParser = tokenParser ?? throw new ArgumentNullException(nameof(tokenParser));
		}

		/// <summary>
		/// Parses robots.txt content held in a string.
		/// </summary>
		/// <param name="robotsText">The robots.txt content.</param>
		/// <param name="baseUri">The base URI passed through to the resulting <see cref="RobotsFile"/>.</param>
		/// <returns>
		/// The parsed file, or the result of <see cref="RobotsFile.AllowAllRobots"/> when the
		/// token sequence fails validation.
		/// </returns>
		public RobotsFile FromString(string robotsText, Uri baseUri)
		{
			using (var memoryStream = new MemoryStream())
			{
				// The writer is flushed rather than disposed so the underlying stream
				// stays open and can be rewound for reading.
				var streamWriter = new StreamWriter(memoryStream);
				streamWriter.Write(robotsText);
				streamWriter.Flush();

				memoryStream.Seek(0, SeekOrigin.Begin);

				using (var streamReader = new StreamReader(memoryStream))
				{
					var tokens = Tokenizer.Tokenize(streamReader);
					return FromTokens(tokens, baseUri);
				}
			}
		}

		/// <summary>
		/// Downloads and parses the robots.txt file for the host of <paramref name="robotsUri"/>.
		/// </summary>
		/// <remarks>
		/// The path of <paramref name="robotsUri"/> is always replaced with <c>/robots.txt</c>.
		/// A 404 response yields <see cref="RobotsFile.AllowAllRobots"/>, a 401 or 403 response yields
		/// <see cref="RobotsFile.DenyAllRobots"/>, a 2xx response is parsed, and any other status
		/// yields <see cref="RobotsFile.AllowAllRobots"/>.
		/// </remarks>
		/// <param name="robotsUri">Any absolute URI on the site whose robots.txt should be fetched.</param>
		/// <param name="cancellationToken">Token used to cancel the request and the parsing.</param>
		/// <returns>The robots file that applies to the site.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="robotsUri"/> is null.</exception>
		public async Task<RobotsFile> FromUriAsync(Uri robotsUri, CancellationToken cancellationToken = default)
		{
			if (robotsUri == null)
			{
				throw new ArgumentNullException(nameof(robotsUri));
			}

			var baseUri = new Uri(robotsUri.GetLeftPart(UriPartial.Authority));
			robotsUri = new UriBuilder(robotsUri) { Path = "/robots.txt" }.Uri;

			using (var response = await HttpClient.GetAsync(robotsUri, cancellationToken).ConfigureAwait(false))
			{
				cancellationToken.ThrowIfCancellationRequested(); // '.NET Framework' and '.NET Core 2.1' workaround
				if (response.StatusCode == HttpStatusCode.NotFound)
				{
					return RobotsFile.AllowAllRobots(baseUri);
				}
				else if (response.StatusCode == HttpStatusCode.Unauthorized || response.StatusCode == HttpStatusCode.Forbidden)
				{
					return RobotsFile.DenyAllRobots(baseUri);
				}
				else if (response.IsSuccessStatusCode)
				{
					using (var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false))
					{
						cancellationToken.ThrowIfCancellationRequested();
						return await FromStreamAsync(stream, baseUri, cancellationToken).ConfigureAwait(false);
					}
				}
			}

			// Any status not handled above falls back to allowing all robots.
			return RobotsFile.AllowAllRobots(baseUri);
		}

		/// <summary>
		/// Parses robots.txt content read from a stream.
		/// </summary>
		/// <param name="stream">The stream containing the robots.txt content.</param>
		/// <param name="baseUri">The base URI passed through to the resulting <see cref="RobotsFile"/>.</param>
		/// <param name="cancellationToken">Token passed to the tokenizer.</param>
		/// <returns>
		/// The parsed file, or the result of <see cref="RobotsFile.AllowAllRobots"/> when the
		/// token sequence fails validation.
		/// </returns>
		public async Task<RobotsFile> FromStreamAsync(Stream stream, Uri baseUri, CancellationToken cancellationToken = default)
		{
			// The reader is deliberately not disposed: disposing it would also close
			// the caller's stream, which this method does not own.
			var streamReader = new StreamReader(stream);
			var tokens = await Tokenizer.TokenizeAsync(streamReader, cancellationToken).ConfigureAwait(false);
			return FromTokens(tokens, baseUri);
		}

		/// <summary>
		/// Validates the tokens and, when valid, builds a <see cref="RobotsFile"/> from them.
		/// </summary>
		/// <param name="tokens">The tokens produced by the tokenizer.</param>
		/// <param name="baseUri">The base URI passed through to the resulting <see cref="RobotsFile"/>.</param>
		/// <returns>
		/// The parsed file, or the result of <see cref="RobotsFile.AllowAllRobots"/> when validation fails.
		/// </returns>
		private RobotsFile FromTokens(IEnumerable<Token> tokens, Uri baseUri)
		{
			// The tokens are consumed by the validator and by both parser calls. Materialize
			// them once so a lazily produced sequence is not enumerated multiple times.
			IEnumerable<Token> materializedTokens = tokens as ICollection<Token> ?? new List<Token>(tokens);

			var validationResult = PatternValidator.Validate(materializedTokens);

			if (validationResult.IsValid)
			{
				return new RobotsFile(baseUri)
				{
					SiteAccessEntries = TokenParser.GetSiteAccessEntries(materializedTokens),
					SitemapEntries = TokenParser.GetSitemapUrlEntries(materializedTokens)
				};
			}

			return RobotsFile.AllowAllRobots(baseUri);
		}
	}
103
}

Read our documentation on viewing source code .

Loading