From bd8a63c98c155f589e360ffa0718453d9b8cd9b7 Mon Sep 17 00:00:00 2001 From: Ehssan Doust Date: Fri, 5 Jan 2018 00:39:38 +0100 Subject: [PATCH 1/3] Migrated from PCL to .NET Standard libraries for compatibility reasons - NuGet package that is automatically built needs to be edited manually after creation - Changes not thoroughly tested, some test cases fail (but referenced websites seem to be outdated / not as they were when writing the tests anyway) --- .gitignore | 1 + .../NReadability/NReadability.csproj | 109 -------- .../NReadability/Properties/AssemblyInfo.cs | 30 --- PortablePorts/SgmlReader/HTMLspecial.ent | 77 ------ PortablePorts/SgmlReader/HTMLsymbol.ent | 241 ------------------ .../SgmlReader/Properties/AssemblyInfo.cs | 30 --- PortablePorts/SgmlReader/SgmlReader.csproj | 84 ------ PortablePorts/SgmlReader/htmllat1.ent | 194 -------------- ReadSharp.Tests/ReadSharp.Tests.csproj | 76 +++--- ReadSharp.Tests/ReadTests.cs | 4 +- ReadSharp.Tests/app.config | 8 +- ReadSharp.Tests/packages.config | 25 +- ReadSharp.sln | 47 ++-- ReadSharp/Properties/AssemblyInfo.cs | 26 -- ReadSharp/ReadSharp.csproj | 155 ++--------- ReadSharp/ReadSharp.nuspec | 27 -- ReadSharp/packages.config | 7 - .../AttributeTransformationInput.cs | 0 .../AttributeTransformationResult.cs | 0 .../NReadability/ChildNodesTraverser.cs | 0 .../NReadability/Consts.cs | 0 .../NReadability/DomExtensions.cs | 0 .../NReadability/DomSerializationParams.cs | 0 .../NReadability/ElementsTraverser.cs | 0 .../NReadability/EncodedStringWriter.cs | 0 .../NReadability/EnumerableExtensions.cs | 0 .../NReadability/Enums.cs | 0 .../NReadability/HtmlUtils.cs | 0 .../NReadability/InternalErrorException.cs | 3 +- .../NReadability/MetaExtractor.cs | 0 .../NReadability/NReadability.csproj | 21 ++ .../NReadability/NReadabilityTranscoder.cs | 0 .../NReadability/Resources/readability.css | 0 .../NReadability/SgmlDomBuilder.cs | 0 .../NReadability/SgmlDomSerializer.cs | 0 .../NReadability/TranscodingInput.cs | 0 .../NReadability/TranscodingResult.cs | 0 .../NReadability/UtilityExtensions.cs | 0 .../NReadability/WebTranscodingInput.cs | 0 .../NReadability/WebTranscodingResult.cs | 0 .../SgmlReader/Html.dtd | 0 .../SgmlReader/SgmlParser.cs | 106 +++++--- .../SgmlReader/SgmlReader.cs | 64 ++--- StandardPorts/SgmlReader/SgmlReader.csproj | 21 ++ 44 files changed, 256 insertions(+), 1100 deletions(-) delete mode 100644 PortablePorts/NReadability/NReadability.csproj delete mode 100644 PortablePorts/NReadability/Properties/AssemblyInfo.cs delete mode 100644 PortablePorts/SgmlReader/HTMLspecial.ent delete mode 100644 PortablePorts/SgmlReader/HTMLsymbol.ent delete mode 100644 PortablePorts/SgmlReader/Properties/AssemblyInfo.cs delete mode 100644 PortablePorts/SgmlReader/SgmlReader.csproj delete mode 100644 PortablePorts/SgmlReader/htmllat1.ent delete mode 100644 ReadSharp/Properties/AssemblyInfo.cs delete mode 100644 ReadSharp/ReadSharp.nuspec delete mode 100644 ReadSharp/packages.config rename {PortablePorts => StandardPorts}/NReadability/AttributeTransformationInput.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/AttributeTransformationResult.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/ChildNodesTraverser.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/Consts.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/DomExtensions.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/DomSerializationParams.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/ElementsTraverser.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/EncodedStringWriter.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/EnumerableExtensions.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/Enums.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/HtmlUtils.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/InternalErrorException.cs (98%) rename {PortablePorts => StandardPorts}/NReadability/MetaExtractor.cs (100%) create mode 100644 StandardPorts/NReadability/NReadability.csproj rename {PortablePorts => StandardPorts}/NReadability/NReadabilityTranscoder.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/Resources/readability.css (100%) rename {PortablePorts => StandardPorts}/NReadability/SgmlDomBuilder.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/SgmlDomSerializer.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/TranscodingInput.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/TranscodingResult.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/UtilityExtensions.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/WebTranscodingInput.cs (100%) rename {PortablePorts => StandardPorts}/NReadability/WebTranscodingResult.cs (100%) rename {PortablePorts => StandardPorts}/SgmlReader/Html.dtd (100%) rename {PortablePorts => StandardPorts}/SgmlReader/SgmlParser.cs (97%) rename {PortablePorts => StandardPorts}/SgmlReader/SgmlReader.cs (98%) create mode 100644 StandardPorts/SgmlReader/SgmlReader.csproj diff --git a/.gitignore b/.gitignore index 1f2121f..d82d826 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,4 @@ $RECYCLE.BIN/ # ========================= # Project # ========================= +/.vs/ReadSharp/v15/Server/sqlite3 diff --git a/PortablePorts/NReadability/NReadability.csproj b/PortablePorts/NReadability/NReadability.csproj deleted file mode 100644 index 9b45820..0000000 --- a/PortablePorts/NReadability/NReadability.csproj +++ /dev/null @@ -1,109 +0,0 @@ - - - - - 11.0 - Debug - AnyCPU - {14C3EE6A-54A4-4A37-8B56-D52A3802F1C2} - Library - Properties - ReadSharp.Ports.NReadability - ReadSharp.Ports.NReadability - v4.5 - Profile259 - 512 - {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} - - - - - 4.0 - publish\ - true - Disk - false - Foreground - 7 - Days - false - false - true - 0 - 1.0.0.%2a - false - false - true - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {9112414c-e2d1-43ba-a298-a89f77d94332} - SgmlReader - - - - - False - .NET Framework 3.5 SP1 Client Profile - false - - - False - .NET Framework 3.5 SP1 - false - - - - - \ No newline at end of file diff --git a/PortablePorts/NReadability/Properties/AssemblyInfo.cs b/PortablePorts/NReadability/Properties/AssemblyInfo.cs deleted file mode 100644 index 913278d..0000000 --- a/PortablePorts/NReadability/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,30 +0,0 @@ -using System.Resources; -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("NReadabilityPCL")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("NReadabilityPCL")] -[assembly: AssemblyCopyright("Copyright © 2013")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] -[assembly: NeutralResourcesLanguage("en")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/PortablePorts/SgmlReader/HTMLspecial.ent b/PortablePorts/SgmlReader/HTMLspecial.ent deleted file mode 100644 index 5ce5c6a..0000000 --- a/PortablePorts/SgmlReader/HTMLspecial.ent +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/PortablePorts/SgmlReader/HTMLsymbol.ent b/PortablePorts/SgmlReader/HTMLsymbol.ent deleted file mode 100644 index 524bfe1..0000000 --- a/PortablePorts/SgmlReader/HTMLsymbol.ent +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/PortablePorts/SgmlReader/Properties/AssemblyInfo.cs b/PortablePorts/SgmlReader/Properties/AssemblyInfo.cs deleted file mode 100644 index 5c355f5..0000000 --- a/PortablePorts/SgmlReader/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,30 +0,0 @@ -using System.Resources; -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("SgmlReader")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("SgmlReader")] -[assembly: AssemblyCopyright("Copyright © 2013")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] -[assembly: NeutralResourcesLanguage("en")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/PortablePorts/SgmlReader/SgmlReader.csproj b/PortablePorts/SgmlReader/SgmlReader.csproj deleted file mode 100644 index d9d6098..0000000 --- a/PortablePorts/SgmlReader/SgmlReader.csproj +++ /dev/null @@ -1,84 +0,0 @@ - - - - - 11.0 - Debug - AnyCPU - {9112414C-E2D1-43BA-A298-A89F77D94332} - Library - Properties - ReadSharp.Ports.SgmlReader - ReadSharp.Ports.SgmlReader - v4.5 - Profile259 - 512 - {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} - - - - - 4.0 - publish\ - true - Disk - false - Foreground - 7 - Days - false - false - true - 0 - 1.0.0.%2a - false - false - true - - - true - full - false - bin\Debug\ - TRACE;DEBUG;PORTABLE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE;PORTABLE - prompt - 4 - - - - - - - - - - - - - False - .NET Framework 3.5 SP1 Client Profile - false - - - False - .NET Framework 3.5 SP1 - false - - - - - \ No newline at end of file diff --git a/PortablePorts/SgmlReader/htmllat1.ent b/PortablePorts/SgmlReader/htmllat1.ent deleted file mode 100644 index c644486..0000000 --- a/PortablePorts/SgmlReader/htmllat1.ent +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/ReadSharp.Tests/ReadSharp.Tests.csproj b/ReadSharp.Tests/ReadSharp.Tests.csproj index 2344964..12c24ce 100644 --- a/ReadSharp.Tests/ReadSharp.Tests.csproj +++ b/ReadSharp.Tests/ReadSharp.Tests.csproj @@ -1,6 +1,7 @@  - - + + + Debug AnyCPU @@ -9,7 +10,7 @@ Properties ReadSharp.Tests ReadSharp.Tests - v4.5 + v4.6 512 {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 10.0 @@ -21,6 +22,7 @@ true + true @@ -40,45 +42,41 @@ 4 - - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.dll - True + + ..\packages\HtmlAgilityPack.1.6.11\lib\Net45\HtmlAgilityPack.dll - - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + True - - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + True - - - - - ..\packages\Microsoft.Net.Http.2.2.29\lib\net45\System.Net.Http.Extensions.dll + + ..\packages\System.Security.Cryptography.Algorithms.4.3.1\lib\net46\System.Security.Cryptography.Algorithms.dll True - - ..\packages\Microsoft.Net.Http.2.2.29\lib\net45\System.Net.Http.Primitives.dll + + ..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll + + + ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll + + + ..\packages\System.Security.Cryptography.X509Certificates.4.3.2\lib\net46\System.Security.Cryptography.X509Certificates.dll True - - ..\packages\xunit.abstractions.2.0.0\lib\net35\xunit.abstractions.dll - True + ..\packages\xunit.abstractions.2.0.1\lib\net35\xunit.abstractions.dll - - ..\packages\xunit.assert.2.1.0\lib\dotnet\xunit.assert.dll - True + + ..\packages\xunit.assert.2.3.1\lib\netstandard1.1\xunit.assert.dll - - ..\packages\xunit.extensibility.core.2.1.0\lib\dotnet\xunit.core.dll - True + + ..\packages\xunit.extensibility.core.2.3.1\lib\netstandard1.1\xunit.core.dll - - ..\packages\xunit.extensibility.execution.2.1.0\lib\net45\xunit.execution.desktop.dll - True + + ..\packages\xunit.extensibility.execution.2.3.1\lib\net452\xunit.execution.desktop.dll @@ -104,9 +102,20 @@ - {06219cad-e85e-48e2-a216-2539805a9ee8} + {47494d4e-4b29-4e6d-ae43-9b8842d09f5e} ReadSharp + + {e17e2edf-557d-47a6-82d3-63856dc24e8e} + NReadability + + + {3fe96608-208c-4be1-9a7d-b55ffc06888c} + SgmlReader + + + + @@ -129,14 +138,15 @@ - This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - ..\packages\HtmlAgilityPack.1.4.9\lib\portable-net45+netcore45+wpa81+wp8+MonoAndroid+MonoTouch\HtmlAgilityPack.dll - True - - - ..\packages\Microsoft.Net.Http.2.2.29\lib\portable-net40+sl4+win8+wp71+wpa81\System.Net.Http.dll - True - - - ..\packages\Microsoft.Net.Http.2.2.29\lib\portable-net40+sl4+win8+wp71+wpa81\System.Net.Http.Extensions.dll - True - - - ..\packages\Microsoft.Net.Http.2.2.29\lib\portable-net40+sl4+win8+wp71+wpa81\System.Net.Http.Primitives.dll - True - + + - - {14c3ee6a-54a4-4a37-8b56-d52a3802f1c2} - NReadability - - - {9112414c-e2d1-43ba-a298-a89f77d94332} - SgmlReader - + - - - - - - This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - - - - \ No newline at end of file + + diff --git a/ReadSharp/ReadSharp.nuspec b/ReadSharp/ReadSharp.nuspec deleted file mode 100644 index 8dab213..0000000 --- a/ReadSharp/ReadSharp.nuspec +++ /dev/null @@ -1,27 +0,0 @@ - - - - $id$ - $version$ - $title$ - $author$ - $author$ - https://raw.github.com/ceee/ReadSharp/master/LICENSE-MIT - https://github.com/ceee/ReadSharp - https://raw.github.com/ceee/ReadSharp/master/Assets/readsharp.png - false - - - - en-US - - - - Copyright by cee, 2016 - ReadSharp PocketAPI Pocket PocketSharp Tobias Klika cee NReadability SgmlReader Reader Article SDK Poki - - \ No newline at end of file diff --git a/ReadSharp/packages.config b/ReadSharp/packages.config deleted file mode 100644 index 3040ad8..0000000 --- a/ReadSharp/packages.config +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/PortablePorts/NReadability/AttributeTransformationInput.cs b/StandardPorts/NReadability/AttributeTransformationInput.cs similarity index 100% rename from PortablePorts/NReadability/AttributeTransformationInput.cs rename to StandardPorts/NReadability/AttributeTransformationInput.cs diff --git a/PortablePorts/NReadability/AttributeTransformationResult.cs b/StandardPorts/NReadability/AttributeTransformationResult.cs similarity index 100% rename from PortablePorts/NReadability/AttributeTransformationResult.cs rename to StandardPorts/NReadability/AttributeTransformationResult.cs diff --git a/PortablePorts/NReadability/ChildNodesTraverser.cs b/StandardPorts/NReadability/ChildNodesTraverser.cs similarity index 100% rename from PortablePorts/NReadability/ChildNodesTraverser.cs rename to StandardPorts/NReadability/ChildNodesTraverser.cs diff --git a/PortablePorts/NReadability/Consts.cs b/StandardPorts/NReadability/Consts.cs similarity index 100% rename from PortablePorts/NReadability/Consts.cs rename to StandardPorts/NReadability/Consts.cs diff --git a/PortablePorts/NReadability/DomExtensions.cs b/StandardPorts/NReadability/DomExtensions.cs similarity index 100% rename from PortablePorts/NReadability/DomExtensions.cs rename to StandardPorts/NReadability/DomExtensions.cs diff --git a/PortablePorts/NReadability/DomSerializationParams.cs b/StandardPorts/NReadability/DomSerializationParams.cs similarity index 100% rename from PortablePorts/NReadability/DomSerializationParams.cs rename to StandardPorts/NReadability/DomSerializationParams.cs diff --git a/PortablePorts/NReadability/ElementsTraverser.cs b/StandardPorts/NReadability/ElementsTraverser.cs similarity index 100% rename from PortablePorts/NReadability/ElementsTraverser.cs rename to StandardPorts/NReadability/ElementsTraverser.cs diff --git a/PortablePorts/NReadability/EncodedStringWriter.cs b/StandardPorts/NReadability/EncodedStringWriter.cs similarity index 100% rename from PortablePorts/NReadability/EncodedStringWriter.cs rename to StandardPorts/NReadability/EncodedStringWriter.cs diff --git a/PortablePorts/NReadability/EnumerableExtensions.cs b/StandardPorts/NReadability/EnumerableExtensions.cs similarity index 100% rename from PortablePorts/NReadability/EnumerableExtensions.cs rename to StandardPorts/NReadability/EnumerableExtensions.cs diff --git a/PortablePorts/NReadability/Enums.cs b/StandardPorts/NReadability/Enums.cs similarity index 100% rename from PortablePorts/NReadability/Enums.cs rename to StandardPorts/NReadability/Enums.cs diff --git a/PortablePorts/NReadability/HtmlUtils.cs b/StandardPorts/NReadability/HtmlUtils.cs similarity index 100% rename from PortablePorts/NReadability/HtmlUtils.cs rename to StandardPorts/NReadability/HtmlUtils.cs diff --git a/PortablePorts/NReadability/InternalErrorException.cs b/StandardPorts/NReadability/InternalErrorException.cs similarity index 98% rename from PortablePorts/NReadability/InternalErrorException.cs rename to StandardPorts/NReadability/InternalErrorException.cs index 8feae6d..041f087 100644 --- a/PortablePorts/NReadability/InternalErrorException.cs +++ b/StandardPorts/NReadability/InternalErrorException.cs @@ -1,4 +1,4 @@ -/* +/* * NReadability * http://code.google.com/p/nreadability/ * @@ -19,7 +19,6 @@ */ using System; -using System.Runtime.Serialization; namespace ReadSharp.Ports.NReadability { diff --git a/PortablePorts/NReadability/MetaExtractor.cs b/StandardPorts/NReadability/MetaExtractor.cs similarity index 100% rename from PortablePorts/NReadability/MetaExtractor.cs rename to StandardPorts/NReadability/MetaExtractor.cs diff --git a/StandardPorts/NReadability/NReadability.csproj b/StandardPorts/NReadability/NReadability.csproj new file mode 100644 index 0000000..ea85b39 --- /dev/null +++ b/StandardPorts/NReadability/NReadability.csproj @@ -0,0 +1,21 @@ + + + + netstandard1.1 + ReadSharp.Ports.NReadability + ReadSharp.Ports.NReadability + + + + + + + + + + + + + + + diff --git a/PortablePorts/NReadability/NReadabilityTranscoder.cs b/StandardPorts/NReadability/NReadabilityTranscoder.cs similarity index 100% rename from PortablePorts/NReadability/NReadabilityTranscoder.cs rename to StandardPorts/NReadability/NReadabilityTranscoder.cs diff --git a/PortablePorts/NReadability/Resources/readability.css b/StandardPorts/NReadability/Resources/readability.css similarity index 100% rename from PortablePorts/NReadability/Resources/readability.css rename to StandardPorts/NReadability/Resources/readability.css diff --git a/PortablePorts/NReadability/SgmlDomBuilder.cs b/StandardPorts/NReadability/SgmlDomBuilder.cs similarity index 100% rename from PortablePorts/NReadability/SgmlDomBuilder.cs rename to StandardPorts/NReadability/SgmlDomBuilder.cs diff --git a/PortablePorts/NReadability/SgmlDomSerializer.cs b/StandardPorts/NReadability/SgmlDomSerializer.cs similarity index 100% rename from PortablePorts/NReadability/SgmlDomSerializer.cs rename to StandardPorts/NReadability/SgmlDomSerializer.cs diff --git a/PortablePorts/NReadability/TranscodingInput.cs b/StandardPorts/NReadability/TranscodingInput.cs similarity index 100% rename from PortablePorts/NReadability/TranscodingInput.cs rename to StandardPorts/NReadability/TranscodingInput.cs diff --git a/PortablePorts/NReadability/TranscodingResult.cs b/StandardPorts/NReadability/TranscodingResult.cs similarity index 100% rename from PortablePorts/NReadability/TranscodingResult.cs rename to StandardPorts/NReadability/TranscodingResult.cs diff --git a/PortablePorts/NReadability/UtilityExtensions.cs b/StandardPorts/NReadability/UtilityExtensions.cs similarity index 100% rename from PortablePorts/NReadability/UtilityExtensions.cs rename to StandardPorts/NReadability/UtilityExtensions.cs diff --git a/PortablePorts/NReadability/WebTranscodingInput.cs b/StandardPorts/NReadability/WebTranscodingInput.cs similarity index 100% rename from PortablePorts/NReadability/WebTranscodingInput.cs rename to StandardPorts/NReadability/WebTranscodingInput.cs diff --git a/PortablePorts/NReadability/WebTranscodingResult.cs b/StandardPorts/NReadability/WebTranscodingResult.cs similarity index 100% rename from PortablePorts/NReadability/WebTranscodingResult.cs rename to StandardPorts/NReadability/WebTranscodingResult.cs diff --git a/PortablePorts/SgmlReader/Html.dtd b/StandardPorts/SgmlReader/Html.dtd similarity index 100% rename from PortablePorts/SgmlReader/Html.dtd rename to StandardPorts/SgmlReader/Html.dtd diff --git a/PortablePorts/SgmlReader/SgmlParser.cs b/StandardPorts/SgmlReader/SgmlParser.cs similarity index 97% rename from PortablePorts/SgmlReader/SgmlParser.cs rename to StandardPorts/SgmlReader/SgmlParser.cs index 020f23b..f12edd2 100644 --- a/PortablePorts/SgmlReader/SgmlParser.cs +++ b/StandardPorts/SgmlReader/SgmlParser.cs @@ -26,7 +26,10 @@ using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; +using System.Linq.Expressions; using System.Net; +using System.Net.Http; +using System.Runtime.CompilerServices; using System.Text; using System.Threading; using System.Xml; @@ -36,7 +39,7 @@ namespace ReadSharp.Ports.Sgml /// /// Thrown if any errors occur while parsing the source. /// -#if !PORTABLE +#if !NETSTANDARD1_1 [Serializable] #endif public class SgmlParseException : Exception @@ -81,7 +84,7 @@ public SgmlParseException(string message, Exception innerException) { } -#if !PORTABLE +#if !NETSTANDARD1_1 /// /// Initializes a new instance of the SgmlParseException class with serialized data. /// @@ -465,7 +468,7 @@ public void Open(Entity parent, Uri baseUri) } Stream stream = null; -#if PORTABLE +#if NETSTANDARD1_1 Encoding e = Encoding.UTF8; #else Encoding e = Encoding.Default; @@ -474,32 +477,39 @@ public void Open(Entity parent, Uri baseUri) { case "file": { -#if PORTABLE - throw new NotSupportedException("The file scheme is not supported by the .NETPortable framework."); +#if NETSTANDARD1_1 + throw new NotSupportedException("The file scheme is not supported by the .NETNETSTANDARD1_1 framework."); #else - string path = this.m_resolvedUri.LocalPath; - stream = new FileStream(path, FileMode.Open, FileAccess.Read); + string path = this.m_resolvedUri.LocalPath; + stream = new FileStream(path, FileMode.Open, FileAccess.Read); #endif } break; default: //Console.WriteLine("Fetching:" + ResolvedUri.AbsoluteUri); +#if NETSTANDARD1_1 + var response = GetResponse(ResolvedUri, TimeSpan.FromSeconds(10)); + Uri actual = response.RequestMessage.RequestUri; +#else HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(ResolvedUri); -#if !PORTABLE - wr.UserAgent = "Mozilla/4.0 (compatible;);"; - wr.Timeout = 10000; // in case this is running in an ASPX page. - if (m_proxy != null) - wr.Proxy = new WebProxy(m_proxy); - wr.PreAuthenticate = false; - // Pass the credentials of the process. - wr.Credentials = CredentialCache.DefaultCredentials; -#endif + wr.UserAgent = "Mozilla/4.0 (compatible;);"; + wr.Timeout = 10000; // in case this is running in an ASPX page. + if (m_proxy != null) + wr.Proxy = new WebProxy(m_proxy); + wr.PreAuthenticate = false; + // Pass the credentials of the process. + wr.Credentials = CredentialCache.DefaultCredentials; WebResponse resp = GetResponse(wr, TimeSpan.FromSeconds(10)); Uri actual = resp.ResponseUri; +#endif if (!string.Equals(actual.AbsoluteUri, this.m_resolvedUri.AbsoluteUri, StringComparison.OrdinalIgnoreCase)) { this.m_resolvedUri = actual; } + +#if NETSTANDARD1_1 + string mimeType = response.Content.Headers.ContentType.MediaType; +#else string contentType = resp.ContentType.ToLowerInvariant(); string mimeType = contentType; int i = contentType.IndexOf(';'); @@ -507,19 +517,19 @@ public void Open(Entity parent, Uri baseUri) { mimeType = contentType.Substring(0, i); } +#endif if (StringUtilities.EqualsIgnoreCase(mimeType, "text/html")) { this.m_isHtml = true; } - i = contentType.IndexOf("charset"); -#if PORTABLE +#if NETSTANDARD1_1 e = Encoding.UTF8; + stream = response.Content.ReadAsStreamAsync().ConfigureAwait(false).GetAwaiter().GetResult(); #else - e = Encoding.Default; -#endif - + i = contentType.IndexOf("charset"); + e = Encoding.Default; if (i >= 0) { int j = contentType.IndexOf("=", i); @@ -540,8 +550,8 @@ public void Open(Entity parent, Uri baseUri) } } } - stream = resp.GetResponseStream(); +#endif break; } @@ -552,6 +562,27 @@ public void Open(Entity parent, Uri baseUri) } } +#if NETSTANDARD1_1 + private static HttpResponseMessage GetResponse(Uri uri, TimeSpan timeout) + { + try + { + using (var client = new HttpClient(new HttpClientHandler { AllowAutoRedirect = true }) { Timeout = timeout }) + { + return client.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead, CancellationToken.None) + .ConfigureAwait(false).GetAwaiter().GetResult(); + } + } + catch (OperationCanceledException) + { + throw new TimeoutException("Timeout waiting for response"); + } + catch (Exception ex) + { + throw new IOException(ex.Message, ex); + } + } +#else private static HttpWebResponse GetResponse(HttpWebRequest wr, TimeSpan timeout) { HttpWebResponse response = null; @@ -586,6 +617,7 @@ private static HttpWebResponse GetResponse(HttpWebRequest wr, TimeSpan timeout) return response; } +#endif /// /// Gets the character encoding for this entity. @@ -631,10 +663,10 @@ public char SkipWhitespace() public string ScanToken(StringBuilder sb, string term, bool nmtoken) { if (sb == null) - throw new ArgumentNullException("sb"); + throw new ArgumentNullException(nameof(sb)); if (term == null) - throw new ArgumentNullException("term"); + throw new ArgumentNullException(nameof(term)); sb.Length = 0; char ch = m_lastchar; @@ -669,7 +701,7 @@ public string ScanToken(StringBuilder sb, string term, bool nmtoken) public string ScanLiteral(StringBuilder sb, char quote) { if (sb == null) - throw new ArgumentNullException("sb"); + throw new ArgumentNullException(nameof(sb)); sb.Length = 0; char ch = ReadChar(); @@ -712,7 +744,7 @@ public string ScanLiteral(StringBuilder sb, char quote) public string ScanToEnd(StringBuilder sb, string type, string terminators) { if (terminators == null) - throw new ArgumentNullException("terminators"); + throw new ArgumentNullException(nameof(terminators)); if (sb != null) sb.Length = 0; @@ -935,7 +967,7 @@ private int ReadNumericEntityCode(out string value) static int[] CtrlMap = new int[] { // This is the windows-1252 mapping of the code points 0x80 through 0x9f. 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, - 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, + 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376 }; @@ -1644,7 +1676,7 @@ internal override int GetFullChars(byte[] bytes, int byteIndex, int byteCount, c UInt32 code; int i, j; byteCount += byteIndex; - for (i = byteIndex, j = charIndex; i + 3 < byteCount; ) + for (i = byteIndex, j = charIndex; i + 3 < byteCount;) { code = (UInt32)(((bytes[i + 3]) << 24) | (bytes[i + 2] << 16) | (bytes[i + 1] << 8) | (bytes[i])); if (code > 0x10FFFF) @@ -1681,7 +1713,7 @@ internal override int GetFullChars(byte[] bytes, int byteIndex, int byteCount, c UInt32 code; int i, j; byteCount += byteIndex; - for (i = byteIndex, j = charIndex; i + 3 < byteCount; ) + for (i = byteIndex, j = charIndex; i + 3 < byteCount;) { code = (UInt32)(((bytes[i]) << 24) | (bytes[i + 1] << 16) | (bytes[i + 2] << 8) | (bytes[i + 3])); if (code > 0x10FFFF) @@ -1812,7 +1844,7 @@ public AttDef FindAttribute(string name) public void AddAttDefs(Dictionary list) { if (list == null) - throw new ArgumentNullException("list"); + throw new ArgumentNullException(nameof(list)); if (m_attList == null) { @@ -2227,7 +2259,7 @@ public void AddOccurrence(char c) public bool CanContain(string name, SgmlDtd dtd) { if (dtd == null) - throw new ArgumentNullException("dtd"); + throw new ArgumentNullException(nameof(dtd)); // Do a simple search of members. foreach (object obj in Members) @@ -3458,11 +3490,11 @@ internal static class UTF32Utilities { public static string ConvertFromUtf32(int utf32) { -#if PORTABLE +#if NETSTANDARD1_1 if (utf32 < 0 || utf32 > 0x10FFFF) - throw new ArgumentOutOfRangeException("utf32", "The argument must be from 0 to 0x10FFFF."); + throw new ArgumentOutOfRangeException(nameof(utf32), "The argument must be from 0 to 0x10FFFF."); if (0xD800 <= utf32 && utf32 <= 0xDFFF) - throw new ArgumentOutOfRangeException("utf32", "The argument must not be in surrogate pair range."); + throw new ArgumentOutOfRangeException(nameof(utf32), "The argument must not be in surrogate pair range."); if (utf32 < 0x10000) return new string((char)utf32, 1); utf32 -= 0x10000; @@ -3476,11 +3508,11 @@ public static string ConvertFromUtf32(int utf32) public static int ConvertToUtf32(char highSurrogate, char lowSurrogate) { -#if PORTABLE +#if NETSTANDARD1_1 if (highSurrogate < 0xD800 || 0xDBFF < highSurrogate) - throw new ArgumentOutOfRangeException("highSurrogate"); + throw new ArgumentOutOfRangeException(nameof(highSurrogate)); if (lowSurrogate < 0xDC00 || 0xDFFF < lowSurrogate) - throw new ArgumentOutOfRangeException("lowSurrogate"); + throw new ArgumentOutOfRangeException(nameof(lowSurrogate)); return 0x10000 + ((highSurrogate - 0xD800) << 10) + (lowSurrogate - 0xDC00); #else diff --git a/PortablePorts/SgmlReader/SgmlReader.cs b/StandardPorts/SgmlReader/SgmlReader.cs similarity index 98% rename from PortablePorts/SgmlReader/SgmlReader.cs rename to StandardPorts/SgmlReader/SgmlReader.cs index f3d5ede..cdb5e69 100644 --- a/PortablePorts/SgmlReader/SgmlReader.cs +++ b/StandardPorts/SgmlReader/SgmlReader.cs @@ -65,9 +65,9 @@ public enum CaseFolding ToLower } -#if PORTABLE +#if NETSTANDARD1_1 /// - /// System.Xml.WhitespaceHandling is not available in .NETPortable. + /// System.Xml.WhitespaceHandling is not available in .NETNETSTANDARD1_1. /// public enum WhitespaceHandling { @@ -491,10 +491,10 @@ private void LazyLoadDtd(Uri baseUri) } else { -#if PORTABLE +#if NETSTANDARD1_1 throw new InvalidOperationException("Cannot load DTD without specifying base URI."); #else - baseUri = new Uri(new Uri(Directory.GetCurrentDirectory() + "/"), this.m_syslit); + baseUri = new Uri(new Uri(Directory.GetCurrentDirectory() + "/"), this.m_syslit); #endif } this.m_dtd = SgmlDtd.Parse(baseUri, this.m_docType, this.m_pubid, baseUri.AbsoluteUri, this.m_subset, this.m_proxy, null); @@ -655,10 +655,10 @@ public string Href } else { -#if PORTABLE +#if NETSTANDARD1_1 throw new ArgumentException("Relative URLs are not supported."); #else - this.m_baseUri = new Uri("file:///" + Directory.GetCurrentDirectory() + "//"); + this.m_baseUri = new Uri("file:///" + Directory.GetCurrentDirectory() + "//"); #endif } } @@ -723,12 +723,12 @@ public TextWriter ErrorLog /// /// DTD validation errors are written to this log file. /// -#if PORTABLE - [Obsolete("Not supported on the .NETPortable runtime.")] +#if NETSTANDARD1_1 + [Obsolete("Not supported on the .NETNETSTANDARD1_1 runtime.")] #endif public string ErrorLogFile { -#if PORTABLE +#if NETSTANDARD1_1 get { throw new NotSupportedException(); @@ -738,15 +738,15 @@ public string ErrorLogFile throw new NotSupportedException(); } #else - get - { - return this.m_errorLogFile; - } - set - { - this.m_errorLogFile = value; - this.m_log = new StreamWriter(value); - } + get + { + return this.m_errorLogFile; + } + set + { + this.m_errorLogFile = value; + this.m_log = new StreamWriter(value); + } #endif } @@ -1151,10 +1151,10 @@ public override bool IsDefault /// /// This property applies only to an attribute node. /// -#if PORTABLE +#if NETSTANDARD1_1 public char QuoteChar #else - public override char QuoteChar + public override char QuoteChar #endif { get @@ -2455,11 +2455,11 @@ public override bool EOF } } -#if !PORTABLE - public override void Close() - { - ((IDisposable)this).Dispose(); - } +#if !NETSTANDARD1_1 + public override void Close() + { + ((IDisposable)this).Dispose(); + } #endif /// @@ -2507,10 +2507,10 @@ public override ReadState ReadState /// Reads the contents of an element or text node as a string. /// /// The contents of the element or an empty string. -#if PORTABLE +#if NETSTANDARD1_1 public override string ReadContentAsString() #else - public override string ReadString() + public override string ReadString() #endif { if (this.m_node.NodeType == XmlNodeType.Element) @@ -2567,10 +2567,10 @@ public override string ReadInnerXml() break; } -#if PORTABLE +#if NETSTANDARD1_1 xw.Dispose(); #else - xw.Close(); + xw.Close(); #endif return sw.ToString(); } @@ -2586,10 +2586,10 @@ public override string ReadOuterXml() StringWriter sw = new StringWriter(CultureInfo.InvariantCulture); XmlWriter xw = XmlWriter.Create(sw, new XmlWriterSettings { Indent = true, IndentChars = " " }); xw.WriteNode(this, true); -#if PORTABLE +#if NETSTANDARD1_1 xw.Dispose(); #else - xw.Close(); + xw.Close(); #endif return sw.ToString(); } @@ -2791,4 +2791,4 @@ private void ValidateContent(Node node) } } } -} \ No newline at end of file +} diff --git a/StandardPorts/SgmlReader/SgmlReader.csproj b/StandardPorts/SgmlReader/SgmlReader.csproj new file mode 100644 index 0000000..caa9d0e --- /dev/null +++ b/StandardPorts/SgmlReader/SgmlReader.csproj @@ -0,0 +1,21 @@ + + + + netstandard1.1 + ReadSharp.Ports.SgmlReader + ReadSharp.Ports.SgmlReader + + + + TRACE;DEBUG;NETSTANDARD1_1 + + + + + + + + + + + From 3a1d6c551ce6fb5edd41b7653fc9a904c752e1a7 Mon Sep 17 00:00:00 2001 From: Ehssan Doust Date: Sun, 7 Jan 2018 19:06:30 +0100 Subject: [PATCH 2/3] Added methods for reading from stream --- ReadSharp.Tests/ReadTests.cs | 96 +++++++++++++++--------------- ReadSharp/IReader.cs | 54 +++++++++++++++-- ReadSharp/Reader.cs | 111 ++++++++++++++++++++++++++++++----- 3 files changed, 193 insertions(+), 68 deletions(-) diff --git a/ReadSharp.Tests/ReadTests.cs b/ReadSharp.Tests/ReadTests.cs index 77e19c0..eaab434 100644 --- a/ReadSharp.Tests/ReadTests.cs +++ b/ReadSharp.Tests/ReadTests.cs @@ -20,7 +20,7 @@ public ReadTests() : base() [Fact] public async Task ReadArticleTest() { - Article result = await reader.Read(new Uri("http://frontendplay.com/story/4/http-caching-demystified-part-2-implementation")); + Article result = await reader.ReadAsync(new Uri("http://frontendplay.com/story/4/http-caching-demystified-part-2-implementation")); Assert.DoesNotContain("", result.Content); Assert.True(result.Content.Length > 15000); @@ -30,7 +30,7 @@ public async Task ReadArticleTest() [Fact] public async Task ReadArticleWithContainerNoHeadlineTest() { - Article result = await reader.Read(new Uri("http://frontendplay.com/story/4/http-caching-demystified-part-2-implementation"), new ReadOptions() + Article result = await reader.ReadAsync(new Uri("http://frontendplay.com/story/4/http-caching-demystified-part-2-implementation"), new ReadOptions() { HasHeaderTags = true }); @@ -44,7 +44,7 @@ public async Task ReadArticleWithContainerNoHeadlineTest() [Fact] public async Task ReadArticleWithImagesTest() { - Article result = await reader.Read(new Uri("https://hacks.mozilla.org/2013/12/application-layout-with-css3-flexible-box-module/")); + Article result = await reader.ReadAsync(new Uri("https://hacks.mozilla.org/2013/12/application-layout-with-css3-flexible-box-module/")); List images = result.Images.ToList(); Assert.True(images.Count >= 3); Assert.True(images[0].Uri.ToString().StartsWith("https://hacks.mozilla.org")); @@ -57,7 +57,7 @@ public async Task ReadArticleWithImagePlaceholdersTest() { ReadOptions options = ReadOptions.CreateDefault(); options.ReplaceImagesWithPlaceholders = true; - Article result = await reader.Read(new Uri("https://hacks.mozilla.org/2013/12/application-layout-with-css3-flexible-box-module/"), options); + Article result = await reader.ReadAsync(new Uri("https://hacks.mozilla.org/2013/12/application-layout-with-css3-flexible-box-module/"), options); List images = result.Images.ToList(); Assert.True(images.Count >= 3); @@ -72,7 +72,7 @@ public async Task ReadArticleWithImagePlaceholdersTest() [Fact] public async Task ReadArticleWithNoImagesTest() { - Article result = await reader.Read(new Uri("http://getpocket.com/hits/awards/2013/")); + Article result = await reader.ReadAsync(new Uri("http://getpocket.com/hits/awards/2013/")); Assert.True(result.Images == null || result.Images.Count() < 1); } @@ -82,7 +82,7 @@ public async Task ReadArticleWithInvalidUriTest() { await ThrowsAsync(async () => { - await reader.Read(new Uri("http://frontendplayyyyy.com")); + await reader.ReadAsync(new Uri("http://frontendplayyyyy.com")); }); } @@ -90,7 +90,7 @@ await ThrowsAsync(async () => [Fact] public async Task IsBodyOnlyProperlyResolved() { - Article result = await reader.Read(new Uri("http://calebjacob.com/tooltipster/")); + Article result = await reader.ReadAsync(new Uri("http://calebjacob.com/tooltipster/")); Assert.True(result.Content.Substring(0, 4) == "", result.Content); - result = await reader.Read(new Uri("https://developer.mozilla.org/en-US/docs/Web/CSS/image-rendering"), new ReadOptions() + result = await reader.ReadAsync(new Uri("https://developer.mozilla.org/en-US/docs/Web/CSS/image-rendering"), new ReadOptions() { UseDeepLinks = false }); @@ -120,15 +120,15 @@ public async Task DoesUseDeepLinksWork() public async Task TestCzechCharsets() { string expectedTitle = "Kouzelné české Vánoce"; - Article result = await reader.Read(new Uri("http://www.czech.cz/cz/Zivot-a-prace/Jak-se-zije-v-CR/Zvyky-a-tradice/Kouzelne-ceske-Vanoce")); + Article result = await reader.ReadAsync(new Uri("http://www.czech.cz/cz/Zivot-a-prace/Jak-se-zije-v-CR/Zvyky-a-tradice/Kouzelne-ceske-Vanoce")); Assert.Equal(result.Title, expectedTitle); expectedTitle = "Kolik se dá vydělat na volné noze?"; - result = await reader.Read(new Uri("http://navolnenoze.cz/blog/vydelky/")); + result = await reader.ReadAsync(new Uri("http://navolnenoze.cz/blog/vydelky/")); Assert.Equal(result.Title, expectedTitle); expectedTitle = "Zkázoděl | dialog.ihned.cz - Komentáře"; - result = await reader.Read(new Uri("http://dialog.ihned.cz/komentare/c1-61530110-zkazodel")); + result = await reader.ReadAsync(new Uri("http://dialog.ihned.cz/komentare/c1-61530110-zkazodel")); Assert.Equal(result.Title, expectedTitle); } @@ -138,58 +138,58 @@ public async Task TestDifferentCharsets() { // chinese? string expectedTitle = "优艺客-专注互联网品牌建设-原韩雪冬网页设计工作室(公司站)"; - Article result = await reader.Read(new Uri("http://www.uelike.com")); + Article result = await reader.ReadAsync(new Uri("http://www.uelike.com")); Assert.Equal(result.Title, expectedTitle); // arabic - result = await reader.Read(new Uri("http://www.it-scoop.com/2014/01/internet-of-things-google-nest/")); + result = await reader.ReadAsync(new Uri("http://www.it-scoop.com/2014/01/internet-of-things-google-nest/")); Assert.NotEmpty(result.Content); } [Fact] public async Task TestHintUrlsReturnFullArticles() { - Article result = await reader.Read(new Uri("http://www.theverge.com/2013/11/18/5116360/nokia-lumia-1520-review")); + Article result = await reader.ReadAsync(new Uri("http://www.theverge.com/2013/11/18/5116360/nokia-lumia-1520-review")); Assert.Contains("Three years ago, Nokia shipped over 110 million smartphones worldwide. ", result.Content); Assert.True(result.Content.Length > 6000); - result = await reader.Read(new Uri("http://blog.bufferapp.com/connections-in-the-brain-understanding-creativity-and-intelligenceconnections")); + result = await reader.ReadAsync(new Uri("http://blog.bufferapp.com/connections-in-the-brain-understanding-creativity-and-intelligenceconnections")); Assert.Contains("The Tweet resulted in over 1,000 retweets", result.Content); } [Fact] public async Task AreMultipageArticlesWorking() { - Article result = await reader.Read(new Uri("http://www.anandtech.com/show/7594/samsung-ssd-840-evo-msata-120gb-250gb-500gb-1tb-review")); + Article result = await reader.ReadAsync(new Uri("http://www.anandtech.com/show/7594/samsung-ssd-840-evo-msata-120gb-250gb-500gb-1tb-review")); Assert.Equal(result.NextPage.ToString(), "http://www.anandtech.com/show/7594/samsung-ssd-840-evo-msata-120gb-250gb-500gb-1tb-review/2"); - result = await reader.Read(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral")); + result = await reader.ReadAsync(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral")); Assert.Equal(result.NextPage.ToString(), "http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral/seite-2"); - result = await reader.Read(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754")); + result = await reader.ReadAsync(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754")); Assert.Equal(result.NextPage.ToString(), "http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754-2"); - result = await reader.Read(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/")); + result = await reader.ReadAsync(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/")); Assert.Equal(result.NextPage.ToString(), "http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/2"); } [Fact] public async Task AreSinglepageArticlesNotPopulatingNextPage() { - Article result = await reader.Read(new Uri("http://www.wpcentral.com/developers-leak-new-features-windows-phone-81-sdk"), new ReadOptions() { MultipageDownload = true }); + Article result = await reader.ReadAsync(new Uri("http://www.wpcentral.com/developers-leak-new-features-windows-phone-81-sdk"), new ReadOptions() { MultipageDownload = true }); Assert.Null(result.NextPage); Assert.Equal(result.PageCount, 1); - result = await reader.Read(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/7/")); + result = await reader.ReadAsync(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/7/")); Assert.Null(result.NextPage); - result = await reader.Read(new Uri("http://www.buzzfeed.com/mattlynley/the-16-most-interesting-things-to-come-out-of-bill-gates-qa")); + result = await reader.ReadAsync(new Uri("http://www.buzzfeed.com/mattlynley/the-16-most-interesting-things-to-come-out-of-bill-gates-qa")); Assert.Null(result.NextPage); - result = await reader.Read(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754-2")); + result = await reader.ReadAsync(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754-2")); Assert.Null(result.NextPage); - result = await reader.Read(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral/seite-2")); + result = await reader.ReadAsync(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral/seite-2")); Assert.Null(result.NextPage); } @@ -199,55 +199,55 @@ public async Task AreMultiPagesDownloadedAndMergedCorrectly() { ReadOptions options = new ReadOptions() { MultipageDownload = true }; - Article result = await reader.Read(new Uri("http://www.maximumpc.com/article/features/modders_toolkit_everything_you_need_make_kick-ass_custom_case_mods"), options); + Article result = await reader.ReadAsync(new Uri("http://www.maximumpc.com/article/features/modders_toolkit_everything_you_need_make_kick-ass_custom_case_mods"), options); Assert.Equal(result.PageCount, 4); - result = await reader.Read(new Uri("http://www.anandtech.com/show/7594/samsung-ssd-840-evo-msata-120gb-250gb-500gb-1tb-review"), options); + result = await reader.ReadAsync(new Uri("http://www.anandtech.com/show/7594/samsung-ssd-840-evo-msata-120gb-250gb-500gb-1tb-review"), options); Assert.Equal(result.PageCount, 9); - result = await reader.Read(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral"), options); + result = await reader.ReadAsync(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral"), options); Assert.Equal(result.PageCount, 2); Assert.True(result.WordCount > 800); - result = await reader.Read(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral")); + result = await reader.ReadAsync(new Uri("http://www.zeit.de/gesellschaft/2014-02/alice-schwarzer-steuerhinterziehung-doppelmoral")); Assert.True(result.PageCount == 1 && result.WordCount < 500); - result = await reader.Read(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/"), options); + result = await reader.ReadAsync(new Uri("http://arstechnica.com/apple/2014/01/two-steps-forward-a-review-of-the-2013-mac-pro/"), options); Assert.Equal(result.PageCount, 7); Assert.True(result.WordCount > 13000 && result.Images.Count() > 10); - result = await reader.Read(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754"), options); + result = await reader.ReadAsync(new Uri("http://www.sueddeutsche.de/wirtschaft/netzbetreiber-und-die-energiewende-im-kampf-gegen-blackouts-und-buergerproteste-1.1880754"), options); Assert.Equal(result.PageCount, 2); } [Fact] public async Task TestCriticalURIs() { - Article result = await reader.Read(new Uri("http://wpcentral.com.feedsportal.com/c/33999/f/616880/s/35a02b5e/sc/15/l/0L0Swpcentral0N0Cgameloft0Ediscusses0Etheir0Enew0Egame0Ebrothers0Earms0E30Esons0Ewar0Eceslive/story01.htm")); + Article result = await reader.ReadAsync(new Uri("http://wpcentral.com.feedsportal.com/c/33999/f/616880/s/35a02b5e/sc/15/l/0L0Swpcentral0N0Cgameloft0Ediscusses0Etheir0Enew0Egame0Ebrothers0Earms0E30Esons0Ewar0Eceslive/story01.htm")); Assert.NotEmpty(result.Content); - result = await reader.Read(new Uri("http://www.fastcoexist.com/3016005/futurist-forum/10-creative-ideas-for-thriving-cities-of-the-future")); + result = await reader.ReadAsync(new Uri("http://www.fastcoexist.com/3016005/futurist-forum/10-creative-ideas-for-thriving-cities-of-the-future")); Assert.Contains("1: 311", result.Content); - result = await reader.Read(new Uri("http://msdn.microsoft.com/en-us/library/windows/apps/hh464925.aspx")); + result = await reader.ReadAsync(new Uri("http://msdn.microsoft.com/en-us/library/windows/apps/hh464925.aspx")); Assert.NotEmpty(result.Content); - result = await reader.Read(new Uri("http://bit.ly/KAh7FJ")); + result = await reader.ReadAsync(new Uri("http://bit.ly/KAh7FJ")); Assert.NotEmpty(result.Content); - result = await reader.Read(new Uri("http://www.nytimes.com/2014/01/31/world/europe/ukraine-unrest.html?hp&_r=0")); + result = await reader.ReadAsync(new Uri("http://www.nytimes.com/2014/01/31/world/europe/ukraine-unrest.html?hp&_r=0")); Assert.True(result.Images != null && result.Images.Count() > 0); - result = await reader.Read(new Uri("http://www.polygon.com/2013/2/25/4026668/tomb-raider-review")); + result = await reader.ReadAsync(new Uri("http://www.polygon.com/2013/2/25/4026668/tomb-raider-review")); Assert.True(result.Images != null && result.Images.Count() > 3 && result.Content.Contains("For a reboot of a series that had lost its focus and purpose")); - result = await reader.Read(new Uri("http://www.polygon.com/2014/1/31/5364728/super-bowl-xlviii-xbox-activities-new-york")); + result = await reader.ReadAsync(new Uri("http://www.polygon.com/2014/1/31/5364728/super-bowl-xlviii-xbox-activities-new-york")); Assert.True(result.Content.Contains("week for Super Bowl XLVIII") && result.Content.Contains("two tickets to the Super Bowl.")); - result = await reader.Read(new Uri("http://habrahabr.ru/post/211905/")); + result = await reader.ReadAsync(new Uri("http://habrahabr.ru/post/211905/")); Assert.NotEmpty(result.Content); - result = await reader.Read(new Uri("http://www.dgtle.com/article-5682-1.html")); + result = await reader.ReadAsync(new Uri("http://www.dgtle.com/article-5682-1.html")); Assert.Contains("http://img.dgtle.com/forum/201402/13/162237x8oumb8i0i0y0087.jpeg!680px", result.Content); } @@ -256,27 +256,27 @@ public async Task TestCriticalURIs2() { ReadOptions options; - Article result = await reader.Read(new Uri("https://medium.com/best-thing-i-found-online-today/9e7455ca375b")); + Article result = await reader.ReadAsync(new Uri("https://medium.com/best-thing-i-found-online-today/9e7455ca375b")); Assert.Contains("16. Be confident in how you ask", result.Content); - result = await reader.Read(new Uri("http://www.dgtle.com/article-5682-1.html")); + result = await reader.ReadAsync(new Uri("http://www.dgtle.com/article-5682-1.html")); Assert.Contains("http://img.dgtle.com/forum/201402/13/162237x8oumb8i0i0y0087.jpeg!680px", result.Content); - result = await reader.Read(new Uri("http://m.spiegel.de/spiegelgeschichte/a-946060.html")); + result = await reader.ReadAsync(new Uri("http://m.spiegel.de/spiegelgeschichte/a-946060.html")); Assert.DoesNotContain("Detecting browser settings", result.Content); - result = await reader.Read(new Uri("https://vimeo.com/84391640")); + result = await reader.ReadAsync(new Uri("https://vimeo.com/84391640")); Assert.Contains("twitter.com/pokiapp", result.Content); - result = await reader.Read(new Uri("http://www.youtube.com/watch?v=GI2lHSPkW1c")); + result = await reader.ReadAsync(new Uri("http://www.youtube.com/watch?v=GI2lHSPkW1c")); Assert.Contains("IT PAST MIDNIGHT A COUPLE HOURS AGO, IT'S FEELS COLDER", result.Content); - result = await reader.Read(new Uri("http://www.jn.pt/PaginaInicial/Politica/Interior.aspx?content_id=3996648&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+JN-ULTIMAS+%28JN+-+Ultimas%29")); + result = await reader.ReadAsync(new Uri("http://www.jn.pt/PaginaInicial/Politica/Interior.aspx?content_id=3996648&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+JN-ULTIMAS+%28JN+-+Ultimas%29")); Assert.DoesNotContain("Alberto João Jardim", result.Content); options = ReadOptions.CreateDefault(); options.PreferHTMLEncoding = false; - result = await reader.Read(new Uri("http://www.jn.pt/PaginaInicial/Politica/Interior.aspx?content_id=3996648&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+JN-ULTIMAS+%28JN+-+Ultimas%29"), options); + result = await reader.ReadAsync(new Uri("http://www.jn.pt/PaginaInicial/Politica/Interior.aspx?content_id=3996648&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+JN-ULTIMAS+%28JN+-+Ultimas%29"), options); Assert.Contains("Alberto João Jardim", result.Content); } @@ -286,7 +286,7 @@ public async Task DebugArticle() { string uri = "http://nstarikov.ru/blog/45260"; - Article result = await reader.Read(new Uri(uri), new ReadOptions() + Article result = await reader.ReadAsync(new Uri(uri), new ReadOptions() { MultipageDownload = true }); diff --git a/ReadSharp/IReader.cs b/ReadSharp/IReader.cs index 1ce74d3..47ef930 100644 --- a/ReadSharp/IReader.cs +++ b/ReadSharp/IReader.cs @@ -1,4 +1,6 @@ -using System; +using System; +using System.IO; +using System.Text; using System.Threading; using System.Threading.Tasks; @@ -7,9 +9,51 @@ namespace ReadSharp public interface IReader { /// - /// Reads article content from the given URI. + /// Reads article content from the given uri. /// - /// An URI to extract the content from. + /// A uri to extract the content from. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + /// + /// + /// + Task
ReadAsync(Uri uri, CancellationToken cancellationToken); + + /// + /// Reads article content from the given uri. + /// + /// A uri to extract the content from. + /// The transform options. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + /// + /// + /// + Task
ReadAsync(Uri uri, ReadOptions options, CancellationToken cancellationToken); + + /// + /// Reads article content from the given stream. + /// + /// The stream to extract the content from. + /// The stream encoding. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + /// + /// + /// + Article Read(Stream stream, Encoding encoding, CancellationToken cancellationToken); + + /// + /// Reads article content from the given stream. + /// + /// The stream to extract the content from. + /// The stream encoding. /// The transform options. /// The cancellation token. /// @@ -18,6 +62,6 @@ public interface IReader /// /// /// - Task
Read(Uri uri, ReadOptions options = null, CancellationToken cancellationToken = default(CancellationToken)); + Article Read(Stream stream, Encoding encoding, ReadOptions options, CancellationToken cancellationToken); } -} \ No newline at end of file +} diff --git a/ReadSharp/Reader.cs b/ReadSharp/Reader.cs index fa22cfb..07be29f 100644 --- a/ReadSharp/Reader.cs +++ b/ReadSharp/Reader.cs @@ -1,4 +1,4 @@ -using ReadSharp.Ports.NReadability; +using ReadSharp.Ports.NReadability; using System; using System.Collections.Generic; using System.IO; @@ -57,13 +57,18 @@ public class Reader : IReader { "//m.spiegel.de", "//www.spiegel.de" } }; - - /// /// Initializes a new instance of the class. /// - /// The HTTP options. - public Reader(HttpOptions options = null) + public Reader() : this(null) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The http options. + public Reader(HttpOptions options) { // initialize transcoder _transcoder = new NReadabilityTranscoder( @@ -113,6 +118,18 @@ public Reader(HttpOptions options = null) } + /// + /// Reads article content from the given uri. + /// + /// A uri to extract the content from. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + public Task
ReadAsync(Uri uri, CancellationToken cancellationToken) + { + return this.ReadAsync(uri, null, cancellationToken); + } /// /// Reads article content from the given URI. @@ -123,10 +140,7 @@ public Reader(HttpOptions options = null) /// /// An article with extracted content and meta information. /// - /// - /// - /// - public async Task
Read(Uri uri, ReadOptions options = null, CancellationToken cancellationToken = default(CancellationToken)) + public async Task
ReadAsync(Uri uri, ReadOptions options = null, CancellationToken cancellationToken = default(CancellationToken)) { _currentPages = new List(); @@ -203,6 +217,76 @@ public Reader(HttpOptions options = null) }; } + /// + /// Reads article content from the given stream. + /// + /// The stream to extract the content from. + /// The stream encoding. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + public Article Read(Stream stream, Encoding encoding, CancellationToken cancellationToken) + { + return Read(stream, encoding, ReadOptions.CreateDefault(), cancellationToken); + } + + /// + /// Reads article content from the given stream. + /// + /// The stream to extract the content from. + /// The stream encoding. + /// The transform options. + /// The cancellation token. + /// + /// An article with extracted content and meta information. + /// + public Article Read(Stream stream, Encoding encoding, ReadOptions options, CancellationToken cancellationToken) + { + // readability + TranscodingResult transcodingResult; + try + { + // transcode content + transcodingResult = ExtractReadableInformation(null, stream, options, encoding); + } + catch (Exception exc) + { + throw new ReadException(exc.Message); + } + + // get word count and plain text + string plainContent; + int wordCount = 0; + + try + { + plainContent = HtmlUtilities.ConvertToPlainText(transcodingResult.ExtractedContent); + wordCount = HtmlUtilities.CountWords(plainContent); + } + catch + { + plainContent = null; + } + + // create article + return new Article + { + Title = transcodingResult.ExtractedTitle, + Description = transcodingResult.ExtractedDescription, + Content = transcodingResult.ExtractedContent, + ContentExtracted = transcodingResult.ContentExtracted && wordCount > 0, + Raw = _rawHTML, + PlainContent = plainContent, + WordCount = wordCount, + PageCount = 1, + FrontImage = transcodingResult.ExtractedImage, + Images = new ArticleImage[0], + Favicon = transcodingResult.ExtractedFavicon, + NextPage = transcodingResult.NextPageUrl != null ? new Uri(transcodingResult.NextPageUrl, UriKind.Absolute) : null, + Encoding = encoding + }; + } /// @@ -227,7 +311,7 @@ protected TranscodingResult ExtractReadableInformation( // set properties for processing TranscodingInput transcodingInput = new TranscodingInput(_rawHTML) { - Url = uri.ToString(), + Url = uri?.ToString(), DomSerializationParams = new DomSerializationParams() { BodyOnly = !options.HasHeaderTags, @@ -245,8 +329,6 @@ protected TranscodingResult ExtractReadableInformation( return _transcoder.Transcode(transcodingInput); } - - /// /// Reverses the deep links. /// @@ -279,13 +361,12 @@ private AttributeTransformationResult ReverseDeepLinks(AttributeTransformationIn } - /// /// Fetches a resource /// /// The URI. /// The options. - /// if set to true [is continued page]. + /// /// The cancellation token. /// /// @@ -447,4 +528,4 @@ private Response MergeResponses(Response original, Response append) }; } } -} \ No newline at end of file +} From d569479fd3ffecd7fc9bcc14aa0d25d21de1e702 Mon Sep 17 00:00:00 2001 From: Ehssan Doust Date: Sun, 7 Jan 2018 19:10:03 +0100 Subject: [PATCH 3/3] incremented version numbers --- ReadSharp/ReadSharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ReadSharp/ReadSharp.csproj b/ReadSharp/ReadSharp.csproj index d7d5810..56a9e12 100644 --- a/ReadSharp/ReadSharp.csproj +++ b/ReadSharp/ReadSharp.csproj @@ -11,7 +11,7 @@ en-US ReadSharp PocketAPI Pocket PocketSharp Tobias Klika cee NReadability SgmlReader Reader Article SDK Poki Copyright by cee, 2016 - 6.3.1 + 6.3.2 ceee