Code:
' Do some empirical text tidying up that I might typically have done in a final video title
Let Title = Replace(Title, "ä", "ae", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ü", "ue", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ö", "oe", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "#wiegehtyoutube", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "!", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "?", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ß", "ss", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "€", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, ":", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "#", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "&", " ", 1, -1, vbBinaryCompare) '
Let Title = Replace(Title, "'", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "‚", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, """", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "“", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "„", " ", 1, -1, vbBinaryCompare) ' „ajdffak“
Let Title = Replace(Title, "+", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, ".", "", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
Let Title = Application.WorksheetFunction.Trim(Title) ' In case any spaces caused by removing stuff
Here the full current coding:
Code:
' https://excelfox.com/forum/showthread.php/2840-Notes-tests-Scrapping-YouTube/page10#post19704
Sub GetStuffFrom11DigitYouTube2() ' https://excelfox.com/forum/showthread.php/2840-Notes-tests-Scrapping-YouTube/page11#post19733
Rem 0 Worksheets info
Dim WsYT11 As Worksheet: Set WsYT11 = ThisWorkbook.Worksheets("ElevenDigitYT(1)")
Dim RngWsYT11 As Range: Set RngWsYT11 = WsYT11.Range("A1:A680") ' 692")
Dim Cnt As Long
WsYT11.Activate
ActiveWindow.Panes(3).Activate ' To get out of top pane and into bottom pane (I have worksheet divided at line 1) This is so only the bottom pane is scrolled and the first line in pane 1 with the headings in stays there
Rem 1 The main stuff now - take every 11 digit bit , make a full link of it, scrape all the text and do all the business
For Cnt = 500 To 680 ' 692
RngWsYT11.Item(Cnt).Select
'If RngWsYT11.Item(Cnt).Offset(0, 1).Value2 <> "" Then
With CreateObject("MSXML2.ServerXMLHTTP") ' .Server - Yasser http://www.eileenslounge.com/viewtopic.php?p=303638#p303638
.Open "GET", "https://www.youtube.com/watch?v=" & RngWsYT11.Item(Cnt).Value2, False ' '
.setRequestHeader "User-Agent", "Chrome" ' - SpeakEasy Mike https://eileenslounge.com/viewtopic.php?p=303639#p303639
.send ' varBody:= ' No extra info for type GET. .send actually makes the request
While .readyState <> 4: DoEvents: Wend ' Allow other processes to run while the web page loads. Think this is part of the True option
Dim PageSrc As String: Let PageSrc = .responseText ' Save the HTML code in the (Global) variable. ': Range("P1").Value = PageSrc 'For me for a print out copy to text file etc. The responseText property returns the information requested by the Open method as a text string
End With
'_1(ii) Optional secion to put the text string into a text file , for ease of code developments and debugging
Dim FileNum2 As Long: Let FileNum2 = FreeFile(0) ' https://msdn.microsoft.com/en-us/vba/language-reference-vba/articles/freefile-function
Dim PathAndFileName2 As String
Let PathAndFileName2 = ThisWorkbook.Path & "\Videos(1)\WieGehtsYouTubeServerChrome" & RngWsYT11.Item(Cnt).Value2 & ".txt" '
Open PathAndFileName2 For Output As #FileNum2 ' ' The text file will be made if not there, and if it is there and already contains data, then the data will be overwritten
Print #FileNum2, PageSrc '
Close #FileNum2
' If RngWsYT11.Item(Cnt).Offset(0, 1).Value2 = "GWGzz5p5D80" Then Stop In case I want to stop at a particular 11 digit bit for debugging
Dim TextBit As String ' The idea is to get a chunk of text that seems to have all the info I want
Let TextBit = Split(PageSrc, "videoDescriptionHeaderRenderer""", 2, vbBinaryCompare)(1)
'Let TextBit = Mid(TextBit, 1, 600)
Let TextBit = Mid(TextBit, 1, 1400)
Dim Title As String
' Let Title = Split(TextBit, ":{""title"":{""runs"":[{""text"":""", 2, vbBinaryCompare)(1)
' 'Let Title = Split(Title, """}]},""channel""", 2, vbBinaryCompare)(0)
' Let Title = Split(Title, """}", 2, vbBinaryCompare)(0)
' If InStr(1, TextBit, """,""navigationEndpoint""", vbBinaryCompare) <> 0 Then
' Let Title = Split(TextBit, ":{""title"":{""runs"":[{""text"":""", 2, vbBinaryCompare)(1)
' Dim Pos1nav As Long, Pos2nav As Long
' Let Pos1nav = InStr(1, Title, """,""navigationEndpoint""", vbBinaryCompare)
' Let Pos2nav = InStr(Pos1nav, Title, "{""text"":""", vbBinaryCompare)
' Let Title = Left(Title, Pos1nav - 1) & Right(Title, Len(Title) - (Pos2nav + 8))
' Let Title = Split(Title, """}", 2, vbBinaryCompare)(0)
' Else
' End If
' That above checked for link text crap, but only worked if the link was at the beginning of the title: If the link was inside it or at the end, we will end up with multiple text bits, (and the link text crap) I need to join together and ignore link text crap - BTW "link text crap" is what comes after the text seen for the link which is needed to make it work, but i don't want it .... Text1 .. Textseenforlink .. textcrapneededtomakelinkwork ... Text3 etc
Dim posTxtTag As Long ' Usually there will be just one bit of {"text":" after which come the title, but sometimes there may be a few as in the case of a link in the title..
Let posTxtTag = InStr(1, TextBit, "{""text"":""", vbBinaryCompare)
Do While posTxtTag <> 0 And posTxtTag < InStr(1, TextBit, """channel""", vbBinaryCompare) ' hopefully the text channel always comes after the title and before anything else I want
Dim posTxtTagEnd As Long, posTxtTagEnd2 As Long
Let posTxtTagEnd = InStr(posTxtTag, TextBit, """,""navigationEndpoint", vbBinaryCompare) ' This gives me the end of the text for the case of a link text
If posTxtTagEnd = 0 Then Let posTxtTagEnd = 999 ' If I ain't got a link then I make this big so it does not get used
Let posTxtTagEnd2 = InStr(posTxtTag, TextBit, """}", vbBinaryCompare) ' mostly this would be the end of a text bit
Let posTxtTagEnd = Application.WorksheetFunction.Min(posTxtTagEnd, posTxtTagEnd2) ' Whatever text end I have or watever text end comes first will be used
Let Title = Title & Mid(TextBit, posTxtTag + 9, (posTxtTagEnd - (posTxtTag + 9))) ' mostly this would only be done once, just more times for building the text if its split into bits by the use of a link in the title
Let posTxtTag = InStr(posTxtTagEnd, TextBit, "{""text"":""", vbBinaryCompare) ' This will mostly be 0, unless text is split into bits by the use of a link in the title
Loop ' While posTxtTag <> 0
' some initially empirically found Title tidying up
Let Title = Replace(Title, "\u0026", "&", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "\""", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "/", " ", 1, -1, vbBinaryCompare)
' Do some empirical text tidying up that I might typically have done in a final video title
Let Title = Replace(Title, "ä", "ae", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ü", "ue", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ö", "oe", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "Ä", "AE", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "Ü", "UE", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "Ö", "OE", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "#wiegehtyoutube", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "!", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "?", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "ß", "ss", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "€", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, ":", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "#", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "&", " ", 1, -1, vbBinaryCompare) '
Let Title = Replace(Title, "'", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "‚", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, """", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "“", " ", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "„", " ", 1, -1, vbBinaryCompare) ' „ajdffak“
Let Title = Replace(Title, "+", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, ".", "", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "YouTube", "UT", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "[", "(", 1, -1, vbBinaryCompare)
Let Title = Replace(Title, "]", ")", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
' Let Title = Replace(Title, "-", " ", 1, -1, vbBinaryCompare)
Let Title = Application.WorksheetFunction.Trim(Title) ' In case any spaces caused by removing stuff
Let RngWsYT11.Item(Cnt).Offset(0, 2).Value2 = Title
Let Title = "" ' If i don't do this the Title = Title & coding will keep adding the titles together
RngWsYT11.Item(Cnt).Offset(0, 2).Select ' This is helpful or else I cant see the worksheet being filled after the first few
' To get the next info i use a lot the Split Split bit Yasser showed me https://eileenslounge.com/viewtopic.php?p=303638#p303638
Dim Views As String
Let Views = Split(TextBit, """},""views"":{""simpleText"":""", 2, vbBinaryCompare)(1)
Let Views = Split(Views, " Aufrufe""}", 2, vbBinaryCompare)(0)
Let Views = Replace(Views, ".", "", 1, -1, vbBinaryCompare) ' I don't like seperators of any kind
Let RngWsYT11.Item(Cnt).Offset(0, 4).Value2 = Views
Dim PubDate As String
Let PubDate = Split(TextBit, "publishDate"":{""simpleText"":""", 2, vbBinaryCompare)(1)
Let PubDate = Split(PubDate, """},""factoid"":[{", 2, vbBinaryCompare)(0)
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value2 = PubDate
' date nightmares again
Dim PubDateV2 As Long, Naw As Long ' DateSerial(year, month, day)
Dim PubDateRaw As String: Let PubDateRaw = Replace(PubDate, "Premiere am ", "", 1, 1, vbBinaryCompare)
Let PubDateRaw = Replace(PubDateRaw, "Live übertragen am ", "", 1, 1, vbBinaryCompare)
Let PubDateV2 = DateSerial(Right(PubDateRaw, 4), Mid(PubDateRaw, 4, 2), Left(PubDateRaw, 2))
Let RngWsYT11.Item(Cnt).Offset(0, 1).Value2 = PubDateV2
Let Naw = Evaluate("=NOW()"): ' Debug.Print Naw
' Debug.Print Format(PubDateV2, "\Da\y i\s " & "dddd")
' Debug.Print Format(PubDateV2, "dddd DD mmm YYYY") & " " & Format(Naw, "dddd DD mmm YYYY")
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value2 = PubDateV2
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value = PubDate
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value = Format(PubDateV2, "dddd DD mmm YYYY")
Let RngWsYT11.Item(Cnt).Offset(0, 5).Value = Int(Views / (Naw - PubDateV2))
Dim Likeses As String
Let Likeses = Split(TextBit, "Mag ich\""-Bewertungen""},""accessibilityText"":""", 2, vbBinaryCompare)(1)
Let Likeses = Split(Likeses, " \""Mag ich\""-Bewertungen", 2, vbBinaryCompare)(0)
If InStr(1, Likeses, "Keine", vbBinaryCompare) = 0 Then ' Sometimes the likes are not there or not showmn or something - only happend in a video not from the main author
'Let RngWsYT11.Item(Cnt).Offset(0, 6).Value = Likeses
Let RngWsYT11.Item(Cnt).Offset(0, 7).Value = Int(Likeses / (Naw - PubDateV2))
Else
Let Likeses = "Keine"
End If
Let RngWsYT11.Item(Cnt).Offset(0, 6).Value = Likeses
RngWsYT11.Parent.Columns("A:H").AutoFit
'Else
'End If
Next Cnt
End Sub
Bookmarks