mirror of https://github.com/WikiTeam/wikiteam
commit
269841c909
@ -1,9 +1,4 @@
|
|||||||
http://de.battlestarwiki.org/w/api.php
|
https://de.battlestarwiki.org/w/api.php
|
||||||
http://en.battlestarwiki.org/w/api.php
|
https://en.battlestarwiki.org/w/api.php
|
||||||
http://es.battlestarwiki.org/w/api.php
|
https://fr.battlestarwiki.ddns.net/api.php
|
||||||
http://fr.battlestarwiki.org/w/api.php
|
https://media.battlestarwiki.org/w/api.php
|
||||||
http://media.battlestarwiki.org/w/api.php
|
|
||||||
http://ms.battlestarwiki.org/w/api.php
|
|
||||||
http://simple.battlestarwiki.org/w/api.php
|
|
||||||
http://tr.battlestarwiki.org/w/api.php
|
|
||||||
http://zh.battlestarwiki.org/w/api.php
|
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright (C) 2022 Simon Liu
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import requests
|
||||||
|
from urllib import parse
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
def main():
    """Collect the api.php URL of every wiki linked from Fandom's sitemap.

    Downloads the level-2 sitemap page of community.fandom.com, follows each
    link found there, gathers the wiki links on those pages (rewritten to
    https), and writes the de-duplicated, sorted API endpoints to the file
    ``fandom.com`` in the current directory, one URL per line.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0',
    }
    base = 'https://community.fandom.com'
    # Compiled once: the same pattern is applied to every sitemap page.
    link_re = re.compile(r'<a class=\"title\" href=\"([^>]+?)\">')

    # grab lvl3 links
    req = requests.get(base + '/wiki/Sitemap?level=2', headers=headers)
    map_lvl3 = link_re.findall(req.text)

    # grab wiki links
    wikis = set()
    for lvl3 in tqdm(map_lvl3):
        time.sleep(0.3)
        # Send the same User-Agent as the first request on every page fetch.
        req = requests.get(base + lvl3, headers=headers)
        if req.status_code != 200:
            # One retry after a longer pause before giving up on this page.
            time.sleep(5)
            req = requests.get(base + lvl3, headers=headers)
        if req.status_code != 200:
            # Report the gap instead of silently parsing an error page.
            tqdm.write('Skipping %s (HTTP %s)' % (lvl3, req.status_code))
            continue
        wikis.update(
            wiki.replace('http://', 'https://')
            for wiki in link_re.findall(req.text)
        )

    with open('fandom.com', 'w', encoding='utf-8') as f:
        for wiki in sorted(wikis):
            f.write(parse.urljoin(wiki, 'api.php') + '\n')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
File diff suppressed because it is too large
Load Diff
@ -1,35 +1,53 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright (C) 2014-2017 WikiTeam developers
|
# Copyright (C) 2022 Simon Liu
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
def nextpage(soup):
    """Report whether the parsed page offers a "Next page" link.

    Looks up the <span> labelled 'Next page' via ``soup.find`` and checks
    that its parent element carries an ``href`` attribute.

    Args:
        soup: parsed page object exposing ``find`` (main() passes a
            BeautifulSoup instance).

    Returns:
        True when the link is present, False otherwise.
    """
    try:
        soup.find('span', text='Next page').parent['href']
    except (AttributeError, KeyError, TypeError):
        # AttributeError: find() returned None (no such span);
        # TypeError: the span has no parent; KeyError: parent has no href.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    return True
|
||||||
|
|
||||||
def main():
    """Scrape Miraheze's WikiDiscover listing into a list of API endpoints.

    Fetches Special:WikiDiscover on meta.miraheze.org, follows the
    "Next page" link for as long as nextpage() reports one, collects the
    wiki links from the dbname table column on every page, and writes the
    de-duplicated, sorted ``w/api.php`` URLs to the file ``miraheze.org``,
    one per line.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0',
    }
    site = 'https://meta.miraheze.org'
    # Compiled once: the same pattern is applied to every listing page.
    wiki_re = re.compile(r'<td class=\"TablePager_col_wiki_dbname\"><a href=\"([^>]+?)\">')

    # The User-Agent header is sent on every request.
    req = requests.get('https://meta.miraheze.org/wiki/Special:WikiDiscover', headers=headers)
    soup = BeautifulSoup(req.content, features='lxml')
    wikis = set(wiki_re.findall(req.text))

    while nextpage(soup):
        time.sleep(0.3)
        # nextpage() has just confirmed that this lookup succeeds.
        next_href = soup.find('span', text='Next page').parent['href']
        req = requests.get(urljoin(site, next_href), headers=headers)
        soup = BeautifulSoup(req.content, features='lxml')
        wikis.update(wiki_re.findall(req.text))

    with open('miraheze.org', 'w', encoding='utf-8') as f:
        for wiki in sorted(wikis):
            f.write(urljoin(wiki, 'w/api.php') + '\n')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -1,183 +1,183 @@
|
|||||||
http://24.neoseeker.com/w/api.php
|
https://24.neoseeker.com/w/api.php
|
||||||
http://aceattorney.neoseeker.com/w/api.php
|
https://aceattorney.neoseeker.com/w/api.php
|
||||||
http://advancewars.neoseeker.com/w/api.php
|
https://advancewars.neoseeker.com/w/api.php
|
||||||
http://adventuretime.neoseeker.com/w/api.php
|
https://adventuretime.neoseeker.com/w/api.php
|
||||||
http://animalcrossing.neoseeker.com/w/api.php
|
https://animalcrossing.neoseeker.com/w/api.php
|
||||||
http://attackontitan.neoseeker.com/w/api.php
|
https://attackontitan.neoseeker.com/w/api.php
|
||||||
http://avatar.neoseeker.com/w/api.php
|
https://avatar.neoseeker.com/w/api.php
|
||||||
http://banished.neoseeker.com/w/api.php
|
https://banished.neoseeker.com/w/api.php
|
||||||
http://banjokazooie.neoseeker.com/w/api.php
|
https://banjokazooie.neoseeker.com/w/api.php
|
||||||
http://batman.neoseeker.com/w/api.php
|
https://batman.neoseeker.com/w/api.php
|
||||||
http://battlefield.neoseeker.com/w/api.php
|
https://battlefield.neoseeker.com/w/api.php
|
||||||
http://bioshock.neoseeker.com/w/api.php
|
https://bioshock.neoseeker.com/w/api.php
|
||||||
http://bleach.neoseeker.com/w/api.php
|
https://bleach.neoseeker.com/w/api.php
|
||||||
http://boktai.neoseeker.com/w/api.php
|
https://boktai.neoseeker.com/w/api.php
|
||||||
http://bond.neoseeker.com/w/api.php
|
https://bond.neoseeker.com/w/api.php
|
||||||
http://borderlands.neoseeker.com/w/api.php
|
https://borderlands.neoseeker.com/w/api.php
|
||||||
http://boundbyflame.neoseeker.com/w/api.php
|
https://boundbyflame.neoseeker.com/w/api.php
|
||||||
http://bravely.neoseeker.com/w/api.php
|
https://bravely.neoseeker.com/w/api.php
|
||||||
http://breathoffire.neoseeker.com/w/api.php
|
https://breathoffire.neoseeker.com/w/api.php
|
||||||
http://brink.neoseeker.com/w/api.php
|
https://brink.neoseeker.com/w/api.php
|
||||||
http://callofduty.neoseeker.com/w/api.php
|
https://callofduty.neoseeker.com/w/api.php
|
||||||
http://castlecrashers.neoseeker.com/w/api.php
|
https://castlecrashers.neoseeker.com/w/api.php
|
||||||
http://castlevania.neoseeker.com/w/api.php
|
https://castlevania.neoseeker.com/w/api.php
|
||||||
http://childoflight.neoseeker.com/w/api.php
|
https://childoflight.neoseeker.com/w/api.php
|
||||||
http://chrono.neoseeker.com/w/api.php
|
https://chrono.neoseeker.com/w/api.php
|
||||||
http://cnc.neoseeker.com/w/api.php
|
https://cnc.neoseeker.com/w/api.php
|
||||||
http://cowboybebop.neoseeker.com/w/api.php
|
https://cowboybebop.neoseeker.com/w/api.php
|
||||||
http://crash.neoseeker.com/w/api.php
|
https://crash.neoseeker.com/w/api.php
|
||||||
http://crossedge.neoseeker.com/w/api.php
|
https://crossedge.neoseeker.com/w/api.php
|
||||||
http://cubeworld.neoseeker.com/w/api.php
|
https://cubeworld.neoseeker.com/w/api.php
|
||||||
http://danganronpa.neoseeker.com/w/api.php
|
https://danganronpa.neoseeker.com/w/api.php
|
||||||
http://darksouls.neoseeker.com/w/api.php
|
https://darksouls.neoseeker.com/w/api.php
|
||||||
http://deadoralive.neoseeker.com/w/api.php
|
https://deadoralive.neoseeker.com/w/api.php
|
||||||
http://deathnote.neoseeker.com/w/api.php
|
https://deathnote.neoseeker.com/w/api.php
|
||||||
http://demonssouls.neoseeker.com/w/api.php
|
https://demonssouls.neoseeker.com/w/api.php
|
||||||
http://destiny.neoseeker.com/w/api.php
|
https://destiny.neoseeker.com/w/api.php
|
||||||
http://devilmaycry.neoseeker.com/w/api.php
|
https://devilmaycry.neoseeker.com/w/api.php
|
||||||
http://digimon.neoseeker.com/w/api.php
|
https://digimon.neoseeker.com/w/api.php
|
||||||
http://disgaea.neoseeker.com/w/api.php
|
https://disgaea.neoseeker.com/w/api.php
|
||||||
http://doctorwho.neoseeker.com/w/api.php
|
https://doctorwho.neoseeker.com/w/api.php
|
||||||
http://donkeykong.neoseeker.com/w/api.php
|
https://donkeykong.neoseeker.com/w/api.php
|
||||||
http://doom.neoseeker.com/w/api.php
|
https://doom.neoseeker.com/w/api.php
|
||||||
http://dothack.neoseeker.com/w/api.php
|
https://dothack.neoseeker.com/w/api.php
|
||||||
http://doujin.neoseeker.com/w/api.php
|
https://doujin.neoseeker.com/w/api.php
|
||||||
http://dragonage.neoseeker.com/w/api.php
|
https://dragonage.neoseeker.com/w/api.php
|
||||||
http://dragonball.neoseeker.com/w/api.php
|
https://dragonball.neoseeker.com/w/api.php
|
||||||
http://dragonquest.neoseeker.com/w/api.php
|
https://dragonquest.neoseeker.com/w/api.php
|
||||||
http://dragonsdogma.neoseeker.com/w/api.php
|
https://dragonsdogma.neoseeker.com/w/api.php
|
||||||
http://dynastywarriors.neoseeker.com/w/api.php
|
https://dynastywarriors.neoseeker.com/w/api.php
|
||||||
http://elderscrolls.neoseeker.com/w/api.php
|
https://elderscrolls.neoseeker.com/w/api.php
|
||||||
http://endlessocean.neoseeker.com/w/api.php
|
https://endlessocean.neoseeker.com/w/api.php
|
||||||
http://evangelion.neoseeker.com/w/api.php
|
https://evangelion.neoseeker.com/w/api.php
|
||||||
http://fable.neoseeker.com/w/api.php
|
https://fable.neoseeker.com/w/api.php
|
||||||
http://fairytail.neoseeker.com/w/api.php
|
https://fairytail.neoseeker.com/w/api.php
|
||||||
http://fallout.neoseeker.com/w/api.php
|
https://fallout.neoseeker.com/w/api.php
|
||||||
http://familyguy.neoseeker.com/w/api.php
|
https://familyguy.neoseeker.com/w/api.php
|
||||||
http://fatalfury.neoseeker.com/w/api.php
|
https://fatalfury.neoseeker.com/w/api.php
|
||||||
http://fifa.neoseeker.com/w/api.php
|
https://fifa.neoseeker.com/w/api.php
|
||||||
http://finalfantasy.neoseeker.com/w/api.php
|
https://finalfantasy.neoseeker.com/w/api.php
|
||||||
http://fireemblem.neoseeker.com/w/api.php
|
https://fireemblem.neoseeker.com/w/api.php
|
||||||
http://footballmanager.neoseeker.com/w/api.php
|
https://footballmanager.neoseeker.com/w/api.php
|
||||||
http://fullmetalalchemist.neoseeker.com/w/api.php
|
https://fullmetalalchemist.neoseeker.com/w/api.php
|
||||||
http://futurama.neoseeker.com/w/api.php
|
https://futurama.neoseeker.com/w/api.php
|
||||||
http://fzero.neoseeker.com/w/api.php
|
https://fzero.neoseeker.com/w/api.php
|
||||||
http://gearsofwar.neoseeker.com/w/api.php
|
https://gearsofwar.neoseeker.com/w/api.php
|
||||||
http://glee.neoseeker.com/w/api.php
|
https://glee.neoseeker.com/w/api.php
|
||||||
http://godofwar.neoseeker.com/w/api.php
|
https://godofwar.neoseeker.com/w/api.php
|
||||||
http://goldensun.neoseeker.com/w/api.php
|
https://goldensun.neoseeker.com/w/api.php
|
||||||
http://granturismo.neoseeker.com/w/api.php
|
https://granturismo.neoseeker.com/w/api.php
|
||||||
http://growlanser.neoseeker.com/w/api.php
|
https://growlanser.neoseeker.com/w/api.php
|
||||||
http://gta.neoseeker.com/w/api.php
|
https://gta.neoseeker.com/w/api.php
|
||||||
http://gta5.neoseeker.com/w/api.php
|
https://gta5.neoseeker.com/w/api.php
|
||||||
http://guitarhero.neoseeker.com/w/api.php
|
https://guitarhero.neoseeker.com/w/api.php
|
||||||
http://gundam.neoseeker.com/w/api.php
|
https://gundam.neoseeker.com/w/api.php
|
||||||
http://halflife.neoseeker.com/w/api.php
|
https://halflife.neoseeker.com/w/api.php
|
||||||
http://halo.neoseeker.com/w/api.php
|
https://halo.neoseeker.com/w/api.php
|
||||||
http://harrypotter.neoseeker.com/w/api.php
|
https://harrypotter.neoseeker.com/w/api.php
|
||||||
http://haruhi.neoseeker.com/w/api.php
|
https://haruhi.neoseeker.com/w/api.php
|
||||||
http://harvestmoon.neoseeker.com/w/api.php
|
https://harvestmoon.neoseeker.com/w/api.php
|
||||||
http://hearthstone.neoseeker.com/w/api.php
|
https://hearthstone.neoseeker.com/w/api.php
|
||||||
http://heavyrain.neoseeker.com/w/api.php
|
https://heavyrain.neoseeker.com/w/api.php
|
||||||
http://heroesofruin.neoseeker.com/w/api.php
|
https://heroesofruin.neoseeker.com/w/api.php
|
||||||
http://hitman.neoseeker.com/w/api.php
|
https://hitman.neoseeker.com/w/api.php
|
||||||
http://house.neoseeker.com/w/api.php
|
https://house.neoseeker.com/w/api.php
|
||||||
http://hungergames.neoseeker.com/w/api.php
|
https://hungergames.neoseeker.com/w/api.php
|
||||||
http://infamous.neoseeker.com/w/api.php
|
https://infamous.neoseeker.com/w/api.php
|
||||||
http://inheritance.neoseeker.com/w/api.php
|
https://inheritance.neoseeker.com/w/api.php
|
||||||
http://inuyasha.neoseeker.com/w/api.php
|
https://inuyasha.neoseeker.com/w/api.php
|
||||||
http://jakdaxter.neoseeker.com/w/api.php
|
https://jakdaxter.neoseeker.com/w/api.php
|
||||||
http://kairosoft.neoseeker.com/w/api.php
|
https://kairosoft.neoseeker.com/w/api.php
|
||||||
http://kidicarus.neoseeker.com/w/api.php
|
https://kidicarus.neoseeker.com/w/api.php
|
||||||
http://kingdomhearts.neoseeker.com/w/api.php
|
https://kingdomhearts.neoseeker.com/w/api.php
|
||||||
http://kirby.neoseeker.com/w/api.php
|
https://kirby.neoseeker.com/w/api.php
|
||||||
http://koa.neoseeker.com/w/api.php
|
https://koa.neoseeker.com/w/api.php
|
||||||
http://layton.neoseeker.com/w/api.php
|
https://layton.neoseeker.com/w/api.php
|
||||||
http://leagueoflegends.neoseeker.com/w/api.php
|
https://leagueoflegends.neoseeker.com/w/api.php
|
||||||
http://legendofdragoon.neoseeker.com/w/api.php
|
https://legendofdragoon.neoseeker.com/w/api.php
|
||||||
http://littlebigplanet.neoseeker.com/w/api.php
|
https://littlebigplanet.neoseeker.com/w/api.php
|
||||||
http://lotr.neoseeker.com/w/api.php
|
https://lotr.neoseeker.com/w/api.php
|
||||||
http://magicalstarsign.neoseeker.com/w/api.php
|
https://magicalstarsign.neoseeker.com/w/api.php
|
||||||
http://maplestory.neoseeker.com/w/api.php
|
https://maplestory.neoseeker.com/w/api.php
|
||||||
http://mario.neoseeker.com/w/api.php
|
https://mario.neoseeker.com/w/api.php
|
||||||
http://masseffect.neoseeker.com/w/api.php
|
https://masseffect.neoseeker.com/w/api.php
|
||||||
http://megaman.neoseeker.com/w/api.php
|
https://megaman.neoseeker.com/w/api.php
|
||||||
http://megamitensei.neoseeker.com/w/api.php
|
https://megamitensei.neoseeker.com/w/api.php
|
||||||
http://metalgear.neoseeker.com/w/api.php
|
https://metalgear.neoseeker.com/w/api.php
|
||||||
http://metroid.neoseeker.com/w/api.php
|
https://metroid.neoseeker.com/w/api.php
|
||||||
http://minecraft.neoseeker.com/w/api.php
|
https://minecraft.neoseeker.com/w/api.php
|
||||||
http://monsterhunter.neoseeker.com/w/api.php
|
https://monsterhunter.neoseeker.com/w/api.php
|
||||||
http://mortalkombat.neoseeker.com/w/api.php
|
https://mortalkombat.neoseeker.com/w/api.php
|
||||||
http://mother.neoseeker.com/w/api.php
|
https://mother.neoseeker.com/w/api.php
|
||||||
http://mtg.neoseeker.com/w/api.php
|
https://mtg.neoseeker.com/w/api.php
|
||||||
http://mylittlepony.neoseeker.com/w/api.php
|
https://mylittlepony.neoseeker.com/w/api.php
|
||||||
http://naruto.neoseeker.com/w/api.php
|
https://naruto.neoseeker.com/w/api.php
|
||||||
http://ncis.neoseeker.com/w/api.php
|
https://ncis.neoseeker.com/w/api.php
|
||||||
http://needforspeed.neoseeker.com/w/api.php
|
https://needforspeed.neoseeker.com/w/api.php
|
||||||
http://neopets.neoseeker.com/w/api.php
|
https://neopets.neoseeker.com/w/api.php
|
||||||
http://ninjagaiden.neoseeker.com/w/api.php
|
https://ninjagaiden.neoseeker.com/w/api.php
|
||||||
http://ninokuni.neoseeker.com/w/api.php
|
https://ninokuni.neoseeker.com/w/api.php
|
||||||
http://okami.neoseeker.com/w/api.php
|
https://okami.neoseeker.com/w/api.php
|
||||||
http://onepiece.neoseeker.com/w/api.php
|
https://onepiece.neoseeker.com/w/api.php
|
||||||
http://persona.neoseeker.com/w/api.php
|
https://persona.neoseeker.com/w/api.php
|
||||||
http://pes.neoseeker.com/w/api.php
|
https://pes.neoseeker.com/w/api.php
|
||||||
http://pikmin.neoseeker.com/w/api.php
|
https://pikmin.neoseeker.com/w/api.php
|
||||||
http://pokemon.neoseeker.com/w/api.php
|
https://pokemon.neoseeker.com/w/api.php
|
||||||
http://princeofpersia.neoseeker.com/w/api.php
|
https://princeofpersia.neoseeker.com/w/api.php
|
||||||
http://ratchetclank.neoseeker.com/w/api.php
|
https://ratchetclank.neoseeker.com/w/api.php
|
||||||
http://reborn.neoseeker.com/w/api.php
|
https://reborn.neoseeker.com/w/api.php
|
||||||
http://residentevil.neoseeker.com/w/api.php
|
https://residentevil.neoseeker.com/w/api.php
|
||||||
http://resonance.neoseeker.com/w/api.php
|
https://resonance.neoseeker.com/w/api.php
|
||||||
http://rockband.neoseeker.com/w/api.php
|
https://rockband.neoseeker.com/w/api.php
|
||||||
http://rpgmaker.neoseeker.com/w/api.php
|
https://rpgmaker.neoseeker.com/w/api.php
|
||||||
http://runefactory.neoseeker.com/w/api.php
|
https://runefactory.neoseeker.com/w/api.php
|
||||||
http://runescape.neoseeker.com/w/api.php
|
https://runescape.neoseeker.com/w/api.php
|
||||||
http://sandbox.neoseeker.com/w/api.php
|
https://sandbox.neoseeker.com/w/api.php
|
||||||
http://scottpilgrim.neoseeker.com/w/api.php
|
https://scottpilgrim.neoseeker.com/w/api.php
|
||||||
http://shadowofthecolossus.neoseeker.com/w/api.php
|
https://shadowofthecolossus.neoseeker.com/w/api.php
|
||||||
http://shadowrunreturns.neoseeker.com/w/api.php
|
https://shadowrunreturns.neoseeker.com/w/api.php
|
||||||
http://shenmue.neoseeker.com/w/api.php
|
https://shenmue.neoseeker.com/w/api.php
|
||||||
http://simpsons.neoseeker.com/w/api.php
|
https://simpsons.neoseeker.com/w/api.php
|
||||||
http://skate.neoseeker.com/w/api.php
|
https://skate.neoseeker.com/w/api.php
|
||||||
http://skylanders.neoseeker.com/w/api.php
|
https://skylanders.neoseeker.com/w/api.php
|
||||||
http://skyrim.neoseeker.com/w/api.php
|
https://skyrim.neoseeker.com/w/api.php
|
||||||
http://slycooper.neoseeker.com/w/api.php
|
https://slycooper.neoseeker.com/w/api.php
|
||||||
http://smackdown.neoseeker.com/w/api.php
|
https://smackdown.neoseeker.com/w/api.php
|
||||||
http://smashbros.neoseeker.com/w/api.php
|
https://smashbros.neoseeker.com/w/api.php
|
||||||
http://sonic.neoseeker.com/w/api.php
|
https://sonic.neoseeker.com/w/api.php
|
||||||
http://soulcalibur.neoseeker.com/w/api.php
|
https://soulcalibur.neoseeker.com/w/api.php
|
||||||
http://souleater.neoseeker.com/w/api.php
|
https://souleater.neoseeker.com/w/api.php
|
||||||
http://spiderman.neoseeker.com/w/api.php
|
https://spiderman.neoseeker.com/w/api.php
|
||||||
http://spongebob.neoseeker.com/w/api.php
|
https://spongebob.neoseeker.com/w/api.php
|
||||||
http://spyro.neoseeker.com/w/api.php
|
https://spyro.neoseeker.com/w/api.php
|
||||||
http://starcraft.neoseeker.com/w/api.php
|
https://starcraft.neoseeker.com/w/api.php
|
||||||
http://starfox.neoseeker.com/w/api.php
|
https://starfox.neoseeker.com/w/api.php
|
||||||
http://stargate.neoseeker.com/w/api.php
|
https://stargate.neoseeker.com/w/api.php
|
||||||
http://starocean.neoseeker.com/w/api.php
|
https://starocean.neoseeker.com/w/api.php
|
||||||
http://starwars.neoseeker.com/w/api.php
|
https://starwars.neoseeker.com/w/api.php
|
||||||
http://streetfighter.neoseeker.com/w/api.php
|
https://streetfighter.neoseeker.com/w/api.php
|
||||||
http://tales.neoseeker.com/w/api.php
|
https://tales.neoseeker.com/w/api.php
|
||||||
http://tekken.neoseeker.com/w/api.php
|
https://tekken.neoseeker.com/w/api.php
|
||||||
http://terraria.neoseeker.com/w/api.php
|
https://terraria.neoseeker.com/w/api.php
|
||||||
http://thedarkness.neoseeker.com/w/api.php
|
https://thedarkness.neoseeker.com/w/api.php
|
||||||
http://thesims.neoseeker.com/w/api.php
|
https://thesims.neoseeker.com/w/api.php
|
||||||
http://thewarriors.neoseeker.com/w/api.php
|
https://thewarriors.neoseeker.com/w/api.php
|
||||||
http://theworldendswithyou.neoseeker.com/w/api.php
|
https://theworldendswithyou.neoseeker.com/w/api.php
|
||||||
http://thief.neoseeker.com/w/api.php
|
https://thief.neoseeker.com/w/api.php
|
||||||
http://timesplitters.neoseeker.com/w/api.php
|
https://timesplitters.neoseeker.com/w/api.php
|
||||||
http://tonyhawk.neoseeker.com/w/api.php
|
https://tonyhawk.neoseeker.com/w/api.php
|
||||||
http://twilight.neoseeker.com/w/api.php
|
https://twilight.neoseeker.com/w/api.php
|
||||||
http://twistedmetal.neoseeker.com/w/api.php
|
https://twistedmetal.neoseeker.com/w/api.php
|
||||||
http://uncharted.neoseeker.com/w/api.php
|
https://uncharted.neoseeker.com/w/api.php
|
||||||
http://valkyriachronicles.neoseeker.com/w/api.php
|
https://valkyriachronicles.neoseeker.com/w/api.php
|
||||||
http://vivapinata.neoseeker.com/w/api.php
|
https://vivapinata.neoseeker.com/w/api.php
|
||||||
http://wakfu.neoseeker.com/w/api.php
|
https://wakfu.neoseeker.com/w/api.php
|
||||||
http://warcraft.neoseeker.com/w/api.php
|
https://warcraft.neoseeker.com/w/api.php
|
||||||
http://warhammer.neoseeker.com/w/api.php
|
https://warhammer.neoseeker.com/w/api.php
|
||||||
http://watchdogs.neoseeker.com/w/api.php
|
https://watchdogs.neoseeker.com/w/api.php
|
||||||
http://whiteknightchronicles.neoseeker.com/w/api.php
|
https://whiteknightchronicles.neoseeker.com/w/api.php
|
||||||
http://wikiguides.neoseeker.com/w/api.php
|
https://wikiguides.neoseeker.com/w/api.php
|
||||||
http://wow.neoseeker.com/w/api.php
|
https://wow.neoseeker.com/w/api.php
|
||||||
http://xenoblade.neoseeker.com/w/api.php
|
https://xenoblade.neoseeker.com/w/api.php
|
||||||
http://yugioh.neoseeker.com/w/api.php
|
https://yugioh.neoseeker.com/w/api.php
|
||||||
http://zelda.neoseeker.com/w/api.php
|
https://zelda.neoseeker.com/w/api.php
|
@ -1,8 +1,8 @@
|
|||||||
Wikifarm: http://neowiki.neoseeker.com/wiki/Main_Page
|
Wikifarm: http://neowiki.neoseeker.com/wiki/Main_Page
|
||||||
Last update: 2017-06-30
|
Last update: 2022-04-12
|
||||||
|
|
||||||
Details:
|
Details:
|
||||||
|
|
||||||
There is a dynamic list http://neowiki.neoseeker.com/wiki/Special:WikiList
|
There is a dynamic list http://neowiki.neoseeker.com/wiki/Special:WikiList
|
||||||
|
|
||||||
Run script: python neoseeker-spider.py > newlist
|
Run script: python3 neoseeker-spider.py
|
||||||
|
@ -1,55 +1,74 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright (C) 2014 WikiTeam developers
|
# Copyright (C) 2022 Simon Liu
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
import requests
|
|
||||||
import time
|
import time
|
||||||
|
import requests
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
def main():
    """Build the list of ShoutWiki API endpoints from the hub wiki.

    Step 1 pages through Category:Flat_list_of_all_wikis on
    www.shoutwiki.com and records the page id of every member.
    Step 2 downloads the page content in batches of 50 ids, takes the
    first ``subdomain`` field found in each page, and turns it into
    ``http://<subdomain>.shoutwiki.com/w/api.php``.
    The de-duplicated, sorted URLs are written to the file
    ``shoutwiki.com``, one per line.
    """
    url = 'http://www.shoutwiki.com/w/api.php'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0'}

    # grab wiki pages
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'info',
        'generator': 'categorymembers',
        'inprop': 'url',
        'gcmtitle': 'Category:Flat_list_of_all_wikis',
        'gcmlimit': 'max',
    }
    ids = []
    while True:
        data = requests.get(url, params=params, headers=headers).json()
        # Iterating the pages mapping yields its keys, used below as page ids.
        ids.extend(data.get('query', {}).get('pages', {}))
        gcont = data['continue']['gcmcontinue'] if 'continue' in data else ''
        if not gcont:
            break
        params['gcmcontinue'] = gcont

    # grab wiki API
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'revisions',
        'formatversion': '2',
        'rvprop': 'content',
        'rvslots': '*',
    }
    wikis = set()
    for n in tqdm(range(0, len(ids), 50)):
        params['pageids'] = '|'.join(ids[n:n + 50])
        data = requests.get(url, params=params, headers=headers).json()

        for page in data.get('query', {}).get('pages', []):
            revisions = page.get('revisions')
            if not revisions:
                # Entry came back without content; nothing to parse.
                continue
            content = revisions[0]['slots']['main']['content']
            for val in content.split('\n|'):
                if 'subdomain' in val:
                    wikis.add('http://%s.shoutwiki.com/w/api.php' % val.split('subdomain =')[-1].strip())
                    # Only the first subdomain field of a page is used.
                    break

        time.sleep(0.3)

    # One URL per line, each newline-terminated like the sibling spiders.
    with open('shoutwiki.com', 'w', encoding='utf-8') as f:
        f.writelines(wiki + '\n' for wiki in sorted(wikis))
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,38 +1,43 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright (C) 2014 WikiTeam developers
|
# Copyright (C) 2014-2022 WikiTeam developers
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
|
from urllib import parse
|
||||||
|
|
||||||
def main():
    """Write the API endpoints of the wikis listed on wiki.co.il.

    Downloads both active-wiki listing pages, pulls the link target out of
    every table cell that starts with an anchor, and stores the sorted,
    de-duplicated ``api.php`` URLs in the file ``wiki-site.com``, one per
    line.
    """
    user_agent = 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0'
    listing_pages = (
        'http://www.wiki.co.il/active-wiki-all.html',
        'http://www.wiki.co.il/active-wiki-en.html',
    )
    href_pattern = r'<td><a href="([^>]+?)"'

    found = set()
    for page_url in listing_pages:
        response = requests.get(page_url, headers={'User-Agent': user_agent})
        found.update(re.findall(href_pattern, response.text))

    api_lines = [parse.urljoin(site, 'api.php') + '\n' for site in sorted(found)]
    with open('wiki-site.com', 'w') as out:
        out.writelines(api_lines)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue