###
### YaCy Init File
###
# These properties are loaded upon installation.
# They are used only once, for set-up.
# If you make changes to this file and want them to take effect,
# you must delete the yacy.conf file in DATA/SETTINGS
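# For example, on a Unix-like system this can be done from the YaCy
# application directory (assuming a default data layout) with:
# rm DATA/SETTINGS/yacy.conf
# followed by a restart of YaCy.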
# ----------------------------------------------------------------------------
# port number where the server should bind to
port = 8090
# optional ssl port (https port) the server should bind to
port.ssl = 8443
# port to listen for a shutdown signal ( -1 = disable use of a shutdown port, 8005 = recommended default )
port.shutdown = -1
# use UPnP [true/false]
upnp.enabled = true
# remote host on UPnP device (for more than one connection)
upnp.remoteHost =
#sometimes you may want yacy to bind to a different port than the one reachable from outside.
#in that case set bindPort to the port yacy should bind on, and port to the port visible from outside.
#to run yacy on port 8090, reachable from port 80, set bindPort=8090, port=80 and use
#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8090
#(of course you need to customize the IPs)
bindPort =
# TLS/SSL support:
#
# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
#
# English-speaking users, read below:
#
# With this you can access your peer using https://localhost:8443
#
# There are two possibilities to specify which certificate should
# be used by YaCy.
#
# 1) Create a new certificate:
#
# *) For testing purposes, you can create a keystore with a self-signed certificate,
# using the following command:
# C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
#
# *) Then configure the keyStoreXXXX properties accordingly, e.g.
# keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
# keyStorePassword = mypwd
#
# 2) Import an existing certificate:
#
# Alternatively you can import an existing certificate in pkcs12 format into
# the Java keystore.
#
# This can be done by setting the pkcs12XXX and keyStoreXXXX properties accordingly, e.g.
# pkcs12ImportFile = c:/temp/keystore.pkcs12
# pkcs12ImportPwd = test
# keyStore = DATA/SETTINGS/keystore.jks
# keyStorePassword = mypwd
#
# If the property keyStore is not specified, then a new keystore file
# DATA/SETTINGS/myPeerKeystore will be created.
# Path to the Java KeyStore (JKS) holding the certificate to use when TLS/SSL is enabled on the server (server.https=true)
keyStore=defaults/freeworldKeystore
# Password to access the Java KeyStore (JKS)
keyStorePassword=freeworld
# Path to a PKCS #12 keystore holding a certificate to import into the Java KeyStore
# This property is automatically reset to empty once the import has been successfully completed
pkcs12ImportFile =
# Password to access a PKCS #12 keystore holding a certificate to import
# This property is automatically reset to empty once the import has been successfully completed
pkcs12ImportPwd =
# The Java KeyStore is only used if server.https is set to true.
# If server.https=true, then the YaCy web interface is available at
# https://localhost:<port.ssl>/ and at http://localhost:<port>/
server.https=false
# property that collects the names of all servlets that have been used so far;
# it is used to track whether the user has already done some configuration steps.
# If the user missed configuration steps that should be done, then a help system
# is possible which guides the user based on the list of servlets that have been used.
# The list distinguishes called and submitted servlets.
server.servlets.called =
server.servlets.submitted =
# server tracking: maximum time a track entry is held in the internal cache
# value is in milliseconds, default is one hour
server.maxTrackingTime = 3600000
# maximum number of tracks per host
server.maxTrackingCount = 1000
# maximum number of hosts that are tracked
server.maxTrackingHostCount = 100
# When set to true, enable gzip compression of HTTP responses when the user-agent (browser, another YaCy peer or any kind of client)
# accepts gzip encoding (by including gzip in an 'Accept-Encoding' HTTP request header).
# This adds some processing overhead, but reduces the number of bytes sent over the network.
server.response.compress.gzip = true
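# A quick way to check whether compression is active, assuming the default
# port and a shell with curl available:
# curl -s -o /dev/null -D - -H 'Accept-Encoding: gzip' http://localhost:8090/ | grep -i content-encoding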
# Global HTTP referrer policy delivered by meta tag (see https://www.w3.org/TR/referrer-policy/ for available policies)
# Can be left empty: the browser should then fall back to the default "no-referrer-when-downgrade" policy.
# Be careful, some policies will also affect YaCy internal links: "no-referrer", "same-origin", "origin" and "strict-origin". This can be useful,
# for example when proxyURL.useforresults is set to true, to control the referrer information initially sent by the YaCy proxy when visiting search results
referrer.meta.policy=origin-when-cross-origin
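# For reference, the policy configured here reaches the browser as a standard
# HTML meta element in delivered pages, of the form:
# <meta name="referrer" content="origin-when-cross-origin">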
# maximum file sizes: since some users experience problems with too large files
# the file size of database files can be limited. Larger files can be used to get a
# better IO performance and to use less RAM; however, if the size must be limited
# because of limitations of the file system, the maximum size can be set here
filesize.max.win = 2147483647
filesize.max.other = 8589934591
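# (for reference: 2147483647 bytes is 2^31 - 1, i.e. just under 2 GB, and
# 8589934591 bytes is 2^33 - 1, i.e. just under 8 GB)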
# Network Definition
# There can be separate YaCy networks, and managed sub-groups of the general network.
# The essentials of the network definition are attached in separate property files.
# The property here can also be a url where the definition can be loaded.
# In case of privately managed networks, this configuration must be changed BEFORE it is released
# to the members of the separated network peers.
network.unit.definition = defaults/yacy.network.freeworld.unit
#network.unit.definition = defaults/yacy.network.webportal.unit
#network.unit.definition = defaults/yacy.network.intranet.unit
# distinguish intranet/internet IPs:
# if this setting is set to true, then only URL hashes with the 'intranet' flag are created, even if the
# url is on the internet. This can enhance the crawling speed dramatically, since the DNS lookup
# that checks whether a host is on the internet or an intranet can be omitted.
# This option is only valid if the network.unit.domain property is set to 'any'
network.unit.domain.nocheck = false
# A client may have its own agent name. This name can be set by the user and is set to a random value
# if not overwritten by the user. As an alternative, the name can be set with this property.
# This option is only used if the value is non-empty.
network.unit.agent =
# Prefer https for in-protocol operations when available on remote peers
# A distinct general setting is available to control whether https should be used for remote search queries: remotesearch.https.preferred
network.unit.protocol.https.preferred = false
# Update process properties
# The update server location is given in the network.unit.definition,
# but the settings for update processing and cycles are individual.
# the update process can be either 'manual' (no automatic lookup for new versions),
# 'guided' (automatic lookup, but the user is asked before the update is performed),
# or 'auto' (whenever an update is available, the update is loaded and installed)
update.process = manual
# the cycle value applies only if the process is automatic or guided. The value is in hours.
# There is currently a fixed minimum of 24 hours between updates.
update.cycle = 168
# a version number blacklist can restrict automatic or guided updates to a specific
# range of version numbers. The restriction is done with a blacklist (a standard regular expression).
# It is recommended to use this list to exclude low developer version numbers.
update.blacklist =
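# A hypothetical example that would block all releases whose version string
# contains a development marker:
# update.blacklist = .*dev.*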
# an update can also be restricted with a concept property, which decides whether an
# update is only valid if it is a main release, or if any release including new development releases is acceptable.
# Valid keywords are 'main' and 'any'
update.concept = any
# the following values are set automatically:
# the time when the last lookup to the network update server(s) was done
update.time.lookup = 0
# the time when a release was last downloaded
update.time.download = 0
# the deploy time when the last update was done; milliseconds since epoch
update.time.deploy = 0
# delete old downloaded files after this number of days to free disk space
# the latest release is always kept
update.deleteOld = 30
# only install signed files
update.onlySignedFiles = 1
# restart-option
# a peer can be re-started periodically
# restart.process can be either 'off' (no automatic restart) or 'time' (time-rule-based, see below)
restart.process = off
# the restart.cycle is the number of hours that must pass before a restart is done
restart.cycle = 20
# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
# when the restart should be performed
restart.hour = 03
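# For example, a pattern such as 0[0-5] should match any hour from 00 to 05,
# allowing the restart anywhere within that window.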
# the following values are set automatically
restart.time = 0
# clusters within a network:
# every network can have an unlimited number of clusters. Clusters may also be completely
# sealed and have no connection to other peers. When a cluster does not use the
# p2p protocol and the bootstrapping mechanism to contact other peers, we call its peers
# Robinson peers. They can appear in different 'visibilities':
# - privatepeer: no connection and no data exchange with any other peer
# - privatecluster: connections only to self-defined addresses (other peers in the same mode)
# - publiccluster: like privatecluster, but visible to and searchable by public p2p nodes
# - publicpeer: a single peer without cluster connection, but visible to p2p nodes
# all public robinson peers should use a peer tag string so they are searchable when
# these tags appear in the search request
cluster.mode=publicpeer
cluster.peers.yacydomain=localpeer.yacy
cluster.peers.ipport=localhost:8090
# bootstrapLoadTimeout
# this is the time-out for loading the seed-list files during bootstrapping.
# If the time-out is too short, there is a danger that the peer stays in virgin mode
bootstrapLoadTimeout = 20000
# time-out of client control socket in milliseconds
# since this applies only to the client-proxy connection,
# it can be rather short
# milliseconds
clientTimeout = 10000
# maximum number of httpd sessions
# a client may open several connections at once, and the httpdMaxBusySessions value sets
# a limit on the number of concurrent connections
httpdMaxBusySessions = 200
# maximum number of simultaneously open outgoing HTTP connections in the general pool (net.yacy.cora.protocol.http.HTTPClient)
http.outgoing.pool.general.maxTotal = 200
# maximum number of simultaneously open outgoing HTTP connections in the remote Solr pool (net.yacy.cora.federate.solr.instance.RemoteInstance)
http.outgoing.pool.remoteSolr.maxTotal = 100
# TLS Server Name Indication (SNI) extension support for outgoing HTTP connections
# Must be enabled to load some https URLs (for websites deployed with different certificates and host names on the same shared IP address), otherwise loading fails with errors such as "Received fatal alert: handshake_failure"
# It can be necessary to disable it to load some https URLs served by old and misconfigured web servers, otherwise loading fails with the exception javax.net.ssl.SSLProtocolException: "handshake alert: unrecognized_name"
# In 2019, this last error should occur less frequently than in the early days of the extension implementation, as its standard definition has evolved from RFC 4366 to RFC 6066, which revised how server implementations should handle the unrecognized_name(112) alert.
# Can also be configured with the JVM option jsse.enableSNIExtension, but in that case a server restart is required when you want to modify the setting, and it is not customizable per http client (general or for remote Solr)
# Set to true to enable TLS Server Name Indication (SNI) extension on outgoing HTTP connections in the general http client (net.yacy.cora.protocol.http.HTTPClient)
http.outgoing.general.tls.sniExtension.enabled = true
# Set to true to enable TLS Server Name Indication (SNI) extension on outgoing HTTP connections in the remote Solr http client (net.yacy.cora.federate.solr.instance.RemoteInstance)
http.outgoing.remoteSolr.tls.sniExtension.enabled = true
# default root path for the file server
# may be overridden by the htdocs parameter
# users shall be encouraged to use the htdocs path for individual content,
# not this path defined here
htRootPath = htroot
# the htroot path
# root path for the httpd file server
htDefaultPath=htroot
# individual htroot folder
# every user may publish her/his own web pages
# these pages shall be placed in the path defined here
# the htdocs path shares its content with the htroot path
htDocsPath = DATA/HTDOCS
# the default files (typically index.html), used if no file name is given.
# The complete path to this file is created by combining it with the rootPath.
# you can set a list of defaults, separated by comma
# the first one is preferred
defaultFiles = index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
# locale-options: YaCy supports localization.
# Web pages for special languages are located in the htLocalePath
# The htLocaleLang defines a list of language options as <dir>/<named-language>
# the <dir> must exist as sub-path to htLocalePath
# the htLocaleSelection selects from the given locales, value=one-of-<dir>
locale.source=locales
locale.translated_html=DATA/LOCALE/htroot
locale.language=default
# virtual host for httpdFileServlet access
# for example http://<fileHost>/ shall access the file servlet and
# return the defaultFile at rootPath
# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
# for the preconfigured value 'localpeer', the URL is:
# http://localpeer/
fileHost = localpeer
# specify the path to the MIME matching file table
mimeTable = defaults/httpd.mime
# specify the path to the sessionid name file
sessionidNamesFile = defaults/sessionid.names
# a path to the file cache, used for the internal proxy and as crawl buffer
# This will be used if the server is addressed as a proxy
proxyCache = DATA/HTCACHE
# the maximum disc cache size for files in Cache in megabytes
# default: 4 Gigabyte
proxyCacheSize = 4096
# The compression level for cached content
# Supported values range from 0 - no compression (lower CPU, higher disk usage), to 9 - best compression (higher CPU, lower disk use)
proxyCache.compressionLevel = 9
# Timeout value (in milliseconds) for acquiring a synchronization lock on getContent/store Cache operations
# When a timeout occurs, the loader should fall back to regular remote resource loading
proxyCache.sync.lockTimeout = 2000
# you can use the proxy with fresh/stale rules or in an always-fresh mode
proxyAlwaysFresh = false
# a path to the surrogate input directory
surrogates.in = DATA/SURROGATES/in
# a path to the surrogate output directory
surrogates.out = DATA/SURROGATES/out
# a path to the dictionaries directory
# this directory also contains subdirectories for input sources, the did-you-mean function and others
dictionaries = DATA/DICTIONARIES
# Set of comma-separated vocabulary names whose terms should only be matched
# from linked data type annotations in documents (with microdata, RDFa, microformats...)
# instead of cleartext words
vocabularies.matchLinkedData.names =
# a path to the classification directory
# each subdirectory is the name of a context (which becomes a navigator) with '.txt' files
# containing texts to teach a bayesian filter. One of the files must be named 'negative.txt'.
# The text files can be created with the Export functionality using the option "Only Text".
classification = DATA/CLASSIFICATION
# storage place for new releases
releases = DATA/RELEASE
# the following mime-types are a blacklist for indexing:
# parser.mime.deny: specifies mime-types that shall not be indexed
parser.mime.deny=
parser.extensions.deny=iso,apk,dmg
# The audioTagParser is disabled by default as it needs to create a temporary file each time an audio resource is parsed
# Audio file extensions and media types can be enabled in the ConfigParser_p.html page if this is not a problem with your install
parser.enableAudioTags=false
# experimental single-page parser for pdf files: split one pdf into individual pages;
# the key is the property name in the post arguments that gets a page number assigned,
# page numbers start with 1
parser.pdf.individualpages=false
parser.pdf.individualpages.key=page
# Promotion Strings
# These strings appear in the web interface of the YaCy search client.
# Set these strings to customize your peer and give any message to
# other peer users.
promoteSearchPageGreeting = Web Search by the People, for the People
# if the following property is set to true, the network name is used as greeting
promoteSearchPageGreeting.useNetworkName = false
# the following attributes can be used to define a custom image, alternative text and home page on the search page
promoteSearchPageGreeting.homepage = https://yacy.net
promoteSearchPageGreeting.imageAlt = YaCy project web site
promoteSearchPageGreeting.largeImage = env/grafics/YaCyLogo_120ppi.png
promoteSearchPageGreeting.smallImage = env/grafics/YaCyLogo_60ppi.png
# the path to the public reverse word index for text files (web pages)
# the primary path is relative to the data root, the secondary path is an absolute path
# when the secondary path should be equal to the primary, it must be declared empty
indexPrimaryPath=DATA/INDEX
# the path to index archive dumps
indexArchivePath=DATA/ARCHIVE
# the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS
# path to additional databases, like messages, blog data and bookmarks
workPath=DATA/WORK
# the path to the SKINS files.
skinPath=DATA/SKINS
# the yellow-list; URL elements
# (the core of a URL, like 'yahoo' in 'de.yahoo.com')
# appearing in this list will not get a manipulated user agent string
proxyYellowList=yacy.yellow
# the black-list; URLs appearing in this list will not be loaded;
# instead, a 404 is always returned
# all these files will be placed in the listsPath
BlackLists.Shared=url.default.black
BlackLists.DefaultList=url.default.black
#these are not needed as defaults; they just keep the values from being deleted ...
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
surftips.BlackLists=url.default.black
news.BlackLists=url.default.black
# the blue-list;
# no search result is locally presented that has any word of the bluelist
# in the search words, the URL or the URL's description
plasmaBlueList=yacy.blue
# this proxy may in turn again access another proxy
# if you wish to do that, specify it here
# if you want to switch on the proxy use, set remoteProxyUse=true
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
remoteProxyUse=false
remoteProxyUse4SSL=true
remoteProxyHost=192.168.2.2
remoteProxyPort=4239
remoteProxyUser=
remoteProxyPwd=
remoteProxyNoProxy=10\..*,127\..*,172\.(1[6-9]|2[0-9]|3[0-1])\..*,169\.254\..*,192\.168\..*,localhost,0:0:0:0:0:0:0:1
# the proxy may filter the content of transferred web pages
# the bluelist removes specific keywords from web pages
proxyBlueList=yacy.blue
# security settings
# we provide proxy and server security through a 2-stage security gate:
# 1st stage: firewall-like access control through an IP filter for clients
# 2nd stage: password settings for proxy, server and server administrators
# by default, these settings are weak to simplify set-up and testing;
# every user/administrator shall be encouraged to change these settings.
# You can also change them online at run-time on
# http://localhost:8090/
# proxyClient: client IPs that may connect to the proxy for proxy service;
# if several IPs are allowed then they must be separated by a ','
# regular expressions may be used
#proxyClient=192.168.0.4
proxyClient=localhost,127\.0\.0\.1,192\.168\..*,10\..*,0:0:0:0:0:0:0:1.*
# YaCyHop: allow public usage of the proxy for the yacy-protocol.
# This enables usage of the internal http proxy for everyone
# if the file path starts with /yacy/.
# It is used to enable anonymization of yacy protocol requests:
# instead of asking a remote peer directly, a peer in between is asked,
# so that the asked peer cannot know which peer is asking.
YaCyHop=true
# serverClient: comma-separated client IPs that may connect to the web server,
# and thus are allowed to use the search service.
# If you set this to another value, search requests from others
# are blocked, but you will also be blocked from using others'
# search services.
serverClient=*
# use_proxyAccounts: set to true to restrict proxy access to some identified users.
# Use User_p.html to create users.
use_proxyAccounts=true
# adminAccountBase64MD5: an encoded user:password pair
# for the administration of settings through the web interface.
# Should be set to a secret,
# but you are encouraged to set it to another value on the page
# http://localhost:8090/ConfigAccounts_p.html
# The hash is calculated with net.yacy.cora.order.Digest, i.e.
# > java -classpath classes net.yacy.cora.order.Digest -strfhex "admin:<realm>:<pw>"
#adminAccountBase64=MD5:{admin:realm:mysecretpassword}
# The default password for the "admin" account is set to "yacy"
adminAccountUserName=admin
adminAccountBase64MD5=MD5:8cffbc0d66567a0987a4aba1ec46d63c
# special access handling for users from localhost:
# access from localhost may be granted with administration authority
# if this flag is set. It is set to true by default to make usage of YaCy easy.
# If you use YaCy on a headless server, you should set this to false
# or configure it on http://localhost:8090/ConfigAccounts_p.html
# during the first 10 minutes of operation of YaCy;
# if the admin account password is still empty after 10 minutes, a random
# password is generated and access is then ONLY from localhost, which will cause
# inaccessibility for installations on headless servers.
adminAccountForLocalhost=true
# adminAccountAllPages: if set to false, then all pages without the extension "_p" are
# accessible without authorization. Some servlets may individually decide to use or request
# administration rights. If adminAccountAllPages is set to true, then administration
# rights are needed to access all pages without any exception. Setting adminAccountAllPages
# to true therefore closes the YaCy web pages for everyone.
adminAccountAllPages=false
# adminRealm: an internal name (like a group name) for the login setting of the admin frontend
# ATTENTION: changing this name will invalidate all current password hashes
# - With DIGEST authentication mode, this is the realm name of the generated password hashes
# (RFC 2617 standard and recommendation). If you want to share password configuration
# with additional machines, they have to belong to the same realm
# - authentication defaults to BASIC
# - and can be configured in defaults/web.xml , tag <auth-method>
#adminRealm=YaCy-AdminUI
adminRealm=The YaCy access is limited to administrators. If you don't know the password, you can change it using <yacy-home>/bin/passwd.sh <new-password>
# if you are running a principal peer, you must update the following variables
# The upload method that should be used to upload the seed-list file to
# a publicly accessible webserver where it can be loaded by other peers.
#
# You can set the seedUploadMethod property to
# - None
# - Ftp
# - File
# - Scp (only if you have installed the optional addon)
#
seedUploadMethod=none
# This is the most common method to upload the seed-list.
#
# This is an ftp account with all relevant information.
# The upload is only made if there have been changes in the meantime.
seedFTPServer=
seedFTPAccount=
seedFTPPassword=
seedFTPPath=
# As an alternative to an FTP account, a peer can also become a principal peer
# if the seed-list can be generated as a file and that file is also accessible from
# the internet. In this case, omit any ftp settings and set this path here.
# If this path stays empty, an ftp account is considered instead.
# However, you must always set a seedURL because it is used to check if the
# file is actually accessible from the internet
seedFilePath=
# Settings needed to upload the seed-list file via scp
#
# Please note that this upload method can only be used if you have installed
# this optional upload method.
seedScpServer=
seedScpServerPort=
seedScpAccount=
seedScpPassword=
seedScpPath=
# every peer periodically scans for other peers. You can set the length
# of the period here (in minutes). You might switch it on in Kubernetes clusters.
scan.enabled=false
scan.peerCycle=2
# debug flags
debug.search.local.dht.off=false
debug.search.local.solr.off=false
debug.search.remote.dht.off=false
debug.search.remote.dht.testlocal=false
debug.search.remote.solr.off=false
debug.search.remote.solr.testlocal=false
# Set to true to enable computation of statistics on text snippets processing
debug.snippets.statistics.enabled=false
#staticIP if you have a static IP, you can use this setting
staticIP=
#publicPort if you use a different port to access YaCy than the one it listens on, you can use this setting
publicPort=
# each time YaCy starts up, it can trigger the local browser to show the
# status page. This is active by default, to make it easier for first-time
# users to understand what this application does. You can disable the browser
# pop-up here or set a different start page, like the search page
browserPopUpTrigger=true
browserPopUpPage=index.html
# a forward page can be given for the index.html page:
# when a user accesses the index.html page, he/she is forwarded to the page
# given by indexForward. By default this is not defined, which means 'no forward'
indexForward =
# defines if the YaCy icon appears in the system tray on supported platforms
tray.icon.enabled=true
tray.icon.force=false
tray.icon.label=YaCy
tray.menu.enabled=true
# index sharing attributes: by default, sharing is on.
# If you want to use YaCy only for local indexing (robinson mode),
# you may switch this off
allowDistributeIndex=true
allowDistributeIndexWhileCrawling=false
allowDistributeIndexWhileIndexing=true
allowReceiveIndex=true
allowReceiveIndex.search=true
indexReceiveBlockBlacklist=true
# the frequency is the number of links per minute that the peer allows
# _every_ other peer to send to this peer
defaultWordReceiveFrequency=100
defaultLinkReceiveFrequency=30
# the default may be overridden for each peer individually, these
# settings are only available through the online interface
# prefetch parameters
# the prefetch depth assigns a specific depth to the prefetch mechanism.
# A prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
# embedded URLs, but since embedded image links are loaded by the browser,
# this means that only embedded anchors are prefetched additionally.
# A prefetch of 2 would result in loading of all images and anchor pages
# of all embedded anchors. Be careful with this value, since even a prefetch
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
proxyPrefetchDepth=0
proxyStoreHTCache=true
proxyIndexingRemote=false
proxyIndexingLocalText=true
proxyIndexingLocalMedia=true
# proxy usage only for .yacy-Domains for autoconfig
proxyYacyOnly=false
# enable proxy via url (/proxy.html?url=https://yacy.net)
proxyURL=false
proxyURL.access=127.0.0.1,0:0:0:0:0:0:0:1
# which urls to rewrite to /proxy.html?url=x (values: all, domainlist)
proxyURL.rewriteURLs=domainlist
proxyURL.useforresults=false
# Autocrawl configuration
autocrawl=false
autocrawl.index.text=true
autocrawl.index.media=true
autocrawl.ratio=50
autocrawl.rows=100
autocrawl.days=1
autocrawl.query=*:*
autocrawl.deep.depth=3
autocrawl.shallow.depth=1
# From the 'IndexCreate' menu point you can also define a crawling start point.
# The crawling works the same way as the prefetch, but it is possible to
# assign a different crawling depth.
# Be careful with this number. Consider an average branching factor of 20:
# a prefetch depth of 8 would index 20^8 = 25,600,000,000 pages, maybe the whole WWW.
crawlingDepth=3
crawlingDirectDocByURL=true
crawlingIfOlder=-1
crawlingDomFilterDepth=-1
crawlingDomMaxPages=-1
indexText=true
indexMedia=true
# Filter for crawling; may be used to restrict a crawl to a specific domain
# URLs are only indexed and further crawled if they match this filter
crawlingFilter=.*
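# A hypothetical example that would restrict a crawl to example.com and its
# subdomains:
# crawlingFilter=https?://([a-zA-Z0-9-]+\.)*example\.com/.*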
crawlingQ=true
followFrames=true
obeyHtmlRobotsNoindex=true
obeyHtmlRobotsNofollow=false
storeHTCache=true
storeTXCache=true
# peers may initiate remote crawling tasks.
# every peer may allow or disallow being used as a crawling peer;
# you can also set a maximum crawl depth that can be requested or accepted.
# order=parameters for the requester; response=parameters for the responder.
# These values apply only to senior-senior communication.
# The delay value is the number of seconds between two separate orders.
# crawlOrder: default value for remote crawl starts
# crawlResponse: set to true if a peer should retrieve remote crawl urls from other peers
crawlOrder=true
crawlOrderDepth=0
crawlResponse=false
crawlResponseDepth=0
# indexing-exclusion rules
# These rules are important to reduce the number of words that are indexed.
# We distinguish three different sets of stop-words:
# static - excludes all words given in the file yacy.stopwords from indexing,
# dynamic - excludes all words from indexing which are listed by statistic rules,
# parental - excludes all words from indexing which had been indexed in the parent web page.
xsstopw=true
xdstopw=true
xpstopw=true
# Topwords filtering
# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords.
# Change to false when requesting hits from peers with a modified stopwords file while using the unchanged client version.
filterOutStopwordsFromTopwords=true
# crawling steering: must-match/must-not-match
crawlingIPMustMatch=.*
crawlingIPMustNotMatch=
# the default country codes are all codes for countries in Europe
crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
# collections for index data separation
# these collections can be used to produce search tenants.
# The collection is used in the site-parameter in the GSA interface.
# Collections are assigned during crawl-time and defined in the crawl start.
# The YaCyScheme field collection_sxt must be switched on to use this field.
collection=user
# performance settings
# delay times for permanent loops (milliseconds)
# the idlesleep is the pause that a process sleeps if the last call to the
# process job was without execution of anything;
# the busysleep is the pause after a full job execution.
# the prereq value is a memory prerequisite: that many bytes must
# be available/free in the heap; otherwise the loop is not executed
# and another idlesleep is performed
recrawlindex_busysleep=10
recrawlindex_idlesleep=2000
recrawlindex_loadprereq=8.0
recrawlindex_memprereq=1048576
20_dhtdistribution_idlesleep=30000
20_dhtdistribution_busysleep=15000
20_dhtdistribution_memprereq=12582912
20_dhtdistribution_loadprereq=2.0
20_dhtreceive_loadprereq=2.0
30_peerping_idlesleep=30000
30_peerping_busysleep=30000
30_peerping_memprereq=2097152
30_peerping_loadprereq=4.0
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=4194304
40_peerseedcycle_loadprereq=2.0
50_localcrawl_idlesleep=2000
50_localcrawl_busysleep=10
50_localcrawl_memprereq=25165824
50_localcrawl_loadprereq=8.0
50_localcrawl_isPaused=false
55_autocrawl_idlesleep=10000
55_autocrawl_busysleep=10000
55_autocrawl_memprereq=25165824
55_autocrawl_loadprereq=8.0
60_remotecrawlloader_idlesleep=4000
60_remotecrawlloader_busysleep=800
60_remotecrawlloader_memprereq=12582912
60_remotecrawlloader_loadprereq=8.0
60_remotecrawlloader_isPaused=false
62_remotetriggeredcrawl_idlesleep=2000
62_remotetriggeredcrawl_busysleep=200
62_remotetriggeredcrawl_memprereq=12582912
62_remotetriggeredcrawl_loadprereq=8.0
62_remotetriggeredcrawl_isPaused=false
70_surrogates_idlesleep=10000
70_surrogates_busysleep=0
70_surrogates_memprereq=12582912
70_surrogates_loadprereq=8.0
720_ccimport_idlesleep=100
720_ccimport_busysleep=1000
720_ccimport_memprereq=1048576
720_ccimport_loadprereq=8.0
730_ccfilter_idlesleep=100
730_ccfilter_busysleep=1000
730_ccfilter_memprereq=1048576
730_ccfilter_loadprereq=8.0
85_scheduler_idlesleep=60000
85_scheduler_busysleep=60000
85_scheduler_memprereq=1048576
85_scheduler_loadprereq=4.0
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000
90_cleanup_memprereq=0
90_cleanup_loadprereq=16.0
reindexSolr_idlesleep=1000
reindexSolr_busysleep=1
reindexSolr_memprereq=10485760
reindexSolr_loadprereq=16.0
# additional attributes:
# performanceIO is a percent value. A value of 10 means that 10% of the busysleep time
# is used to flush the RAM cache, which is the major part of the IO in YaCy
performanceProfile=defaults/yacy.init
performanceSpeed=100
performanceIO=10
# cleanup-process:
# properties for tasks that are performed during cleanup
cleanup.deletionProcessedNews = true
cleanup.deletionPublishedNews = true
cleanup.failedSearchURLtimeout = 86400000
# default memory settings for startup of YaCy;
# valid in unix/shell and windows environments, but
# not for the first startup of YaCy
# -Xmx<size> maximum/initial Java heap size
javastart_Xmx=Xmx600m
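# For example, to allow a 2 GB maximum heap, this could be set to:
# javastart_Xmx=Xmx2048m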
# YaCy is able to use RAM copies of database tables. This needs a lot of RAM.
# To switch on copying of file tables into RAM, there must be enough memory.
# The memory that is available at startup time is used to decide whether to switch the feature on:
# the tableCachingLimit is the amount of free RAM required at startup time to switch on the feature
tableCachingLimit=419430400
# some java versions may be limited to a specific array size
# of 134217727 entries. To prevent tables of that size from being generated,
# set this property to false.
# If you want to have better performance and switch ramcopy on, also try
# setting this property to true.
# This value is automatically set to true if more than two gigabytes are available.
exceed134217727=false
# priority of the yacy process;
# valid in unix/shell and windows environments, but
# not for the first startup of YaCy
# UNIX: corresponds to the nice-level
# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
javastart_priority=10
# performance properties for the word index cache
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. If you want to increase indexing speed, increase this
# value, e.g. up to one million, but also increase the memory limit to a minimum of 2GB
wordCacheMaxCount = 20000
# Specifies if yacy can be used as a transparent http proxy.
#
# Please note that you also have to reconfigure your firewall
# before you can use yacy as transparent proxy. On linux this
# can be done like this:
# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
# --dport 80 -j DNAT --to 192.168.0.1:8090
#
# With the iptables rule listed above, all http traffic that
# comes from your private network (in this case 192.168.0.0)
# and goes to any webserver listening on port 80 will be forwarded
# by the firewall to yacy running on port 8090 (192.168.0.1:8090)
isTransparentProxy=false
# Specifies the timeout the proxy should use
proxy.clientTimeout = 60000
# Specifies if the proxy should send the Via header according to the RFC
proxy.sendViaHeader=true
# Specifies if the proxy should send the X-Forwarded-For header
proxy.sendXForwardedForHeader=true
# Enable cookie monitoring
proxy.monitorCookies=false
# msgForwarding: Specifies if yacy should forward received messages via
# email to the configured email address
msgForwardingEnabled=false
msgForwardingCmd=/usr/sbin/sendmail
msgForwardingTo=root@localhost
#crawlPause: delay time after specific functions before crawling is resumed
crawlPause.proxy=10
crawlPause.localsearch=50
crawlPause.remotesearch=10
# Some configuration values for the crawler
crawler.clientTimeout=30000
# http crawler specific settings; size in bytes
crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
crawler.http.acceptEncoding=gzip
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.http.maxFileSize=10485760
crawler.http.FollowRedirects=true
crawler.http.RecordRedirects=false
# ftp crawler specific settings; size in bytes
crawler.ftp.maxFileSize=10485760
# smb crawler specific settings: maximum size
crawler.smb.maxFileSize=100000000
# file crawler specific settings: maximum size
crawler.file.maxFileSize=100000000
# maximum number of crawler threads
crawler.MaxActiveThreads = 200
# maximum number of same hosts in crawler threads
crawler.MaxSameHostInQueue = 20
# default latency is the starting value for the average remote server response time
crawler.defaultAverageLatency = 500
# the latency factor is a factor that is applied to the average remote server latency.
# The result is the minimum remote server access delay time
crawler.latencyFactor = 0.5
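# Worked example with the defaults above: for a remote server averaging
# 500 ms response time, the minimum access delay is 500 * 0.5 = 250 ms.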
# The onDemandLimit is the maximum number of crawl queues that are concurrently open
# at the same time. If the number of hosts exceeds this number, onDemand queues are used,
# which are opened each time a queue is accessed, creating high IO load. On the other
# hand, having too many entries in onDemandLimit may exceed the maximum number of file
# pointers. You can increase this number in /proc/sys/fs/file-max and adapt the value
# defined here to it
crawler.onDemandLimit = 1000
# The maximum time in seconds to wait for each wkhtmltopdf call when rendering PDF snapshots
# Beyond that limit the process is killed
snapshots.wkhtmltopdf.timeout = 30
# maximum size of indexing queue
indexer.slots = 100
# maximum size of stacker queue
stacker.slots = 2000
# search options: show advanced options on main search page
search.options = true
# search domains. If set to false then that search is not available
search.text = true
search.image = true
search.audio = false
search.video = false
search.app = false
# Strict content domain filtering: when false, results can be extended to documents including links to documents
# of the contentdom type, without being themselves of that type.
# Examples:
# - contentdom search param == image, strictContentDom == true
#   - jpeg image: acceptable result
#   - html page embedding images: rejected
# - contentdom search param == image, strictContentDom == false
#   - jpeg image: acceptable result
#   - html page embedding images: acceptable result
search.strictContentDom = false
# number of search results per page displayed by default
search.items = 10
# target for search results; this is the href target attribute inside every search result link
# possible values:
# "_blank" (new window), "_self" (same window), "_parent" (the parent frame of a frameset),
# "_top" (top of all frames), "searchresult" (a default custom page name for search results)
# a special pattern can be given for exceptions to the default target according to urls
search.target = _self
search.target.special = _self
search.target.special.pattern =
# When true, override the global referrer.meta.policy value and add the standard noreferrer link type to search result links:
# this instructs the browser that it should not send any referrer information at all when visiting them.
# Be careful: some websites might reject requests with no referrer.
# Supported by more browsers than the meta referrer tag
search.result.noreferrer=false
# search result lines may show additional information for each search hit
# these information pieces may be switched on or off
# When true the favicon (when available) of the website the URL belongs to is fetched and displayed
search.result.show.favicon = true
search.result.show.keywords = false
# Maximum number of keywords initially displayed. Any remaining ones can then be expanded.
search.result.keywords.firstMaxCount = 100
search.result.show.date = true
search.result.show.size = false
search.result.show.metadata = false
search.result.show.parser = false
search.result.show.citation = true
search.result.show.pictures = false
search.result.show.cache = true
search.result.show.proxy = false
search.result.show.indexbrowser = true
search.result.show.vocabulary = false
# Set of comma separated vocabulary names not to be used as search results facets
search.result.show.vocabulary.omit =
search.result.show.snapshots = false
# when true, display the raw ranking score value
search.result.show.ranking = false
# Maximum number of accesses within a given time period to the search interface for unauthenticated users and authenticated users with no extended search right
search.public.max.access.3s = 60
search.public.max.access.1mn = 600
search.public.max.access.10mn = 3000
# Maximum number of accesses within a given time period to the search interface in P2P mode for unauthenticated users and authenticated users with no extended search right
search.public.max.p2p.access.3s = 1
search.public.max.p2p.access.1mn = 6
search.public.max.p2p.access.10mn = 60
# Maximum number of accesses within a given time period to the search interface in P2P mode with browser-side JavaScript remote results resorting for unauthenticated users and authenticated users with no extended search right
search.public.max.p2p.jsresort.access.3s = 1
search.public.max.p2p.jsresort.access.1mn = 1
search.public.max.p2p.jsresort.access.10mn = 10
# Maximum number of accesses within a given time period to the search interface to support fetching remote results snippets for unauthenticated users and authenticated users with no extended search right
search.public.max.remoteSnippet.access.3s = 1
search.public.max.remoteSnippet.access.1mn = 4
search.public.max.remoteSnippet.access.10mn = 20
# search navigators: comma-separated list of default values for search navigation.
# By default navigator keys are sorted by descending counts. To sort by ascending displayed labels, add the :label suffix (example: hosts:label).
# The sort direction can also be specified with the :asc or :desc suffixes (example: hosts:label:desc)
# The set can be temporarily different if the search string is given with different navigation values.
# Assigning no value(s) means that no navigation is shown
search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language
#search.navigation=location,hosts:label,authors,namespace,topics,filetype,protocol,language,collections,date,year,year:dates_in_content_dts:Event
# max number of items displayed in search navigators
search.navigation.maxcount=100
# max number of items displayed in the dates navigator
search.navigation.dates.maxcount=640
# search result verification and snippet fetch caching rules
# each search result can be verified by loading the link from the web;
# this can be enhanced using a cache. In some cases it may be appropriate
# to not verify the link at all and not compute a snippet.
# the possible cases are:
# nocache: no use of the web cache, load all snippets online
# iffresh: use the cache if the cache entry exists and is fresh, otherwise load online
# ifexist: use the cache if the cache entry exists, otherwise load online
# cacheonly: never go online, use all content from the cache. If no cache entry exists,
# consider the content nevertheless as available and show the result without a snippet
# false: no link verification and no snippet generation;
# all search results are valid without verification
search.verify = ifexist